├── .codecov.yml ├── .github ├── CODEOWNERS ├── dependabot.yml └── workflows │ ├── ci.yaml │ ├── generate.yaml │ └── helmrelease.yaml ├── .gitignore ├── .golangci.yml ├── CHANGELOG.md ├── CONTRIBUTING.md ├── LICENSE ├── Makefile ├── README.md ├── cmd └── sloth │ ├── commands │ ├── commands.go │ ├── generate.go │ ├── helpers.go │ ├── k8scontroller.go │ ├── validate.go │ └── version.go │ └── main.go ├── deploy └── kubernetes │ ├── helm │ └── sloth │ │ ├── .helmignore │ │ ├── Chart.yaml │ │ ├── crds │ │ └── sloth.slok.dev_prometheusservicelevels.yaml │ │ ├── templates │ │ ├── _helpers.tpl │ │ ├── cluster-role-binding.yaml │ │ ├── cluster-role.yaml │ │ ├── configmap.yaml │ │ ├── deployment.yaml │ │ ├── pod-monitor.yaml │ │ └── service-account.yaml │ │ ├── tests │ │ ├── go.mod │ │ ├── go.sum │ │ ├── helm_chart_test.go │ │ ├── testdata │ │ │ └── output │ │ │ │ ├── cluster_role_binding_custom.yaml │ │ │ │ ├── cluster_role_binding_default.yaml │ │ │ │ ├── cluster_role_custom.yaml │ │ │ │ ├── cluster_role_default.yaml │ │ │ │ ├── configmap_slo_config.yaml │ │ │ │ ├── deployment_custom.yaml │ │ │ │ ├── deployment_custom_no_extras.yaml │ │ │ │ ├── deployment_custom_slo_config.yaml │ │ │ │ ├── deployment_default.yaml │ │ │ │ ├── pod_monitor_custom.yaml │ │ │ │ ├── pod_monitor_default.yaml │ │ │ │ ├── sa_custom.yaml │ │ │ │ └── sa_default.yaml │ │ └── values_test.go │ │ └── values.yaml │ ├── kustomization.yaml │ └── raw │ ├── sloth-with-common-plugins.yaml │ └── sloth.yaml ├── docker ├── dev │ └── Dockerfile └── prod │ └── Dockerfile ├── docs └── img │ ├── logo.png │ └── sloth_small_dashboard.png ├── examples ├── _gen │ ├── custom_rule_group_interval.yml │ ├── getting-started.yml │ ├── home-wifi.yml │ ├── k8s-getting-started.yml │ ├── k8s-home-wifi.yml │ ├── k8s-multifile.yml │ ├── kubernetes-apiserver.yml │ ├── multifile.yml │ ├── no-alerts.yml │ ├── openslo-getting-started.yml │ ├── openslo-kubernetes-apiserver.yml │ ├── plugin-getting-started.yml │ ├── plugin-k8s-getting-started.yml │ └── raw-home-wifi.yml ├── custom_rule_group_interval.yml ├── getting-started.yml ├── home-wifi.yml ├── k8s-getting-started.yml ├── k8s-home-wifi.yml ├── k8s-multifile.yml ├── kubernetes-apiserver.yml ├── multifile.yml ├── no-alerts.yml ├── openslo-getting-started.yml ├── openslo-kubernetes-apiserver.yml ├── plugin-getting-started.yml ├── plugin-k8s-getting-started.yml ├── plugins │ └── getting-started │ │ └── availability │ │ └── plugin.go ├── raw-home-wifi.yml └── windows │ ├── 7d.yaml │ └── custom-30d.yaml ├── go.mod ├── go.sum ├── internal ├── alert │ ├── alert.go │ ├── alert_test.go │ ├── window.go │ └── windows │ │ ├── google-28d.yaml │ │ └── google-30d.yaml ├── app │ ├── generate │ │ ├── noop.go │ │ ├── prometheus.go │ │ └── prometheus_test.go │ └── kubecontroller │ │ ├── handler.go │ │ └── retriever.go ├── info │ └── info.go ├── k8sprometheus │ ├── helpers.go │ ├── k8sprometheusmock │ │ └── prometheus_rules_ensurer.go │ ├── kubernetes.go │ ├── model.go │ ├── model_test.go │ ├── spec.go │ ├── spec_test.go │ ├── storage.go │ └── storage_test.go ├── log │ ├── log.go │ └── logrus │ │ └── logrus.go ├── openslo │ ├── spec.go │ └── spec_test.go └── prometheus │ ├── alert_rules.go │ ├── alert_rules_test.go │ ├── conventions.go │ ├── helpers.go │ ├── model.go │ ├── model_test.go │ ├── prometheusmock │ └── file_manager.go │ ├── recording_rules.go │ ├── recording_rules_test.go │ ├── sli_plugin.go │ ├── sli_plugin_test.go │ ├── spec.go │ ├── spec_test.go │ ├── storage.go │ └── storage_test.go ├── pkg ├── kubernetes │ ├── 
api │ │ └── sloth │ │ │ ├── register.go │ │ │ └── v1 │ │ │ ├── README.md │ │ │ ├── doc.go │ │ │ ├── register.go │ │ │ ├── types.go │ │ │ └── zz_generated.deepcopy.go │ └── gen │ │ ├── clientset │ │ └── versioned │ │ │ ├── clientset.go │ │ │ ├── doc.go │ │ │ ├── fake │ │ │ ├── clientset_generated.go │ │ │ ├── doc.go │ │ │ └── register.go │ │ │ ├── scheme │ │ │ ├── doc.go │ │ │ └── register.go │ │ │ └── typed │ │ │ └── sloth │ │ │ └── v1 │ │ │ ├── doc.go │ │ │ ├── fake │ │ │ ├── doc.go │ │ │ ├── fake_prometheusservicelevel.go │ │ │ └── fake_sloth_client.go │ │ │ ├── generated_expansion.go │ │ │ ├── prometheusservicelevel.go │ │ │ └── sloth_client.go │ │ └── crd │ │ └── sloth.slok.dev_prometheusservicelevels.yaml └── prometheus │ ├── alertwindows │ └── v1 │ │ ├── README.md │ │ └── v1.go │ ├── api │ └── v1 │ │ ├── README.md │ │ └── v1.go │ └── plugin │ └── v1 │ └── v1.go ├── scripts ├── build │ ├── bin │ │ ├── build-all.sh │ │ ├── build-raw.sh │ │ └── build.sh │ └── docker │ │ ├── build-image-dev.sh │ │ ├── build-image.sh │ │ ├── build-publish-image-all.sh │ │ └── publish-image.sh ├── check │ ├── check.sh │ ├── helm-test.sh │ ├── integration-test-cli.sh │ ├── integration-test-k8s.sh │ ├── integration-test.sh │ └── unit-test.sh ├── deploygen.sh ├── deps.sh ├── examplesgen.sh ├── gogen.sh └── kubegen.sh └── test └── integration ├── crd └── prometheus-operator-crd.yaml ├── k8scontroller ├── exp_base_28d_test.go ├── exp_base_7d_test.go ├── exp_base_test.go ├── exp_plugin_test.go ├── helpers.go ├── k8scontroller_test.go ├── plugin │ └── plugin.go └── windows │ └── 7d.yaml ├── prometheus ├── generate_test.go ├── helpers.go ├── plugin │ └── plugin.go ├── testdata │ ├── in-base-k8s.yaml │ ├── in-base.yaml │ ├── in-invalid-version.yaml │ ├── in-multifile-k8s.yaml │ ├── in-multifile.yaml │ ├── in-openslo.yaml │ ├── in-plugin.yaml │ ├── out-base-28d.yaml.tpl │ ├── out-base-custom-windows-7d.yaml.tpl │ ├── out-base-extra-labels.yaml.tpl │ ├── out-base-k8s.yaml.tpl │ ├── out-base-no-alerts.yaml.tpl │ ├── out-base-no-recordings.yaml.tpl │ ├── out-base.yaml.tpl │ ├── out-multifile-k8s.yaml.tpl │ ├── out-multifile.yaml.tpl │ ├── out-openslo.yaml.tpl │ ├── out-plugin.yaml.tpl │ └── validate │ │ ├── bad │ │ ├── bad-aa.yaml │ │ ├── bad-ab.yaml │ │ ├── bad-ba.yaml │ │ ├── bad-k8s.yaml │ │ ├── bad-multi-k8s.yaml │ │ ├── bad-multi.yaml │ │ └── bad-openslo.yaml │ │ └── good │ │ ├── good-aa.yaml │ │ ├── good-ab.yaml │ │ ├── good-ba.yaml │ │ ├── good-k8s.yaml │ │ ├── good-multi-k8s.yaml │ │ ├── good-multi.yaml │ │ └── good-openslo.yaml ├── validate_test.go └── windows │ └── 7d.yaml └── testutils └── cmd.go /.codecov.yml: -------------------------------------------------------------------------------- 1 | coverage: 2 | range: 70..90 # First number represents red, and second represents green. 3 | status: 4 | patch: false 5 | project: 6 | default: 7 | # Allow going down 1% before being a failure. 8 | threshold: 1% 9 | -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @slok 2 | 3 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "gomod" 4 | directory: "/" 5 | schedule: 6 | interval: "daily" 7 | ignore: 8 | # Ignore Kubernetes dependencies to have full control on them. 
9 | - dependency-name: "k8s.io/*" 10 | - package-ecosystem: "github-actions" 11 | directory: "/" 12 | schedule: 13 | interval: "daily" 14 | - package-ecosystem: "docker" 15 | directory: "/docker/dev" 16 | schedule: 17 | interval: "daily" 18 | - package-ecosystem: "docker" 19 | directory: "/docker/prod" 20 | schedule: 21 | interval: "daily" 22 | -------------------------------------------------------------------------------- /.github/workflows/generate.yaml: -------------------------------------------------------------------------------- 1 | # Sample job that allows you to download the generated files as Artifacts from the GitHub Actions page 2 | 3 | name: SLO generation 4 | 5 | on: 6 | # Allows you to run this workflow manually from the Actions tab 7 | workflow_dispatch: 8 | 9 | jobs: 10 | generate-slo-job-1: 11 | name: Generate the SLOs 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v3 15 | - name: Download and set up the generator binary 16 | run: | 17 | wget https://github.com/slok/sloth/releases/download/v0.9.0/sloth-linux-amd64 18 | chmod +x sloth-linux-amd64 19 | ./sloth-linux-amd64 generate -i ./examples/getting-started.yml -o ./examples/_gen/getting-started.yml 20 | ./sloth-linux-amd64 generate -i ./examples/no-alerts.yml -o ./examples/_gen/no-alerts.yml 21 | - name: 'Upload directory with generated SLOs' 22 | uses: actions/upload-artifact@v3 23 | with: 24 | name: SLOs 25 | path: examples/_gen/ 26 | 27 | -------------------------------------------------------------------------------- /.github/workflows/helmrelease.yaml: -------------------------------------------------------------------------------- 1 | name: Release Charts 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | paths: 8 | - "deploy/kubernetes/helm/**" 9 | workflow_dispatch: 10 | 11 | jobs: 12 | release: 13 | runs-on: ubuntu-latest 14 | steps: 15 | - name: Checkout 16 | uses: actions/checkout@v3 17 | with: 18 | fetch-depth: 0 19 | 20 | - name: Configure Git 21 | run: | 22 | git config user.name "$GITHUB_ACTOR" 23 | git config user.email "$GITHUB_ACTOR@users.noreply.github.com" 24 | 25 | - name: Install Helm 26 | uses: azure/setup-helm@v3.4 27 | with: 28 | version: v3.7.1 29 | 30 | - name: Run chart-releaser 31 | uses: helm/chart-releaser-action@v1.6.0 32 | with: 33 | charts_dir: deploy/kubernetes/helm 34 | env: 35 | CR_TOKEN: "${{ secrets.GITHUB_TOKEN }}" 36 | CR_RELEASE_NAME_TEMPLATE: "sloth-helm-chart-{{ .Version }}" 37 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Binaries for programs and plugins 2 | *.exe 3 | *.exe~ 4 | *.dll 5 | *.so 6 | *.dylib 7 | 8 | # Test binary, built with `go test -c` 9 | *.test 10 | 11 | # Output of the go coverage tool, specifically when used with LiteIDE 12 | *.out 13 | 14 | # Vendor directory 15 | vendor/ 16 | 17 | # Test coverage.
18 | .test_coverage.txt 19 | 20 | # Binaries 21 | /bin 22 | -------------------------------------------------------------------------------- /.golangci.yml: -------------------------------------------------------------------------------- 1 | --- 2 | run: 3 | timeout: 3m 4 | build-tags: 5 | - integration 6 | 7 | linters: 8 | enable: 9 | - misspell 10 | - goimports 11 | - revive 12 | - gofmt 13 | - depguard 14 | - godot 15 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | 3 | Sloth is [Apache 2.0 licensed](LICENSE) and accepts contributions via GitHub 4 | pull requests. This document outlines some of the conventions on development 5 | workflow, commit message formatting, contact points and other resources to make 6 | it easier to get your contribution accepted. 7 | 8 | We gratefully welcome improvements to issues and documentation as well as to code. 9 | 10 | ## Getting Started 11 | 12 | - Fork the repository on GitHub 13 | - Read the [README](README.md#getting-started) to get started. 14 | - If you want to contribute as a developer, continue reading this document for further instructions 15 | - Play with the project, submit bugs, submit pull requests! 16 | 17 | ## Contribution workflow 18 | 19 | This is a rough outline of how to prepare a contribution: 20 | 21 | - Fork the repository. 22 | - Create a topic branch from where you want to base your work (usually branched from main). 23 | - Make commits of logical units. 24 | - Make sure your commit messages are clear and self-explanatory. 25 | - Push your changes to a topic branch in your fork of the repository. 26 | - If you changed code, add automated tests to cover your changes. 27 | - Submit a pull request from your fork to the original repository. 28 | 29 | ## Running the application 30 | 31 | ### CLI 32 | 33 | To run the CLI you can use the example specs. Some examples: 34 | 35 | ```bash 36 | go run ./cmd/sloth generate -i ./examples/getting-started.yml 37 | 38 | go run ./cmd/sloth/ validate -i ./examples/ -p ./examples/plugins/ -e _gen 39 | ``` 40 | 41 | ### Kubernetes 42 | 43 | You can run Sloth in controller mode in multiple ways; depending on the part you are working on, one or another may be more helpful. 44 | 45 | > Apart from the options described next, the Kubernetes controller mode has multiple options that can be used to develop, deploy in different ways, or apply maintenance, like selecting a single namespace or using a label selector. Check them with `sloth controller --help` 46 | 47 | #### Without a cluster 48 | 49 | If you are not developing something that needs a real Kubernetes connection, Sloth can run without a Kubernetes cluster by using fake in-memory Kubernetes clients: use `--mode="fake"` 50 | 51 | Example: 52 | 53 | ```bash 54 | go run ./cmd/sloth/ controller --mode=fake --debug 55 | ``` 56 | 57 | #### With a local cluster 58 | 59 | If you need a Kubernetes connection or want to develop against a more realistic setup, you can connect to any Kubernetes cluster with your local credentials by using the `--kube-local` flag. 60 | 61 | ```bash 62 | go run ./cmd/sloth/ controller --kube-local 63 | ``` 64 | 65 | #### Dry run 66 | 67 | If you need to set Sloth in dry-run mode (read-only operations), you can use `--mode=dry-run`. 68 | 69 | ```bash 70 | go run ./cmd/sloth/ controller --mode=dry-run 71 | ``` 72 | 73 | You can use this mode with `--kube-local`.
74 | 75 | ```bash 76 | go run ./cmd/sloth/ controller --kube-local --mode=dry-run 77 | ``` 78 | 79 | ## Automated checks and unit tests 80 | 81 | You can check that your code satisfies the project standards with: 82 | 83 | ```bash 84 | make check 85 | ``` 86 | 87 | You can run the unit tests with: 88 | 89 | ```bash 90 | make test 91 | ``` 92 | 93 | ## Integration tests 94 | 95 | > If any of the required dependencies are missing when running the tests, those tests will be skipped 96 | 97 | ### CLI 98 | 99 | First you will need to build the binary (you can use `make build`). 100 | 101 | Locate the binary, for example `./bin/sloth-linux-amd64`, and set it as the binary the integration tests will execute: 102 | 103 | ```bash 104 | export SLOTH_INTEGRATION_BINARY=${PWD}/bin/sloth-linux-amd64 105 | ``` 106 | 107 | Now you can run the tests: 108 | 109 | ```bash 110 | make ci-integration-cli 111 | ``` 112 | 113 | ### Kubernetes 114 | 115 | For Kubernetes you will need a cluster; the easiest way is to create one using [Kind]. For example, create a cluster and export its access configuration: 116 | 117 | ```bash 118 | kind create cluster --name sloth 119 | kind get kubeconfig --name sloth > /tmp/kind-sloth.kubeconfig 120 | ``` 121 | 122 | Prepare the required CRDs on the cluster: 123 | 124 | ```bash 125 | kubectl --kubeconfig=/tmp/kind-sloth.kubeconfig apply -f ./pkg/kubernetes/gen/crd/ 126 | kubectl --kubeconfig=/tmp/kind-sloth.kubeconfig apply -f ./test/integration/crd 127 | ``` 128 | 129 | Now we are ready. Prepare the integration test settings that point to the Sloth binary you want to use (built with `make build`) and the Kubernetes cluster access config: 130 | 131 | ```bash 132 | export SLOTH_INTEGRATION_BINARY=${PWD}/bin/sloth-linux-amd64 133 | export SLOTH_INTEGRATION_KUBE_CONFIG=/tmp/kind-sloth.kubeconfig 134 | ``` 135 | 136 | Execute the tests: 137 | 138 | ```bash 139 | make ci-integration-k8s 140 | ``` 141 | 142 | ## Profiling 143 | 144 | By default Sloth serves pprof on the metrics port (`8081`). 145 | 146 | Check this [pprof cheatsheet][pprof-cheatsheet].
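A minimal profiling sketch, assuming the controller is running locally and that the standard Go `net/http/pprof` endpoints are what is mounted on the metrics port:

```bash
# With a controller running locally (e.g. `go run ./cmd/sloth/ controller --mode=fake`),
# fetch a heap profile. The /debug/pprof path assumes the standard net/http/pprof
# handlers are exposed on the metrics port (8081).
go tool pprof http://localhost:8081/debug/pprof/heap
```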
147 | 148 | [kind]: https://github.com/kubernetes-sigs/kind 149 | [pprof-cheatsheet]: https://gist.github.com/slok/33dad1d0d0bae07977e6d32bcc010188 150 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | 2 | SHELL := $(shell which bash) 3 | OSTYPE := $(shell uname) 4 | DOCKER := $(shell command -v docker) 5 | GID := $(shell id -g) 6 | UID := $(shell id -u) 7 | VERSION ?= $(shell git describe --tags --always) 8 | 9 | UNIT_TEST_CMD := ./scripts/check/unit-test.sh 10 | INTEGRATION_TEST_CMD := ./scripts/check/integration-test.sh 11 | INTEGRATION_TEST_K8S_CMD := ./scripts/check/integration-test-k8s.sh 12 | INTEGRATION_TEST_CLI_CMD := ./scripts/check/integration-test-cli.sh 13 | HELM_TEST_CMD := ./scripts/check/helm-test.sh 14 | CHECK_CMD := ./scripts/check/check.sh 15 | 16 | DEV_IMAGE_NAME := local/sloth-dev 17 | PROD_IMAGE_NAME ?= ghcr.io/slok/sloth 18 | 19 | DOCKER_RUN_CMD := docker run --env ostype=$(OSTYPE) -v ${PWD}:/src --rm ${DEV_IMAGE_NAME} 20 | BUILD_BINARY_CMD := VERSION=${VERSION} ./scripts/build/bin/build.sh 21 | BUILD_BINARY_ALL_CMD := VERSION=${VERSION} ./scripts/build/bin/build-all.sh 22 | BUILD_DEV_IMAGE_CMD := IMAGE=${DEV_IMAGE_NAME} DOCKER_FILE_PATH=./docker/dev/Dockerfile VERSION=latest ./scripts/build/docker/build-image-dev.sh 23 | BUILD_PROD_IMAGE_CMD := IMAGE=${PROD_IMAGE_NAME} DOCKER_FILE_PATH=./docker/prod/Dockerfile VERSION=${VERSION} ./scripts/build/docker/build-image.sh 24 | BUILD_PUBLISH_PROD_IMAGE_ALL_CMD := IMAGE=${PROD_IMAGE_NAME} DOCKER_FILE_PATH=./docker/prod/Dockerfile VERSION=${VERSION} ./scripts/build/docker/build-publish-image-all.sh 25 | PUBLISH_PROD_IMAGE_CMD := IMAGE=${PROD_IMAGE_NAME} VERSION=${VERSION} ./scripts/build/docker/publish-image.sh 26 | 27 | 28 | help: ## Show this help 29 | @echo "Help" 30 | @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf " \033[36m%-20s\033[93m %s\n", $$1, $$2}' 31 | 32 | .PHONY: default 33 | default: help 34 | 35 | .PHONY: build-image 36 | build-image: ## Builds the production docker image. 37 | @$(BUILD_PROD_IMAGE_CMD) 38 | 39 | build-publish-image-all: ## Builds and publishes all the production docker images (multiarch). 40 | @$(BUILD_PUBLISH_PROD_IMAGE_ALL_CMD) 41 | 42 | .PHONY: build-dev-image 43 | build-dev-image: ## Builds the development docker image. 44 | @$(BUILD_DEV_IMAGE_CMD) 45 | 46 | build: build-dev-image ## Builds the production binary. 47 | @$(DOCKER_RUN_CMD) /bin/sh -c '$(BUILD_BINARY_CMD)' 48 | 49 | build-all: build-dev-image ## Builds the production binaries for all archs. 50 | @$(DOCKER_RUN_CMD) /bin/sh -c '$(BUILD_BINARY_ALL_CMD)' 51 | 52 | .PHONY: test 53 | test: build-dev-image ## Runs unit test. 54 | @$(DOCKER_RUN_CMD) /bin/sh -c '$(UNIT_TEST_CMD)' 55 | 56 | .PHONY: helm-test 57 | helm-test: build-dev-image ## Runs helm chart test. 58 | @$(DOCKER_RUN_CMD) /bin/sh -c '$(HELM_TEST_CMD)' 59 | 60 | .PHONY: check 61 | check: build-dev-image ## Runs checks. 62 | @$(DOCKER_RUN_CMD) /bin/sh -c '$(CHECK_CMD)' 63 | 64 | .PHONY: integration 65 | integration: build-dev-image ## Runs integration test. 66 | @$(DOCKER_RUN_CMD) /bin/sh -c '$(INTEGRATION_TEST_CMD)' 67 | 68 | .PHONY: go-gen 69 | go-gen: build-dev-image ## Generates go based code. 70 | @$(DOCKER_RUN_CMD) /bin/sh -c './scripts/gogen.sh' 71 | 72 | .PHONY: kube-gen 73 | kube-gen: build-dev-image ## Generates Kubernetes based code.
74 | @$(DOCKER_RUN_CMD) /bin/sh -c './scripts/kubegen.sh' 75 | 76 | .PHONY: examples-gen 77 | examples-gen: build-dev-image ## Generates sloth examples. 78 | @$(DOCKER_RUN_CMD) /bin/sh -c './scripts/examplesgen.sh' 79 | 80 | .PHONY: deploy-gen 81 | deploy-gen: build-dev-image ## Generates sloth deploy. 82 | @$(DOCKER_RUN_CMD) /bin/sh -c './scripts/deploygen.sh' 83 | 84 | .PHONY: gen 85 | gen: kube-gen go-gen examples-gen deploy-gen ## Generates all. 86 | 87 | .PHONY: deps 88 | deps: ## Fixes the dependencies 89 | @$(DOCKER_RUN_CMD) /bin/sh -c './scripts/deps.sh' 90 | 91 | .PHONY: ci-build 92 | ci-build: ## Builds the production binary in CI environment (without docker). 93 | @$(BUILD_BINARY_CMD) 94 | 95 | .PHONY: ci-test 96 | ci-test: ## Runs unit test in CI environment (without docker). 97 | @$(UNIT_TEST_CMD) 98 | 99 | .PHONY: ci-helm-test 100 | ci-helm-test: ## Runs helm chart tests in CI environment (without docker). 101 | @$(HELM_TEST_CMD) 102 | 103 | .PHONY: ci-check 104 | ci-check: ## Runs checks in CI environment (without docker). 105 | @$(CHECK_CMD) 106 | 107 | .PHONY: ci-integration 108 | ci-integration: ## Runs integration test in CI environment (without docker). 109 | @$(INTEGRATION_TEST_CMD) 110 | 111 | .PHONY: ci-integration-cli 112 | ci-integration-cli: ## Runs integration test for CLI in CI environment (without docker). 113 | @$(INTEGRATION_TEST_CLI_CMD) 114 | 115 | .PHONY: ci-integration-k8s 116 | ci-integration-k8s: ## Runs integration test for K8s in CI environment (without docker). 117 | @$(INTEGRATION_TEST_K8S_CMD) 118 | -------------------------------------------------------------------------------- /cmd/sloth/commands/commands.go: -------------------------------------------------------------------------------- 1 | package commands 2 | 3 | import ( 4 | "context" 5 | "io" 6 | 7 | "gopkg.in/alecthomas/kingpin.v2" 8 | 9 | "github.com/slok/sloth/internal/log" 10 | ) 11 | 12 | const ( 13 | // LoggerTypeDefault is the logger default type. 14 | LoggerTypeDefault = "default" 15 | // LoggerTypeJSON is the logger json type. 16 | LoggerTypeJSON = "json" 17 | ) 18 | 19 | // Command represents an application command; all commands that want to be executed 20 | // should implement it and be set up in main. 21 | type Command interface { 22 | Name() string 23 | Run(ctx context.Context, config RootConfig) error 24 | } 25 | 26 | // RootConfig represents the root command configuration and global configuration 27 | // for all the commands. 28 | type RootConfig struct { 29 | // Global flags. 30 | Debug bool 31 | NoLog bool 32 | NoColor bool 33 | LoggerType string 34 | 35 | // Global instances. 36 | Stdin io.Reader 37 | Stdout io.Writer 38 | Stderr io.Writer 39 | Logger log.Logger 40 | } 41 | 42 | // NewRootConfig initializes the main root configuration. 43 | func NewRootConfig(app *kingpin.Application) *RootConfig { 44 | c := &RootConfig{} 45 | 46 | // Register flags.
47 | app.Flag("debug", "Enable debug mode.").BoolVar(&c.Debug) 48 | app.Flag("no-log", "Disable logger.").BoolVar(&c.NoLog) 49 | app.Flag("no-color", "Disable logger color.").BoolVar(&c.NoColor) 50 | app.Flag("logger", "Selects the logger type.").Default(LoggerTypeDefault).EnumVar(&c.LoggerType, LoggerTypeDefault, LoggerTypeJSON) 51 | 52 | return c 53 | } 54 | -------------------------------------------------------------------------------- /cmd/sloth/commands/helpers.go: -------------------------------------------------------------------------------- 1 | package commands 2 | 3 | import ( 4 | "bytes" 5 | "context" 6 | "fmt" 7 | "io/fs" 8 | "path/filepath" 9 | "regexp" 10 | "strings" 11 | 12 | "github.com/slok/sloth/internal/log" 13 | "github.com/slok/sloth/internal/prometheus" 14 | ) 15 | 16 | var ( 17 | splitMarkRe = regexp.MustCompile("(?m)^---") 18 | rmCommentsRe = regexp.MustCompile("(?m)^#.*$") 19 | ) 20 | 21 | func splitYAML(data []byte) []string { 22 | // Sanitize. 23 | data = bytes.TrimSpace(data) 24 | data = rmCommentsRe.ReplaceAll(data, []byte("")) 25 | 26 | // Split (YAML can declare multiple documents in the same file using `---`). 27 | dataSplit := splitMarkRe.Split(string(data), -1) 28 | 29 | // Remove empty splits. 30 | nonEmptyData := []string{} 31 | for _, d := range dataSplit { 32 | d = strings.TrimSpace(d) 33 | if d != "" { 34 | nonEmptyData = append(nonEmptyData, d) 35 | } 36 | } 37 | 38 | return nonEmptyData 39 | } 40 | 41 | func createPluginLoader(ctx context.Context, logger log.Logger, paths []string) (*prometheus.FileSLIPluginRepo, error) { 42 | config := prometheus.FileSLIPluginRepoConfig{ 43 | Paths: paths, 44 | Logger: logger, 45 | } 46 | sliPluginRepo, err := prometheus.NewFileSLIPluginRepo(config) 47 | if err != nil { 48 | return nil, fmt.Errorf("could not create file SLI plugin repository: %w", err) 49 | } 50 | 51 | return sliPluginRepo, nil 52 | } 53 | 54 | func discoverSLOManifests(logger log.Logger, exclude, include *regexp.Regexp, path string) ([]string, error) { 55 | logger = logger.WithValues(log.Kv{"svc": "SLODiscovery"}) 56 | 57 | paths := []string{} 58 | err := filepath.Walk(path, func(path string, info fs.FileInfo, err error) error { 59 | if err != nil { 60 | return err 61 | } 62 | 63 | if info.IsDir() { 64 | return nil 65 | } 66 | 67 | // Non-YAML files don't need to be handled (directories already returned above). 68 | extension := strings.ToLower(filepath.Ext(path)) 69 | if extension != ".yml" && extension != ".yaml" { 70 | return nil 71 | } 72 | 73 | // Filter by exclude or include (exclude has preference). 74 | if exclude != nil && exclude.MatchString(path) { 75 | logger.Debugf("Excluding path due to exclude filter %s", path) 76 | return nil 77 | } 78 | if include != nil && !include.MatchString(path) { 79 | logger.Debugf("Excluding path due to include filter %s", path) 80 | return nil 81 | } 82 | 83 | // If we reach here, the path has been discovered.
84 | paths = append(paths, path) 85 | 86 | return nil 87 | }) 88 | 89 | if err != nil { 90 | return nil, fmt.Errorf("could not find files recursively: %w", err) 91 | } 92 | 93 | return paths, nil 94 | } 95 | -------------------------------------------------------------------------------- /cmd/sloth/commands/version.go: -------------------------------------------------------------------------------- 1 | package commands 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | 7 | "gopkg.in/alecthomas/kingpin.v2" 8 | 9 | "github.com/slok/sloth/internal/info" 10 | ) 11 | 12 | type versionCommand struct{} 13 | 14 | // NewVersionCommand returns the version command. 15 | func NewVersionCommand(app *kingpin.Application) Command { 16 | c := &versionCommand{} 17 | app.Command("version", "Shows version.") 18 | 19 | return c 20 | } 21 | 22 | func (versionCommand) Name() string { return "version" } 23 | func (versionCommand) Run(ctx context.Context, config RootConfig) error { 24 | fmt.Fprint(config.Stdout, info.Version) 25 | return nil 26 | } 27 | -------------------------------------------------------------------------------- /cmd/sloth/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "io" 7 | "os" 8 | 9 | "github.com/sirupsen/logrus" 10 | "gopkg.in/alecthomas/kingpin.v2" 11 | 12 | "github.com/slok/sloth/cmd/sloth/commands" 13 | "github.com/slok/sloth/internal/info" 14 | "github.com/slok/sloth/internal/log" 15 | loglogrus "github.com/slok/sloth/internal/log/logrus" 16 | ) 17 | 18 | // Run runs the main application. 19 | func Run(ctx context.Context, args []string, stdin io.Reader, stdout, stderr io.Writer) error { 20 | app := kingpin.New("sloth", "Easy SLO generator.") 21 | app.DefaultEnvars() 22 | config := commands.NewRootConfig(app) 23 | 24 | // Setup commands (registers flags). 25 | generateCmd := commands.NewGenerateCommand(app) 26 | kubeCtrlCmd := commands.NewKubeControllerCommand(app) 27 | validateCmd := commands.NewValidateCommand(app) 28 | versionCmd := commands.NewVersionCommand(app) 29 | 30 | cmds := map[string]commands.Command{ 31 | generateCmd.Name(): generateCmd, 32 | kubeCtrlCmd.Name(): kubeCtrlCmd, 33 | validateCmd.Name(): validateCmd, 34 | versionCmd.Name(): versionCmd, 35 | } 36 | 37 | // Parse commandline. 38 | cmdName, err := app.Parse(args[1:]) 39 | if err != nil { 40 | return fmt.Errorf("invalid command configuration: %w", err) 41 | } 42 | 43 | // Set up global dependencies. 44 | config.Stdin = stdin 45 | config.Stdout = stdout 46 | config.Stderr = stderr 47 | config.Logger = getLogger(*config) 48 | 49 | // Execute command. 50 | err = cmds[cmdName].Run(ctx, *config) 51 | if err != nil { 52 | return fmt.Errorf("%q command failed: %w", cmdName, err) 53 | } 54 | 55 | return nil 56 | } 57 | 58 | // getLogger returns the application logger. 59 | func getLogger(config commands.RootConfig) log.Logger { 60 | if config.NoLog { 61 | return log.Noop 62 | } 63 | 64 | // If the logger is not disabled, use a logrus logger. 65 | logrusLog := logrus.New() 66 | logrusLog.Out = config.Stderr // By default the logger goes to stderr (so it stays separate from stdout prints). 67 | logrusLogEntry := logrus.NewEntry(logrusLog) 68 | 69 | if config.Debug { 70 | logrusLogEntry.Logger.SetLevel(logrus.DebugLevel) 71 | } 72 | 73 | // Log format.
74 | switch config.LoggerType { 75 | case commands.LoggerTypeDefault: 76 | logrusLogEntry.Logger.SetFormatter(&logrus.TextFormatter{ 77 | ForceColors: !config.NoColor, 78 | DisableColors: config.NoColor, 79 | }) 80 | case commands.LoggerTypeJSON: 81 | logrusLogEntry.Logger.SetFormatter(&logrus.JSONFormatter{}) 82 | } 83 | 84 | logger := loglogrus.NewLogrus(logrusLogEntry).WithValues(log.Kv{ 85 | "version": info.Version, 86 | }) 87 | 88 | logger.Debugf("Debug level is enabled") // Will log only when debug enabled. 89 | 90 | return logger 91 | } 92 | 93 | func main() { 94 | ctx := context.Background() 95 | err := Run(ctx, os.Args, os.Stdin, os.Stdout, os.Stderr) 96 | if err != nil { 97 | fmt.Fprintf(os.Stderr, "error: %s", err) 98 | os.Exit(1) 99 | } 100 | } 101 | -------------------------------------------------------------------------------- /deploy/kubernetes/helm/sloth/.helmignore: -------------------------------------------------------------------------------- 1 | # Patterns to ignore when building packages. 2 | # This supports shell glob matching, relative path matching, and 3 | # negation (prefixed with !). Only one pattern per line. 4 | .DS_Store 5 | # Common VCS dirs 6 | .git/ 7 | .gitignore 8 | .bzr/ 9 | .bzrignore 10 | .hg/ 11 | .hgignore 12 | .svn/ 13 | # Common backup files 14 | *.swp 15 | *.bak 16 | *.tmp 17 | *.orig 18 | *~ 19 | # Various IDEs 20 | .project 21 | .idea/ 22 | *.tmproj 23 | .vscode/ 24 | 25 | # Custom. 26 | tests/ 27 | -------------------------------------------------------------------------------- /deploy/kubernetes/helm/sloth/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v2 2 | name: sloth 3 | description: Base chart for Sloth. 4 | type: application 5 | home: https://github.com/linode-obs/sloth 6 | kubeVersion: ">= 1.19.0-0" 7 | version: 0.8.2 8 | -------------------------------------------------------------------------------- /deploy/kubernetes/helm/sloth/templates/_helpers.tpl: -------------------------------------------------------------------------------- 1 | {{- define "sloth.name" -}} 2 | {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} 3 | {{- end }} 4 | 5 | {{- define "sloth.fullname" -}} 6 | {{- if .Values.fullnameOverride }} 7 | {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} 8 | {{- else }} 9 | {{- $name := default .Chart.Name .Values.nameOverride }} 10 | {{- if contains $name .Release.Name }} 11 | {{- .Release.Name | trunc 63 | trimSuffix "-" }} 12 | {{- else }} 13 | {{- printf "%s-%s" $name .Release.Name | trunc 63 | trimSuffix "-" }} 14 | {{- end }} 15 | {{- end }} 16 | {{- end }} 17 | 18 | 19 | {{- define "sloth.labels" -}} 20 | helm.sh/chart: {{ include "sloth.chart" . }} 21 | {{- if .Chart.AppVersion }} 22 | app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} 23 | {{- end }} 24 | app.kubernetes.io/managed-by: {{ .Release.Service }} 25 | {{ include "sloth.selectorLabels" . }} 26 | {{- with .Values.labels }} 27 | {{ toYaml . }} 28 | {{- end }} 29 | {{- end }} 30 | 31 | 32 | 33 | {{- define "sloth.selectorLabels" -}} 34 | app: {{ include "sloth.name" . }} 35 | app.kubernetes.io/name: {{ include "sloth.name" . }} 36 | app.kubernetes.io/instance: {{ .Release.Name }} 37 | {{- end }} 38 | 39 | {{- define "sloth.chart" -}} 40 | {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} 41 | {{- end }} 42 | {{- define "sloth.imagePullSecrets" -}} 43 | {{- range .Values.imagePullSecrets }} 44 | - {{ toYaml . 
| trim }} 45 | {{- end }} 46 | {{- end }} 47 | -------------------------------------------------------------------------------- /deploy/kubernetes/helm/sloth/templates/cluster-role-binding.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: rbac.authorization.k8s.io/v1 3 | kind: ClusterRoleBinding 4 | metadata: 5 | name: {{ include "sloth.fullname" . }} 6 | labels: 7 | {{- include "sloth.labels" . | nindent 4 }} 8 | roleRef: 9 | apiGroup: rbac.authorization.k8s.io 10 | kind: ClusterRole 11 | name: {{ include "sloth.fullname" . }} 12 | subjects: 13 | - kind: ServiceAccount 14 | name: {{ include "sloth.fullname" . }} 15 | namespace: {{ .Release.Namespace }} 16 | -------------------------------------------------------------------------------- /deploy/kubernetes/helm/sloth/templates/cluster-role.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: rbac.authorization.k8s.io/v1 3 | kind: ClusterRole 4 | metadata: 5 | name: {{ include "sloth.fullname" . }} 6 | labels: 7 | {{- include "sloth.labels" . | nindent 4 }} 8 | rules: 9 | - apiGroups: ["sloth.slok.dev"] 10 | resources: ["*"] 11 | verbs: ["*"] 12 | 13 | - apiGroups: ["monitoring.coreos.com"] 14 | resources: ["prometheusrules"] 15 | verbs: ["create", "list", "get", "update", "watch"] 16 | -------------------------------------------------------------------------------- /deploy/kubernetes/helm/sloth/templates/configmap.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.customSloConfig.enabled }} 2 | apiVersion: v1 3 | kind: ConfigMap 4 | metadata: 5 | name: {{ include "sloth.fullname" . }} 6 | namespace: {{ .Release.Namespace }} 7 | labels: 8 | {{- include "sloth.labels" . | nindent 4 }} 9 | data: 10 | window.yaml: | 11 | {{- toYaml .Values.customSloConfig.data | nindent 4 }} 12 | {{- end }} 13 | -------------------------------------------------------------------------------- /deploy/kubernetes/helm/sloth/templates/pod-monitor.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.metrics.enabled }} 2 | --- 3 | apiVersion: monitoring.coreos.com/v1 4 | kind: PodMonitor 5 | metadata: 6 | name: {{ include "sloth.fullname" . }} 7 | namespace: {{ .Release.Namespace }} 8 | labels: 9 | {{- include "sloth.labels" . | nindent 4 }} 10 | {{- with .Values.metrics.prometheusLabels }} 11 | {{- toYaml . | nindent 4 }} 12 | {{- end }} 13 | spec: 14 | selector: 15 | matchLabels: 16 | {{- include "sloth.selectorLabels" . | nindent 6 }} 17 | podMetricsEndpoints: 18 | - port: metrics 19 | {{- with .Values.metrics.scrapeInterval }} 20 | interval: {{.}} 21 | {{- end }} 22 | {{- end }} -------------------------------------------------------------------------------- /deploy/kubernetes/helm/sloth/templates/service-account.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: ServiceAccount 4 | metadata: 5 | name: {{ include "sloth.fullname" . }} 6 | namespace: {{ .Release.Namespace }} 7 | labels: 8 | {{- include "sloth.labels" . 
| nindent 4 }} 9 | -------------------------------------------------------------------------------- /deploy/kubernetes/helm/sloth/tests/testdata/output/cluster_role_binding_custom.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | # Source: sloth/templates/cluster-role-binding.yaml 3 | apiVersion: rbac.authorization.k8s.io/v1 4 | kind: ClusterRoleBinding 5 | metadata: 6 | name: sloth-test 7 | labels: 8 | helm.sh/chart: sloth- 9 | app.kubernetes.io/managed-by: Helm 10 | app: sloth 11 | app.kubernetes.io/name: sloth 12 | app.kubernetes.io/instance: test 13 | label-from: test 14 | roleRef: 15 | apiGroup: rbac.authorization.k8s.io 16 | kind: ClusterRole 17 | name: sloth-test 18 | subjects: 19 | - kind: ServiceAccount 20 | name: sloth-test 21 | namespace: custom 22 | -------------------------------------------------------------------------------- /deploy/kubernetes/helm/sloth/tests/testdata/output/cluster_role_binding_default.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | # Source: sloth/templates/cluster-role-binding.yaml 3 | apiVersion: rbac.authorization.k8s.io/v1 4 | kind: ClusterRoleBinding 5 | metadata: 6 | name: sloth 7 | labels: 8 | helm.sh/chart: sloth- 9 | app.kubernetes.io/managed-by: Helm 10 | app: sloth 11 | app.kubernetes.io/name: sloth 12 | app.kubernetes.io/instance: sloth 13 | roleRef: 14 | apiGroup: rbac.authorization.k8s.io 15 | kind: ClusterRole 16 | name: sloth 17 | subjects: 18 | - kind: ServiceAccount 19 | name: sloth 20 | namespace: default 21 | -------------------------------------------------------------------------------- /deploy/kubernetes/helm/sloth/tests/testdata/output/cluster_role_custom.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | # Source: sloth/templates/cluster-role.yaml 3 | apiVersion: rbac.authorization.k8s.io/v1 4 | kind: ClusterRole 5 | metadata: 6 | name: sloth-test 7 | labels: 8 | helm.sh/chart: sloth- 9 | app.kubernetes.io/managed-by: Helm 10 | app: sloth 11 | app.kubernetes.io/name: sloth 12 | app.kubernetes.io/instance: test 13 | label-from: test 14 | rules: 15 | - apiGroups: ["sloth.slok.dev"] 16 | resources: ["*"] 17 | verbs: ["*"] 18 | 19 | - apiGroups: ["monitoring.coreos.com"] 20 | resources: ["prometheusrules"] 21 | verbs: ["create", "list", "get", "update", "watch"] 22 | -------------------------------------------------------------------------------- /deploy/kubernetes/helm/sloth/tests/testdata/output/cluster_role_default.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | # Source: sloth/templates/cluster-role.yaml 3 | apiVersion: rbac.authorization.k8s.io/v1 4 | kind: ClusterRole 5 | metadata: 6 | name: sloth 7 | labels: 8 | helm.sh/chart: sloth- 9 | app.kubernetes.io/managed-by: Helm 10 | app: sloth 11 | app.kubernetes.io/name: sloth 12 | app.kubernetes.io/instance: sloth 13 | rules: 14 | - apiGroups: ["sloth.slok.dev"] 15 | resources: ["*"] 16 | verbs: ["*"] 17 | 18 | - apiGroups: ["monitoring.coreos.com"] 19 | resources: ["prometheusrules"] 20 | verbs: ["create", "list", "get", "update", "watch"] 21 | -------------------------------------------------------------------------------- /deploy/kubernetes/helm/sloth/tests/testdata/output/configmap_slo_config.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | # Source: sloth/templates/configmap.yaml 3 | apiVersion: v1 4 | kind: ConfigMap 5 | metadata: 
6 | name: sloth-test 7 | namespace: custom 8 | labels: 9 | helm.sh/chart: sloth- 10 | app.kubernetes.io/managed-by: Helm 11 | app: sloth 12 | app.kubernetes.io/name: sloth 13 | app.kubernetes.io/instance: test 14 | label-from: test 15 | data: 16 | window.yaml: | 17 | customKey: customValue 18 | -------------------------------------------------------------------------------- /deploy/kubernetes/helm/sloth/tests/testdata/output/deployment_custom.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | # Source: sloth/templates/deployment.yaml 3 | apiVersion: apps/v1 4 | kind: Deployment 5 | metadata: 6 | name: sloth-test 7 | namespace: custom 8 | labels: 9 | helm.sh/chart: sloth- 10 | app.kubernetes.io/managed-by: Helm 11 | app: sloth 12 | app.kubernetes.io/name: sloth 13 | app.kubernetes.io/instance: test 14 | label-from: test 15 | spec: 16 | replicas: 1 17 | selector: 18 | matchLabels: 19 | app: sloth 20 | app.kubernetes.io/name: sloth 21 | app.kubernetes.io/instance: test 22 | template: 23 | metadata: 24 | labels: 25 | helm.sh/chart: sloth- 26 | app.kubernetes.io/managed-by: Helm 27 | app: sloth 28 | app.kubernetes.io/name: sloth 29 | app.kubernetes.io/instance: test 30 | label-from: test 31 | annotations: 32 | kubectl.kubernetes.io/default-container: sloth 33 | spec: 34 | serviceAccountName: sloth-test 35 | securityContext: 36 | fsGroup: 100 37 | runAsGroup: 1000 38 | runAsNonRoot: true 39 | runAsUser: 100 40 | containers: 41 | - name: sloth 42 | image: linode-obs/sloth-test:v1.42.42 43 | args: 44 | - kubernetes-controller 45 | - --resync-interval=17m 46 | - --workers=99 47 | - --namespace=somens 48 | - --label-selector=x=y,z!=y 49 | - --extra-labels=k1=v1 50 | - --extra-labels=k2=v2 51 | - --sli-plugins-path=/plugins 52 | - --disable-optimized-rules 53 | - --logger=default 54 | ports: 55 | - containerPort: 8081 56 | name: metrics 57 | protocol: TCP 58 | volumeMounts: 59 | - name: sloth-common-sli-plugins 60 | mountPath: /plugins/sloth-common-sli-plugins 61 | securityContext: 62 | allowPrivilegeEscalation: false 63 | resources: 64 | limits: 65 | cpu: 50m 66 | memory: 150Mi 67 | requests: 68 | cpu: 5m 69 | memory: 75Mi 70 | - name: git-sync-plugins 71 | image: k8s.gcr.io/git-sync/git-sync:v3.6.1 72 | args: 73 | - --repo=https://github.com/slok/sloth-test-common-sli-plugins 74 | - --branch=main 75 | - --wait=30 76 | - --webhook-url=http://localhost:8082/-/reload 77 | volumeMounts: 78 | - name: sloth-common-sli-plugins 79 | # Default path for git-sync. 
80 | mountPath: /tmp/git 81 | securityContext: 82 | allowPrivilegeEscalation: false 83 | resources: 84 | limits: 85 | cpu: 50m 86 | memory: 100Mi 87 | requests: 88 | cpu: 5m 89 | memory: 50Mi 90 | volumes: 91 | - name: sloth-common-sli-plugins 92 | emptyDir: {} 93 | -------------------------------------------------------------------------------- /deploy/kubernetes/helm/sloth/tests/testdata/output/deployment_custom_no_extras.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | # Source: sloth/templates/deployment.yaml 3 | apiVersion: apps/v1 4 | kind: Deployment 5 | metadata: 6 | name: sloth-test 7 | namespace: custom 8 | labels: 9 | helm.sh/chart: sloth- 10 | app.kubernetes.io/managed-by: Helm 11 | app: sloth 12 | app.kubernetes.io/name: sloth 13 | app.kubernetes.io/instance: test 14 | label-from: test 15 | spec: 16 | replicas: 1 17 | selector: 18 | matchLabels: 19 | app: sloth 20 | app.kubernetes.io/name: sloth 21 | app.kubernetes.io/instance: test 22 | template: 23 | metadata: 24 | labels: 25 | helm.sh/chart: sloth- 26 | app.kubernetes.io/managed-by: Helm 27 | app: sloth 28 | app.kubernetes.io/name: sloth 29 | app.kubernetes.io/instance: test 30 | label-from: test 31 | annotations: 32 | kubectl.kubernetes.io/default-container: sloth 33 | spec: 34 | serviceAccountName: sloth-test 35 | securityContext: 36 | fsGroup: 100 37 | runAsGroup: 1000 38 | runAsNonRoot: true 39 | runAsUser: 100 40 | containers: 41 | - name: sloth 42 | image: linode-obs/sloth-test:v1.42.42 43 | args: 44 | - kubernetes-controller 45 | - --resync-interval=17m 46 | - --workers=99 47 | - --namespace=somens 48 | - --label-selector=x=y,z!=y 49 | - --extra-labels=k1=v1 50 | - --extra-labels=k2=v2 51 | - --disable-optimized-rules 52 | - --logger=default 53 | securityContext: 54 | allowPrivilegeEscalation: false 55 | resources: 56 | limits: 57 | cpu: 50m 58 | memory: 150Mi 59 | requests: 60 | cpu: 5m 61 | memory: 75Mi 62 | -------------------------------------------------------------------------------- /deploy/kubernetes/helm/sloth/tests/testdata/output/deployment_custom_slo_config.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | # Source: sloth/templates/deployment.yaml 3 | apiVersion: apps/v1 4 | kind: Deployment 5 | metadata: 6 | name: sloth-test 7 | namespace: custom 8 | labels: 9 | helm.sh/chart: sloth- 10 | app.kubernetes.io/managed-by: Helm 11 | app: sloth 12 | app.kubernetes.io/name: sloth 13 | app.kubernetes.io/instance: test 14 | label-from: test 15 | spec: 16 | replicas: 1 17 | selector: 18 | matchLabels: 19 | app: sloth 20 | app.kubernetes.io/name: sloth 21 | app.kubernetes.io/instance: test 22 | template: 23 | metadata: 24 | labels: 25 | helm.sh/chart: sloth- 26 | app.kubernetes.io/managed-by: Helm 27 | app: sloth 28 | app.kubernetes.io/name: sloth 29 | app.kubernetes.io/instance: test 30 | label-from: test 31 | annotations: 32 | kubectl.kubernetes.io/default-container: sloth 33 | checksum/config: 34 | spec: 35 | serviceAccountName: sloth-test 36 | securityContext: 37 | fsGroup: 100 38 | runAsGroup: 1000 39 | runAsNonRoot: true 40 | runAsUser: 100 41 | containers: 42 | - name: sloth 43 | image: linode-obs/sloth-test:v1.42.42 44 | args: 45 | - kubernetes-controller 46 | - --resync-interval=17m 47 | - --workers=99 48 | - --namespace=somens 49 | - --label-selector=x=y,z!=y 50 | - --extra-labels=k1=v1 51 | - --extra-labels=k2=v2 52 | - --disable-optimized-rules 53 | - --slo-period-windows-path=/windows 54 | - 
--logger=default 55 | ports: 56 | - containerPort: 8081 57 | name: metrics 58 | protocol: TCP 59 | volumeMounts: 60 | - name: sloth-windows 61 | mountPath: /windows 62 | securityContext: 63 | allowPrivilegeEscalation: false 64 | resources: 65 | limits: 66 | cpu: 50m 67 | memory: 150Mi 68 | requests: 69 | cpu: 5m 70 | memory: 75Mi 71 | volumes: 72 | - name: sloth-windows 73 | configMap: 74 | defaultMode: 420 75 | name: sloth-test 76 | -------------------------------------------------------------------------------- /deploy/kubernetes/helm/sloth/tests/testdata/output/deployment_default.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | # Source: sloth/templates/deployment.yaml 3 | apiVersion: apps/v1 4 | kind: Deployment 5 | metadata: 6 | name: sloth 7 | namespace: default 8 | labels: 9 | helm.sh/chart: sloth- 10 | app.kubernetes.io/managed-by: Helm 11 | app: sloth 12 | app.kubernetes.io/name: sloth 13 | app.kubernetes.io/instance: sloth 14 | spec: 15 | replicas: 1 16 | selector: 17 | matchLabels: 18 | app: sloth 19 | app.kubernetes.io/name: sloth 20 | app.kubernetes.io/instance: sloth 21 | template: 22 | metadata: 23 | labels: 24 | helm.sh/chart: sloth- 25 | app.kubernetes.io/managed-by: Helm 26 | app: sloth 27 | app.kubernetes.io/name: sloth 28 | app.kubernetes.io/instance: sloth 29 | annotations: 30 | kubectl.kubernetes.io/default-container: sloth 31 | spec: 32 | serviceAccountName: sloth 33 | containers: 34 | - name: sloth 35 | image: ghcr.io/linode-obs/sloth:v0.13.1 36 | args: 37 | - kubernetes-controller 38 | - --sli-plugins-path=/plugins 39 | - --logger=default 40 | ports: 41 | - containerPort: 8081 42 | name: metrics 43 | protocol: TCP 44 | volumeMounts: 45 | - name: sloth-common-sli-plugins 46 | mountPath: /plugins/sloth-common-sli-plugins 47 | resources: 48 | limits: 49 | cpu: 50m 50 | memory: 150Mi 51 | requests: 52 | cpu: 5m 53 | memory: 75Mi 54 | - name: git-sync-plugins 55 | image: k8s.gcr.io/git-sync/git-sync:v3.6.1 56 | args: 57 | - --repo=https://github.com/slok/sloth-common-sli-plugins 58 | - --branch=main 59 | - --wait=30 60 | - --webhook-url=http://localhost:8082/-/reload 61 | volumeMounts: 62 | - name: sloth-common-sli-plugins 63 | # Default path for git-sync. 
64 | mountPath: /tmp/git 65 | resources: 66 | limits: 67 | cpu: 50m 68 | memory: 100Mi 69 | requests: 70 | cpu: 5m 71 | memory: 50Mi 72 | volumes: 73 | - name: sloth-common-sli-plugins 74 | emptyDir: {} 75 | -------------------------------------------------------------------------------- /deploy/kubernetes/helm/sloth/tests/testdata/output/pod_monitor_custom.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | # Source: sloth/templates/pod-monitor.yaml 3 | apiVersion: monitoring.coreos.com/v1 4 | kind: PodMonitor 5 | metadata: 6 | name: sloth-test 7 | namespace: custom 8 | labels: 9 | helm.sh/chart: sloth- 10 | app.kubernetes.io/managed-by: Helm 11 | app: sloth 12 | app.kubernetes.io/name: sloth 13 | app.kubernetes.io/instance: test 14 | label-from: test 15 | kp1: vp1 16 | kp2: vp2 17 | spec: 18 | selector: 19 | matchLabels: 20 | app: sloth 21 | app.kubernetes.io/name: sloth 22 | app.kubernetes.io/instance: test 23 | podMetricsEndpoints: 24 | - port: metrics 25 | interval: 45s 26 | -------------------------------------------------------------------------------- /deploy/kubernetes/helm/sloth/tests/testdata/output/pod_monitor_default.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | # Source: sloth/templates/pod-monitor.yaml 3 | apiVersion: monitoring.coreos.com/v1 4 | kind: PodMonitor 5 | metadata: 6 | name: sloth 7 | namespace: default 8 | labels: 9 | helm.sh/chart: sloth- 10 | app.kubernetes.io/managed-by: Helm 11 | app: sloth 12 | app.kubernetes.io/name: sloth 13 | app.kubernetes.io/instance: sloth 14 | spec: 15 | selector: 16 | matchLabels: 17 | app: sloth 18 | app.kubernetes.io/name: sloth 19 | app.kubernetes.io/instance: sloth 20 | podMetricsEndpoints: 21 | - port: metrics 22 | -------------------------------------------------------------------------------- /deploy/kubernetes/helm/sloth/tests/testdata/output/sa_custom.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | # Source: sloth/templates/service-account.yaml 3 | apiVersion: v1 4 | kind: ServiceAccount 5 | metadata: 6 | name: sloth-test 7 | namespace: custom 8 | labels: 9 | helm.sh/chart: sloth- 10 | app.kubernetes.io/managed-by: Helm 11 | app: sloth 12 | app.kubernetes.io/name: sloth 13 | app.kubernetes.io/instance: test 14 | label-from: test 15 | -------------------------------------------------------------------------------- /deploy/kubernetes/helm/sloth/tests/testdata/output/sa_default.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | # Source: sloth/templates/service-account.yaml 3 | apiVersion: v1 4 | kind: ServiceAccount 5 | metadata: 6 | name: sloth 7 | namespace: default 8 | labels: 9 | helm.sh/chart: sloth- 10 | app.kubernetes.io/managed-by: Helm 11 | app: sloth 12 | app.kubernetes.io/name: sloth 13 | app.kubernetes.io/instance: sloth 14 | -------------------------------------------------------------------------------- /deploy/kubernetes/helm/sloth/tests/values_test.go: -------------------------------------------------------------------------------- 1 | package tests 2 | 3 | type msi = map[string]interface{} 4 | 5 | func defaultValues() msi { 6 | return msi{} 7 | } 8 | 9 | func customValues() msi { 10 | return msi{ 11 | "labels": msi{ 12 | "label-from": "test", 13 | }, 14 | 15 | "image": msi{ 16 | "repository": "linode-obs/sloth-test", 17 | "tag": "v1.42.42", 18 | }, 19 | 20 | "sloth": msi{ 21 | "resyncInterval": "17m", 22 | 
"workers": 99, 23 | "labelSelector": `x=y,z!=y`, 24 | "namespace": "somens", 25 | "optimizedRules": false, 26 | "extraLabels": msi{ 27 | "k1": "v1", 28 | "k2": "v2", 29 | }, 30 | }, 31 | 32 | "commonPlugins": msi{ 33 | "enabled": true, 34 | "gitRepo": msi{ 35 | "url": "https://github.com/slok/sloth-test-common-sli-plugins", 36 | "branch": "main", 37 | }, 38 | }, 39 | 40 | "metrics": msi{ 41 | "enabled": true, 42 | "scrapeInterval": "45s", 43 | "prometheusLabels": msi{ 44 | "kp1": "vp1", 45 | "kp2": "vp2", 46 | }, 47 | }, 48 | 49 | "customSloConfig": msi{ 50 | "data": msi{ 51 | "customKey": "customValue", 52 | }, 53 | }, 54 | 55 | "securityContext": msi{ 56 | "pod": msi{ 57 | "runAsNonRoot": true, 58 | "runAsGroup": 1000, 59 | "runAsUser": 100, 60 | "fsGroup": 100, 61 | }, 62 | "container": msi{ 63 | "allowPrivilegeEscalation": false, 64 | }, 65 | }, 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /deploy/kubernetes/helm/sloth/values.yaml: -------------------------------------------------------------------------------- 1 | labels: {} 2 | 3 | image: 4 | repository: ghcr.io/linode-obs/sloth 5 | tag: v0.13.1 6 | 7 | # -- Container resources: requests and limits for CPU, Memory 8 | resources: 9 | limits: 10 | cpu: 50m 11 | memory: 150Mi 12 | requests: 13 | cpu: 5m 14 | memory: 75Mi 15 | 16 | imagePullSecrets: [] 17 | # - name: secret1 18 | # - name: secret2 19 | 20 | sloth: 21 | resyncInterval: "" # The controller resync interval duration (e.g 15m). 22 | workers: 0 # The number of concurrent controller workers (e.g 5). 23 | labelSelector: "" # Sloth will handle only the ones that match the selector. 24 | namespace: "" # The namespace where sloth will the CRs to process. 25 | extraLabels: {} # Labels that will be added to all the generated SLO Rules. 26 | defaultSloPeriod: "" # The slo period used by sloth (e.g. 30d). 27 | optimizedRules: true # Reduce prom load for calculating period window burnrates. 28 | debug: 29 | enabled: false 30 | # Could be: default or json 31 | logger: default 32 | 33 | commonPlugins: 34 | enabled: true 35 | image: 36 | repository: k8s.gcr.io/git-sync/git-sync 37 | tag: v3.6.1 38 | gitRepo: 39 | url: https://github.com/slok/sloth-common-sli-plugins 40 | branch: main 41 | resources: 42 | limits: 43 | cpu: 50m 44 | memory: 100Mi 45 | requests: 46 | cpu: 5m 47 | memory: 50Mi 48 | 49 | metrics: 50 | enabled: true 51 | #scrapeInterval: 30s 52 | prometheusLabels: {} 53 | 54 | customSloConfig: 55 | enabled: false 56 | path: /windows 57 | data: {} 58 | # apiVersion: sloth.slok.dev/v1 59 | # kind: AlertWindows 60 | # spec: 61 | # ... 
See https://sloth.dev/usage/slo-period-windows/ 62 | 63 | # add deployment pod tolerations 64 | # tolerations: 65 | # - key: kubernetes.azure.com/scalesetpriority 66 | # operator: Equal 67 | # value: spot 68 | # effect: NoSchedule 69 | 70 | securityContext: 71 | pod: null 72 | # fsGroup: 100 73 | # runAsGroup: 1000 74 | # runAsNonRoot: true 75 | # runAsUser: 100 76 | container: null 77 | # allowPrivilegeEscalation: false 78 | -------------------------------------------------------------------------------- /deploy/kubernetes/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | 4 | resources: 5 | - raw/sloth-with-common-plugins.yaml -------------------------------------------------------------------------------- /deploy/kubernetes/raw/sloth-with-common-plugins.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | # Source: sloth/templates/service-account.yaml 3 | apiVersion: v1 4 | kind: ServiceAccount 5 | metadata: 6 | name: sloth 7 | namespace: monitoring 8 | labels: 9 | helm.sh/chart: sloth-0.6.4 10 | app.kubernetes.io/managed-by: Helm 11 | app: sloth 12 | app.kubernetes.io/name: sloth 13 | app.kubernetes.io/instance: sloth 14 | --- 15 | # Source: sloth/templates/cluster-role.yaml 16 | apiVersion: rbac.authorization.k8s.io/v1 17 | kind: ClusterRole 18 | metadata: 19 | name: sloth 20 | labels: 21 | helm.sh/chart: sloth-0.6.4 22 | app.kubernetes.io/managed-by: Helm 23 | app: sloth 24 | app.kubernetes.io/name: sloth 25 | app.kubernetes.io/instance: sloth 26 | rules: 27 | - apiGroups: ["sloth.slok.dev"] 28 | resources: ["*"] 29 | verbs: ["*"] 30 | 31 | - apiGroups: ["monitoring.coreos.com"] 32 | resources: ["prometheusrules"] 33 | verbs: ["create", "list", "get", "update", "watch"] 34 | --- 35 | # Source: sloth/templates/cluster-role-binding.yaml 36 | apiVersion: rbac.authorization.k8s.io/v1 37 | kind: ClusterRoleBinding 38 | metadata: 39 | name: sloth 40 | labels: 41 | helm.sh/chart: sloth-0.6.4 42 | app.kubernetes.io/managed-by: Helm 43 | app: sloth 44 | app.kubernetes.io/name: sloth 45 | app.kubernetes.io/instance: sloth 46 | roleRef: 47 | apiGroup: rbac.authorization.k8s.io 48 | kind: ClusterRole 49 | name: sloth 50 | subjects: 51 | - kind: ServiceAccount 52 | name: sloth 53 | namespace: monitoring 54 | --- 55 | # Source: sloth/templates/deployment.yaml 56 | apiVersion: apps/v1 57 | kind: Deployment 58 | metadata: 59 | name: sloth 60 | namespace: monitoring 61 | labels: 62 | helm.sh/chart: sloth-0.6.4 63 | app.kubernetes.io/managed-by: Helm 64 | app: sloth 65 | app.kubernetes.io/name: sloth 66 | app.kubernetes.io/instance: sloth 67 | spec: 68 | replicas: 1 69 | selector: 70 | matchLabels: 71 | app: sloth 72 | app.kubernetes.io/name: sloth 73 | app.kubernetes.io/instance: sloth 74 | template: 75 | metadata: 76 | labels: 77 | helm.sh/chart: sloth-0.6.4 78 | app.kubernetes.io/managed-by: Helm 79 | app: sloth 80 | app.kubernetes.io/name: sloth 81 | app.kubernetes.io/instance: sloth 82 | annotations: 83 | kubectl.kubernetes.io/default-container: sloth 84 | spec: 85 | serviceAccountName: sloth 86 | containers: 87 | - name: sloth 88 | image: ghcr.io/linode-obs/sloth:v0.13.1 89 | args: 90 | - kubernetes-controller 91 | - --sli-plugins-path=/plugins 92 | ports: 93 | - containerPort: 8081 94 | name: metrics 95 | protocol: TCP 96 | volumeMounts: 97 | - name: sloth-common-sli-plugins 98 | mountPath: /plugins/sloth-common-sli-plugins 
99 | resources: 100 | limits: 101 | cpu: 50m 102 | memory: 150Mi 103 | requests: 104 | cpu: 5m 105 | memory: 75Mi 106 | - name: git-sync-plugins 107 | image: k8s.gcr.io/git-sync/git-sync:v3.6.1 108 | args: 109 | - --repo=https://github.com/slok/sloth-common-sli-plugins 110 | - --branch=main 111 | - --wait=30 112 | - --webhook-url=http://localhost:8082/-/reload 113 | volumeMounts: 114 | - name: sloth-common-sli-plugins 115 | # Default path for git-sync. 116 | mountPath: /tmp/git 117 | resources: 118 | limits: 119 | cpu: 50m 120 | memory: 100Mi 121 | requests: 122 | cpu: 5m 123 | memory: 50Mi 124 | volumes: 125 | - name: sloth-common-sli-plugins 126 | emptyDir: {} 127 | --- 128 | # Source: sloth/templates/pod-monitor.yaml 129 | apiVersion: monitoring.coreos.com/v1 130 | kind: PodMonitor 131 | metadata: 132 | name: sloth 133 | namespace: monitoring 134 | labels: 135 | helm.sh/chart: sloth-0.6.4 136 | app.kubernetes.io/managed-by: Helm 137 | app: sloth 138 | app.kubernetes.io/name: sloth 139 | app.kubernetes.io/instance: sloth 140 | spec: 141 | selector: 142 | matchLabels: 143 | app: sloth 144 | app.kubernetes.io/name: sloth 145 | app.kubernetes.io/instance: sloth 146 | podMetricsEndpoints: 147 | - port: metrics 148 | -------------------------------------------------------------------------------- /deploy/kubernetes/raw/sloth.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | # Source: sloth/templates/service-account.yaml 3 | apiVersion: v1 4 | kind: ServiceAccount 5 | metadata: 6 | name: sloth 7 | namespace: monitoring 8 | labels: 9 | helm.sh/chart: sloth-0.6.4 10 | app.kubernetes.io/managed-by: Helm 11 | app: sloth 12 | app.kubernetes.io/name: sloth 13 | app.kubernetes.io/instance: sloth 14 | --- 15 | # Source: sloth/templates/cluster-role.yaml 16 | apiVersion: rbac.authorization.k8s.io/v1 17 | kind: ClusterRole 18 | metadata: 19 | name: sloth 20 | labels: 21 | helm.sh/chart: sloth-0.6.4 22 | app.kubernetes.io/managed-by: Helm 23 | app: sloth 24 | app.kubernetes.io/name: sloth 25 | app.kubernetes.io/instance: sloth 26 | rules: 27 | - apiGroups: ["sloth.slok.dev"] 28 | resources: ["*"] 29 | verbs: ["*"] 30 | 31 | - apiGroups: ["monitoring.coreos.com"] 32 | resources: ["prometheusrules"] 33 | verbs: ["create", "list", "get", "update", "watch"] 34 | --- 35 | # Source: sloth/templates/cluster-role-binding.yaml 36 | apiVersion: rbac.authorization.k8s.io/v1 37 | kind: ClusterRoleBinding 38 | metadata: 39 | name: sloth 40 | labels: 41 | helm.sh/chart: sloth-0.6.4 42 | app.kubernetes.io/managed-by: Helm 43 | app: sloth 44 | app.kubernetes.io/name: sloth 45 | app.kubernetes.io/instance: sloth 46 | roleRef: 47 | apiGroup: rbac.authorization.k8s.io 48 | kind: ClusterRole 49 | name: sloth 50 | subjects: 51 | - kind: ServiceAccount 52 | name: sloth 53 | namespace: monitoring 54 | --- 55 | # Source: sloth/templates/deployment.yaml 56 | apiVersion: apps/v1 57 | kind: Deployment 58 | metadata: 59 | name: sloth 60 | namespace: monitoring 61 | labels: 62 | helm.sh/chart: sloth-0.6.4 63 | app.kubernetes.io/managed-by: Helm 64 | app: sloth 65 | app.kubernetes.io/name: sloth 66 | app.kubernetes.io/instance: sloth 67 | spec: 68 | replicas: 1 69 | selector: 70 | matchLabels: 71 | app: sloth 72 | app.kubernetes.io/name: sloth 73 | app.kubernetes.io/instance: sloth 74 | template: 75 | metadata: 76 | labels: 77 | helm.sh/chart: sloth-0.6.4 78 | app.kubernetes.io/managed-by: Helm 79 | app: sloth 80 | app.kubernetes.io/name: sloth 81 | app.kubernetes.io/instance: sloth 82 
| annotations: 83 | kubectl.kubernetes.io/default-container: sloth 84 | spec: 85 | serviceAccountName: sloth 86 | containers: 87 | - name: sloth 88 | image: ghcr.io/linode-obs/sloth:v0.13.1 89 | args: 90 | - kubernetes-controller 91 | ports: 92 | - containerPort: 8081 93 | name: metrics 94 | protocol: TCP 95 | resources: 96 | limits: 97 | cpu: 50m 98 | memory: 150Mi 99 | requests: 100 | cpu: 5m 101 | memory: 75Mi 102 | --- 103 | # Source: sloth/templates/pod-monitor.yaml 104 | apiVersion: monitoring.coreos.com/v1 105 | kind: PodMonitor 106 | metadata: 107 | name: sloth 108 | namespace: monitoring 109 | labels: 110 | helm.sh/chart: sloth-0.6.4 111 | app.kubernetes.io/managed-by: Helm 112 | app: sloth 113 | app.kubernetes.io/name: sloth 114 | app.kubernetes.io/instance: sloth 115 | spec: 116 | selector: 117 | matchLabels: 118 | app: sloth 119 | app.kubernetes.io/name: sloth 120 | app.kubernetes.io/instance: sloth 121 | podMetricsEndpoints: 122 | - port: metrics 123 | -------------------------------------------------------------------------------- /docker/dev/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM golang:1.23 2 | 3 | LABEL org.opencontainers.image.source https://github.com/slok/sloth 4 | 5 | ARG GOLANGCI_LINT_VERSION="1.61.0" 6 | ARG MOCKERY_VERSION="2.46.3" 7 | ARG GOMARKDOC_VERSION="0.4.1" 8 | ARG HELM_VERSION="3.10.0" 9 | ARG ostype=Linux 10 | 11 | RUN apt-get update && apt-get install -y \ 12 | git \ 13 | bash \ 14 | zip 15 | 16 | 17 | RUN wget https://github.com/golangci/golangci-lint/releases/download/v${GOLANGCI_LINT_VERSION}/golangci-lint-${GOLANGCI_LINT_VERSION}-linux-amd64.tar.gz && \ 18 | tar zxvf golangci-lint-${GOLANGCI_LINT_VERSION}-linux-amd64.tar.gz --strip 1 -C /usr/local/bin/ && \ 19 | rm golangci-lint-${GOLANGCI_LINT_VERSION}-linux-amd64.tar.gz && \ 20 | \ 21 | wget https://github.com/vektra/mockery/releases/download/v${MOCKERY_VERSION}/mockery_${MOCKERY_VERSION}_Linux_x86_64.tar.gz && \ 22 | tar zxvf mockery_${MOCKERY_VERSION}_Linux_x86_64.tar.gz -C /tmp && \ 23 | mv /tmp/mockery /usr/local/bin/ && \ 24 | rm mockery_${MOCKERY_VERSION}_Linux_x86_64.tar.gz && \ 25 | \ 26 | wget https://github.com/princjef/gomarkdoc/releases/download/v${GOMARKDOC_VERSION}/gomarkdoc_${GOMARKDOC_VERSION}_linux_amd64.tar.gz && \ 27 | tar zxvf gomarkdoc_${GOMARKDOC_VERSION}_linux_amd64.tar.gz -C /tmp && \ 28 | mv /tmp/gomarkdoc_${GOMARKDOC_VERSION}_linux_amd64/gomarkdoc /usr/local/bin/ && \ 29 | rm -rf gomarkdoc_${GOMARKDOC_VERSION}_linux_amd64.tar.gz /tmp/gomarkdoc_${GOMARKDOC_VERSION}_linux_amd64 && \ 30 | \ 31 | wget https://get.helm.sh/helm-v${HELM_VERSION}-linux-amd64.tar.gz && \ 32 | tar zxvf helm-v${HELM_VERSION}-linux-amd64.tar.gz -C /tmp && \ 33 | mv /tmp/linux-amd64/helm /usr/local/bin/ && \ 34 | rm -rf helm-v${HELM_VERSION}-linux-amd64.tar.gz /tmp/linux-amd64 35 | 36 | 37 | # Create user. 38 | ARG uid=1000 39 | ARG gid=1000 40 | 41 | RUN bash -c 'if [ ${ostype} == Linux ]; then addgroup -gid $gid app; else addgroup app; fi && \ 42 | adduser --disabled-password -uid $uid --ingroup app --gecos "" app && \ 43 | chown app:app -R /go' 44 | 45 | # Fill Go apps cache: 46 | # Main app. 47 | RUN mkdir -p /tmp/cache 48 | COPY go.mod /tmp/cache 49 | COPY go.sum /tmp/cache 50 | RUN chown app:app -R /tmp/cache 51 | USER app 52 | RUN cd /tmp/cache && \ 53 | go mod download 54 | 55 | # Helm testing app. 
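# (Note: this is the same layer-caching technique as for the main app above: by copying only
# go.mod/go.sum and running `go mod download`, the module cache lives in a Docker layer that is
# invalidated only when the dependency files change, not on every source change.)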
56 | USER root 57 | RUN mkdir -p /tmp/cache 58 | COPY deploy/kubernetes/helm/sloth/tests/go.mod /tmp/cache 59 | COPY deploy/kubernetes/helm/sloth/tests/go.sum /tmp/cache 60 | RUN chown app:app -R /tmp/cache 61 | USER app 62 | RUN cd /tmp/cache && \ 63 | go mod download 64 | 65 | USER app 66 | WORKDIR /src 67 | -------------------------------------------------------------------------------- /docker/prod/Dockerfile: -------------------------------------------------------------------------------- 1 | # Set also `ARCH` ARG here so we can use it on all the `FROM`s. 2 | ARG ARCH 3 | 4 | FROM golang:1.23-alpine as build-stage 5 | 6 | LABEL org.opencontainers.image.source https://github.com/slok/sloth 7 | 8 | RUN apk --no-cache add \ 9 | g++ \ 10 | git \ 11 | make \ 12 | curl \ 13 | bash 14 | 15 | # Required by the build script for setting the version and cross-compiling. 16 | ARG VERSION 17 | ENV VERSION=${VERSION} 18 | ARG ARCH 19 | ENV GOARCH=${ARCH} 20 | 21 | # Compile. 22 | WORKDIR /src 23 | COPY . . 24 | RUN ./scripts/build/bin/build-raw.sh 25 | 26 | 27 | # Although we are building on a specific architecture (normally linux/amd64), our Go binary has been built for 28 | # the ${ARCH} architecture. 29 | # To make our build process portable we base the final image on that same architecture as the binary, 30 | # obtaining a resulting ${ARCH} image independently of where we build this image. 31 | FROM gcr.io/distroless/static:nonroot-${ARCH} 32 | 33 | COPY --from=build-stage /src/bin/sloth /usr/local/bin/sloth 34 | 35 | ENTRYPOINT ["/usr/local/bin/sloth"] 36 | -------------------------------------------------------------------------------- /docs/img/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linode-obs/sloth/a80a2f7e75d32e40e2cb2a2eaf205c0f8b3e97f0/docs/img/logo.png -------------------------------------------------------------------------------- /docs/img/sloth_small_dashboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linode-obs/sloth/a80a2f7e75d32e40e2cb2a2eaf205c0f8b3e97f0/docs/img/sloth_small_dashboard.png -------------------------------------------------------------------------------- /examples/custom_rule_group_interval.yml: -------------------------------------------------------------------------------- 1 | # This example shows how you can adjust the Prometheus rule_group interval for expensive SLOs. 2 | # https://prometheus.io/docs/prometheus/latest/configuration/recording_rules/#rule_group 3 | # The SLO SLI measures the rate of CPU seconds spent performing softirqs. 4 | # 5 | # `sloth generate -i ./examples/custom_rule_group_interval.yml` 6 | # 7 | version: "prometheus/v1" 8 | service: "myapp" 9 | labels: 10 | owner: "myteam" 11 | slos: 12 | - name: "cpu-availability" 13 | objective: 99.99 14 | description: "Example, expensive SLO. Recording rules will run every 2 minutes."
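# As a rough sketch of the effect (group names here are illustrative, not guaranteed output),
# the interval settings below end up as per-group `interval` fields in the generated rules file:
#
#   groups:
#     - name: sloth-slo-sli-recordings-myapp-cpu-availability
#       interval: 4m
#       rules: [...]
#     - name: sloth-slo-meta-recordings-myapp-cpu-availability
#       interval: 2m
#       rules: [...]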
15 | # Alternative way of specifying the interval for all three sets of rules: 16 | # interval: 17 | # all: "5m" 18 | interval: # each of these is a different set of rule groups Sloth can generate 19 | slierror: "4m" 20 | metadata: "2m" 21 | alert: "2m" 22 | sli: 23 | events: 24 | error_query: | 25 | sum( 26 | rate(node_cpu_seconds_total{mode="softirq"}[{{.window}}]) 27 | ) 28 | total_query: | 29 | sum( 30 | rate(node_cpu_seconds_total[{{.window}}]) 31 | ) 32 | alerting: 33 | name: MyServiceHighErrorRate 34 | labels: 35 | category: "availability" 36 | annotations: 37 | summary: "High error rate on 'myservice' requests responses" 38 | page_alert: 39 | labels: 40 | severity: pageteam 41 | routing_key: myteam 42 | ticket_alert: 43 | disable: true 44 | -------------------------------------------------------------------------------- /examples/getting-started.yml: -------------------------------------------------------------------------------- 1 | version: "prometheus/v1" 2 | service: "myservice" 3 | labels: 4 | owner: "myteam" 5 | repo: "myorg/myservice" 6 | tier: "2" 7 | slos: 8 | # We allow failing (5xx and 429) 1 request every 1000 requests (99.9%). 9 | - name: "requests-availability" 10 | objective: 99.9 11 | description: "Common SLO based on availability for HTTP request responses." 12 | sli: 13 | events: 14 | error_query: sum(rate(http_request_duration_seconds_count{job="myservice",code=~"(5..|429)"}[{{.window}}])) 15 | total_query: sum(rate(http_request_duration_seconds_count{job="myservice"}[{{.window}}])) 16 | alerting: 17 | name: MyServiceHighErrorRate 18 | labels: 19 | category: "availability" 20 | annotations: 21 | # Overwrite default Sloth SLO alert summary on ticket and page alerts. 22 | summary: "High error rate on 'myservice' requests responses" 23 | page_alert: 24 | labels: 25 | severity: pageteam 26 | routing_key: myteam 27 | ticket_alert: 28 | labels: 29 | severity: "slack" 30 | slack_channel: "#alerts-myteam" 31 | -------------------------------------------------------------------------------- /examples/home-wifi.yml: -------------------------------------------------------------------------------- 1 | # This example shows a real service level used in my home to have SLOs on my wifi signal. 2 | # The metrics are extracted using unifi-poller (https://github.com/unifi-poller/unifi-poller) 3 | # that gets the information from an Ubiquiti Wifi installation. 4 | # https://community.ui.com/questions/satisfaction-percentage-in-client-properties-overview/8c940637-63d0-41de-a67b-8166cdd0ed32 5 | # 6 | # The service level has 2 SLOs based on `client_satisfaction_ratio`, a ratio calculated 7 | # by Ubiquiti based on wifi packet drops, wifi signal... 8 | # We consider each of the currently existing client satisfactions an SLI event; let's review the SLOs 9 | # 10 | # - `good-wifi-client-satisfaction` 11 | #   - This SLO warns us that we don't have a good wifi at home. 12 | #   - SLI error: We consider a bad client satisfaction (event) below 75% (0.75) 13 | #   - SLO objective (95%): We are not so restrictive and we allow 5 of every 100 clients to be below 75% 14 | # 15 | # - `risk-wifi-client-satisfaction` 16 | #   - This SLO warns us that something very bad is happening with our home wifi.
17 | #   - SLI error: We consider a bad client satisfaction (event) below 50% (0.5) 18 | #   - SLO objective (99.9%): We are very restrictive and we allow 1 of every 1000 clients to be below 50% 19 | # 20 | # `sloth generate -i ./examples/home-wifi.yml` 21 | # 22 | version: "prometheus/v1" 23 | service: "home-wifi" 24 | labels: 25 | cluster: "valhalla" 26 | component: "ubiquiti" 27 | context: "home" 28 | slos: 29 | - name: "good-wifi-client-satisfaction" 30 | objective: 95 31 | description: "Will warn us that we don't have a good wifi at home." 32 | sli: 33 | events: 34 | error_query: sum_over_time((count(unifipoller_client_satisfaction_ratio < 0.75))[{{.window}}:]) OR on() vector(0) 35 | total_query: sum_over_time((count(unifipoller_client_satisfaction_ratio))[{{.window}}:]) 36 | alerting: 37 | name: GoodWifiClientSatisfaction 38 | page_alert: 39 | labels: 40 | severity: home 41 | ticket_alert: 42 | labels: 43 | severity: warning 44 | 45 | - name: "risk-wifi-client-satisfaction" 46 | objective: 99.9 47 | description: "Will warn us that something very bad is happening with our home wifi." 48 | sli: 49 | events: 50 | error_query: sum_over_time((count(unifipoller_client_satisfaction_ratio < 0.5))[{{.window}}:]) OR on() vector(0) 51 | total_query: sum_over_time((count(unifipoller_client_satisfaction_ratio))[{{.window}}:]) 52 | alerting: 53 | name: RiskWifiClientSatisfaction 54 | page_alert: 55 | labels: 56 | severity: home 57 | ticket_alert: 58 | labels: 59 | severity: warning 60 | -------------------------------------------------------------------------------- /examples/k8s-getting-started.yml: -------------------------------------------------------------------------------- 1 | # This example shows the same example as getting-started.yml but using Sloth Kubernetes CRD. 2 | # It will generate the Prometheus rules in a Kubernetes prometheus-operator PrometheusRules CRD. 3 | # 4 | # `sloth generate -i ./examples/k8s-getting-started.yml` 5 | # 6 | apiVersion: sloth.slok.dev/v1 7 | kind: PrometheusServiceLevel 8 | metadata: 9 | name: sloth-slo-my-service 10 | namespace: monitoring 11 | spec: 12 | service: "myservice" 13 | labels: 14 | owner: "myteam" 15 | repo: "myorg/myservice" 16 | tier: "2" 17 | slos: 18 | - name: "requests-availability" 19 | objective: 99.9 20 | description: "Common SLO based on availability for HTTP request responses." 21 | sli: 22 | events: 23 | errorQuery: sum(rate(http_request_duration_seconds_count{job="myservice",code=~"(5..|429)"}[{{.window}}])) 24 | totalQuery: sum(rate(http_request_duration_seconds_count{job="myservice"}[{{.window}}])) 25 | alerting: 26 | name: MyServiceHighErrorRate 27 | labels: 28 | category: "availability" 29 | annotations: 30 | summary: "High error rate on 'myservice' requests responses" 31 | pageAlert: 32 | labels: 33 | severity: pageteam 34 | routing_key: myteam 35 | ticketAlert: 36 | labels: 37 | severity: "slack" 38 | slack_channel: "#alerts-myteam" 39 | -------------------------------------------------------------------------------- /examples/k8s-home-wifi.yml: -------------------------------------------------------------------------------- 1 | # This example shows the same example as home-wifi.yml but using Sloth Kubernetes CRD. 2 | # It will generate the Prometheus rules in a Kubernetes prometheus-operator PrometheusRules CRD.
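# Besides offline generation with the CLI, this manifest can also be applied directly to a cluster
# running the Sloth Kubernetes controller, which will create the resulting PrometheusRule for us.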
3 | # 4 | # `sloth generate -i ./examples/k8s-home-wifi.yml` 5 | # 6 | apiVersion: sloth.slok.dev/v1 7 | kind: PrometheusServiceLevel 8 | metadata: 9 | name: sloth-slo-home-wifi 10 | namespace: monitoring 11 | labels: 12 | prometheus: prometheus 13 | role: alert-rules 14 | app: sloth 15 | spec: 16 | service: "home-wifi" 17 | labels: 18 | cluster: "valhalla" 19 | component: "ubiquiti" 20 | context: "home" 21 | slos: 22 | - name: "good-wifi-client-satisfaction" 23 | objective: 95 24 | description: "Will warn us that we don't have a good wifi at home." 25 | sli: 26 | events: 27 | errorQuery: sum_over_time((count(unifipoller_client_satisfaction_ratio < 0.75))[{{.window}}:]) OR on() vector(0) 28 | totalQuery: sum_over_time((count(unifipoller_client_satisfaction_ratio))[{{.window}}:]) 29 | alerting: 30 | name: GoodWifiClientSatisfaction 31 | pageAlert: 32 | labels: 33 | severity: home 34 | ticketAlert: 35 | labels: 36 | severity: warning 37 | 38 | - name: "risk-wifi-client-satisfaction" 39 | objective: 99.9 40 | description: "Will warn us that something very bad is happening with our home wifi." 41 | sli: 42 | events: 43 | errorQuery: sum_over_time((count(unifipoller_client_satisfaction_ratio < 0.5))[{{.window}}:]) OR on() vector(0) 44 | totalQuery: sum_over_time((count(unifipoller_client_satisfaction_ratio))[{{.window}}:]) 45 | alerting: 46 | name: RiskWifiClientSatisfaction 47 | pageAlert: 48 | labels: 49 | severity: home 50 | ticketAlert: 51 | labels: 52 | severity: warning 53 | -------------------------------------------------------------------------------- /examples/k8s-multifile.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # This example shows the same example as getting-started.yml but using Sloth Kubernetes CRD and multifile. 3 | # It will generate the Prometheus rules in a Kubernetes prometheus-operator PrometheusRules CRD. 4 | # 5 | # `sloth generate -i ./examples/k8s-multifile.yml` 6 | # 7 | apiVersion: sloth.slok.dev/v1 8 | kind: PrometheusServiceLevel 9 | metadata: 10 | name: sloth-slo-my-service 11 | namespace: monitoring 12 | spec: 13 | service: "myservice" 14 | labels: 15 | owner: "myteam" 16 | repo: "myorg/myservice" 17 | tier: "2" 18 | slos: 19 | - name: "requests-availability" 20 | objective: 99.9 21 | description: "Common SLO based on availability for HTTP request responses." 22 | sli: 23 | events: 24 | errorQuery: sum(rate(http_request_duration_seconds_count{job="myservice",code=~"(5..|429)"}[{{.window}}])) 25 | totalQuery: sum(rate(http_request_duration_seconds_count{job="myservice"}[{{.window}}])) 26 | alerting: 27 | name: MyServiceHighErrorRate 28 | labels: 29 | category: "availability" 30 | annotations: 31 | summary: "High error rate on 'myservice' requests responses" 32 | pageAlert: 33 | labels: 34 | severity: pageteam 35 | routing_key: myteam 36 | ticketAlert: 37 | labels: 38 | severity: "slack" 39 | slack_channel: "#alerts-myteam" 40 | --- 41 | apiVersion: sloth.slok.dev/v1 42 | kind: PrometheusServiceLevel 43 | metadata: 44 | name: sloth-slo-my-service2 45 | namespace: monitoring 46 | spec: 47 | service: "myservice2" 48 | labels: 49 | owner: "myteam2" 50 | repo: "myorg/myservice2" 51 | tier: "1" 52 | slos: 53 | - name: "requests-availability" 54 | objective: 99.99 55 | description: "Common SLO based on availability for HTTP request responses."
56 | sli: 57 | events: 58 | errorQuery: sum(rate(http_request_duration_seconds_count{job="myservice",code=~"(5..|429)"}[{{.window}}])) 59 | totalQuery: sum(rate(http_request_duration_seconds_count{job="myservice"}[{{.window}}])) 60 | alerting: 61 | name: MyServiceHighErrorRate 62 | labels: 63 | category: "availability" 64 | annotations: 65 | summary: "High error rate on 'myservice' requests responses" 66 | pageAlert: 67 | labels: 68 | severity: pageteam 69 | routing_key: myteam 70 | ticketAlert: 71 | labels: 72 | severity: "slack" 73 | slack_channel: "#alerts-myteam" 74 | -------------------------------------------------------------------------------- /examples/kubernetes-apiserver.yml: -------------------------------------------------------------------------------- 1 | # This example shows a real service level used for Kubernetes Apiserver. 2 | # 3 | # The service level has 2 SLOs based on Apiserver requests/responses. 4 | # 5 | # We consider an SLI event the requests made to the server; let's review the SLOs 6 | # 7 | # - `requests-availability` 8 | #   - This SLO warns us that we are returning the requests correctly to the clients (kubectl users, controllers...). 9 | #   - SLI error: We consider a bad request (event) a request with code >=500 or 429 10 | #   - SLO objective (99.9%): We are restrictive with this because we only allow failing a request every 1000. 11 | # 12 | # - `requests-latency` 13 | #   - This SLO warns us that the apiserver responses are being slow and this will affect the clients (kubectl users, controllers...). 14 | #   - SLI error: We consider a bad request (event) when the response latency is >400ms. 15 | #   - SLO objective (99%): We have a relaxed objective because Kubernetes has a lot of async and eventual consistency flows. We could 16 | #     create in the future another SLO that is less restrictive and use the latency of the realtime requests (e.g: kubectl). 17 | # 18 | # `sloth generate -i ./examples/kubernetes-apiserver.yml` 19 | # 20 | version: "prometheus/v1" 21 | service: "k8s-apiserver" 22 | labels: 23 | cluster: "valhalla" 24 | component: "kubernetes" 25 | slos: 26 | - name: "requests-availability" 27 | objective: 99.9 28 | description: "Warn that we are returning the requests correctly to the clients (kubectl users, controllers...)." 29 | labels: 30 | category: availability 31 | sli: 32 | events: 33 | error_query: sum(rate(apiserver_request_total{code=~"(5..|429)"}[{{.window}}])) 34 | total_query: sum(rate(apiserver_request_total[{{.window}}])) 35 | alerting: 36 | name: K8sApiserverAvailabilityAlert 37 | labels: 38 | category: "availability" 39 | annotations: 40 | runbook: "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorshigh" 41 | page_alert: 42 | labels: 43 | severity: critical 44 | ticket_alert: 45 | labels: 46 | severity: warning 47 | 48 | - name: "requests-latency" 49 | objective: 99 50 | description: "Warn that the apiserver responses are being slow and this will affect the clients (kubectl users, controllers...)."
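# (In other words: an error event is a request slower than 400ms. The error query below takes all
# requests and subtracts the ones that landed in the le="0.4" histogram bucket.)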
51 | labels: 52 | category: latency 53 | sli: 54 | events: 55 | error_query: | 56 | ( 57 | sum(rate(apiserver_request_duration_seconds_count{verb!="WATCH"}[{{.window}}])) 58 | - 59 | sum(rate(apiserver_request_duration_seconds_bucket{le="0.4",verb!="WATCH"}[{{.window}}])) 60 | ) 61 | total_query: sum(rate(apiserver_request_duration_seconds_count{verb!="WATCH"}[{{.window}}])) 62 | alerting: 63 | name: K8sApiserverLatencyAlert 64 | labels: 65 | category: "latency" 66 | annotations: 67 | runbook: "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapilatencyhigh" 68 | page_alert: 69 | labels: 70 | severity: critical 71 | ticket_alert: 72 | labels: 73 | severity: warning 74 | -------------------------------------------------------------------------------- /examples/multifile.yml: -------------------------------------------------------------------------------- 1 | --- 2 | version: "prometheus/v1" 3 | service: "myservice" 4 | labels: 5 | owner: "myteam" 6 | repo: "myorg/myservice" 7 | tier: "2" 8 | slos: 9 | # We allow failing (5xx and 429) 1 request every 1000 requests (99.9%). 10 | - name: "requests-availability" 11 | objective: 99.9 12 | description: "Common SLO based on availability for HTTP request responses." 13 | sli: 14 | events: 15 | error_query: sum(rate(http_request_duration_seconds_count{job="myservice",code=~"(5..|429)"}[{{.window}}])) 16 | total_query: sum(rate(http_request_duration_seconds_count{job="myservice"}[{{.window}}])) 17 | alerting: 18 | name: MyServiceHighErrorRate 19 | labels: 20 | category: "availability" 21 | annotations: 22 | # Overwrite default Sloth SLO alert summary on ticket and page alerts. 23 | summary: "High error rate on 'myservice' requests responses" 24 | page_alert: 25 | labels: 26 | severity: pageteam 27 | routing_key: myteam 28 | ticket_alert: 29 | labels: 30 | severity: "slack" 31 | slack_channel: "#alerts-myteam" 32 | 33 | --- 34 | version: "prometheus/v1" 35 | service: "myservice2" 36 | labels: 37 | owner: "myteam2" 38 | repo: "myorg/myservice2" 39 | tier: "1" 40 | slos: 41 | # We allow failing (5xx and 429) 1 request every 10000 requests (99.99%). 42 | - name: "requests-availability" 43 | objective: 99.99 44 | description: "Common SLO based on availability for HTTP request responses." 45 | sli: 46 | events: 47 | error_query: sum(rate(http_request_duration_seconds_count{job="myservice",code=~"(5..|429)"}[{{.window}}])) 48 | total_query: sum(rate(http_request_duration_seconds_count{job="myservice"}[{{.window}}])) 49 | alerting: 50 | name: MyServiceHighErrorRate 51 | labels: 52 | category: "availability" 53 | annotations: 54 | # Overwrite default Sloth SLO alert summary on ticket and page alerts. 55 | summary: "High error rate on 'myservice' requests responses" 56 | page_alert: 57 | labels: 58 | severity: pageteam 59 | routing_key: myteam 60 | ticket_alert: 61 | labels: 62 | severity: "slack" 63 | slack_channel: "#alerts-myteam" 64 | -------------------------------------------------------------------------------- /examples/no-alerts.yml: -------------------------------------------------------------------------------- 1 | # This example shows a simple service level by implementing a single SLO without alerts. 2 | # It disables page (critical) and ticket (warning) alerts. 3 | # The SLO SLI measures error events as the HTTP request responses with code >=500 or 429.
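# Disabling the alerts only skips the alert rules; the SLI and metadata recording rules are still generated.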
4 | # 5 | # `sloth generate -i ./examples/no-alerts.yml` 6 | # 7 | version: "prometheus/v1" 8 | service: "myapp" 9 | labels: 10 | owner: "myteam" 11 | slos: 12 | - name: "http-availability" 13 | objective: 99.99 14 | description: "Common SLO based on availability for HTTP request responses." 15 | sli: 16 | events: 17 | error_query: | 18 | sum( 19 | rate(http_request_duration_seconds_count{job="myapp", code=~"(5..|429)"}[{{.window}}]) 20 | ) 21 | total_query: | 22 | sum( 23 | rate(http_request_duration_seconds_count{job="myapp"}[{{.window}}]) 24 | ) 25 | alerting: 26 | page_alert: 27 | disable: true 28 | ticket_alert: 29 | disable: true 30 | -------------------------------------------------------------------------------- /examples/openslo-getting-started.yml: -------------------------------------------------------------------------------- 1 | # This example shows the same example as getting-started.yml but using OpenSLO spec. 2 | # It will generate the Prometheus rules in a Prometheus rules format. 3 | # 4 | # `sloth generate -i ./examples/openslo-getting-started.yml` 5 | # 6 | apiVersion: openslo/v1alpha 7 | kind: SLO 8 | metadata: 9 | name: sloth-slo-my-service 10 | displayName: Requests Availability 11 | spec: 12 | service: my-service 13 | description: "Common SLO based on availability for HTTP request responses." 14 | budgetingMethod: Occurrences 15 | objectives: 16 | - ratioMetrics: 17 | good: 18 | source: prometheus 19 | queryType: promql 20 | query: sum(rate(http_request_duration_seconds_count{job="myservice",code!~"(5..|429)"}[{{.window}}])) 21 | total: 22 | source: prometheus 23 | queryType: promql 24 | query: sum(rate(http_request_duration_seconds_count{job="myservice"}[{{.window}}])) 25 | target: 0.999 26 | timeWindows: 27 | - count: 30 28 | unit: Day 29 | -------------------------------------------------------------------------------- /examples/openslo-kubernetes-apiserver.yml: -------------------------------------------------------------------------------- 1 | # This example shows the same example as kubernetes-apiserver.yml but using OpenSLO spec. 2 | # It will generate the Prometheus rules in a Prometheus rules format. 3 | # 4 | # Take into account that OpenSLO spec has the concept of a single SLO with multiple objectives. 5 | # 6 | # `sloth generate -i ./examples/openslo-kubernetes-apiserver.yml` 7 | # 8 | apiVersion: openslo/v1alpha 9 | kind: SLO 10 | metadata: 11 | name: requests-availability-openslo 12 | displayName: Requests Availability 13 | spec: 14 | service: k8s-apiserver 15 | description: "Apiserver is returning the requests correctly to the clients (kubectl users, controllers...)." 16 | budgetingMethod: Occurrences 17 | objectives: 18 | - ratioMetrics: 19 | good: 20 | source: prometheus 21 | queryType: promql 22 | query: sum(rate(apiserver_request_total{code!~"(5..|429)"}[{{.window}}])) 23 | total: 24 | source: prometheus 25 | queryType: promql 26 | query: sum(rate(apiserver_request_total[{{.window}}])) 27 | target: 0.999 28 | 29 | timeWindows: 30 | - count: 30 31 | unit: Day 32 | 33 | --- 34 | apiVersion: openslo/v1alpha 35 | kind: SLO 36 | metadata: 37 | name: requests-latency-openslo 38 | displayName: Requests Latency 39 | spec: 40 | service: k8s-apiserver 41 | description: "Apiserver responses are fast enough not to affect the clients (kubectl users, controllers...)."
42 | budgetingMethod: Occurrences 43 | objectives: 44 | - ratioMetrics: 45 | good: 46 | source: prometheus 47 | queryType: promql 48 | query: sum(rate(apiserver_request_duration_seconds_bucket{le="0.4",verb!="WATCH"}[{{.window}}])) 49 | total: 50 | source: prometheus 51 | queryType: promql 52 | query: sum(rate(apiserver_request_duration_seconds_count{verb!="WATCH"}[{{.window}}])) 53 | target: 0.99 54 | 55 | - ratioMetrics: 56 | good: 57 | source: prometheus 58 | queryType: promql 59 | query: sum(rate(apiserver_request_duration_seconds_bucket{le="5",verb!="WATCH"}[{{.window}}])) 60 | total: 61 | source: prometheus 62 | queryType: promql 63 | query: sum(rate(apiserver_request_duration_seconds_count{verb!="WATCH"}[{{.window}}])) 64 | target: 0.999 65 | 66 | timeWindows: 67 | - count: 30 68 | unit: Day 69 | -------------------------------------------------------------------------------- /examples/plugin-getting-started.yml: -------------------------------------------------------------------------------- 1 | version: "prometheus/v1" 2 | service: "myservice" 3 | labels: 4 | owner: "myteam" 5 | repo: "myorg/myservice" 6 | tier: "2" 7 | slos: 8 | # We allow failing (5xx and 429) 1 request every 1000 requests (99.9%). 9 | - name: "requests-availability" 10 | objective: 99.9 11 | description: "Common SLO based on availability for HTTP request responses." 12 | sli: 13 | plugin: 14 | id: "getting_started_availability" 15 | options: 16 | job: "myservice" 17 | filter: 'f1="v1",f2="v2"' 18 | alerting: 19 | name: MyServiceHighErrorRate 20 | labels: 21 | category: "availability" 22 | annotations: 23 | # Overwrite default Sloth SLO alert summary on ticket and page alerts. 24 | summary: "High error rate on 'myservice' requests responses" 25 | page_alert: 26 | labels: 27 | severity: pageteam 28 | routing_key: myteam 29 | ticket_alert: 30 | labels: 31 | severity: "slack" 32 | slack_channel: "#alerts-myteam" 33 | -------------------------------------------------------------------------------- /examples/plugin-k8s-getting-started.yml: -------------------------------------------------------------------------------- 1 | # This example shows the same example as plugin-getting-started.yml but using Sloth Kubernetes CRD. 2 | # It will generate the Prometheus rules in a Kubernetes prometheus-operator PrometheusRules CRD. 3 | # 4 | # `sloth generate -i ./examples/plugin-k8s-getting-started.yml -p ./examples` 5 | # 6 | apiVersion: sloth.slok.dev/v1 7 | kind: PrometheusServiceLevel 8 | metadata: 9 | name: sloth-slo-home-wifi 10 | namespace: monitoring 11 | labels: 12 | prometheus: prometheus 13 | role: alert-rules 14 | app: sloth 15 | spec: 16 | service: "myservice" 17 | labels: 18 | owner: "myteam" 19 | repo: "myorg/myservice" 20 | tier: "2" 21 | slos: 22 | # We allow failing (5xx and 429) 1 request every 1000 requests (99.9%). 23 | - name: "requests-availability" 24 | objective: 99.9 25 | description: "Common SLO based on availability for HTTP request responses." 26 | sli: 27 | plugin: 28 | id: "getting_started_availability" 29 | options: 30 | job: "myservice" 31 | filter: 'f1="v1",f2="v2"' 32 | alerting: 33 | name: MyServiceHighErrorRate 34 | labels: 35 | category: "availability" 36 | annotations: 37 | # Overwrite default Sloth SLO alert summary on ticket and page alerts.
38 | summary: "High error rate on 'myservice' requests responses" 39 | page_alert: 40 | labels: 41 | severity: pageteam 42 | routing_key: myteam 43 | ticket_alert: 44 | labels: 45 | severity: "slack" 46 | slack_channel: "#alerts-myteam" 47 | -------------------------------------------------------------------------------- /examples/plugins/getting-started/availability/plugin.go: -------------------------------------------------------------------------------- 1 | package availability 2 | 3 | import ( 4 | "bytes" 5 | "context" 6 | "fmt" 7 | "regexp" 8 | "strings" 9 | "text/template" 10 | ) 11 | 12 | const ( 13 | SLIPluginVersion = "prometheus/v1" 14 | SLIPluginID = "getting_started_availability" 15 | ) 16 | 17 | var queryTpl = template.Must(template.New("").Parse(` 18 | sum(rate(http_request_duration_seconds_count{ {{.filter}}job="{{.job}}",code=~"(5..|429)" }[{{"{{.window}}"}}])) 19 | / 20 | sum(rate(http_request_duration_seconds_count{ {{.filter}}job="{{.job}}" }[{{"{{.window}}"}}]))`)) 21 | 22 | var filterRegex = regexp.MustCompile(`([^=]+="[^=,"]+",)+`) 23 | 24 | // SLIPlugin is the getting started plugin example. 25 | // 26 | // It will return a Sloth error ratio raw query that returns the error ratio of HTTP requests based 27 | // on the HTTP response status code, taking 5xx and 429 as error events. 28 | func SLIPlugin(ctx context.Context, meta, labels, options map[string]string) (string, error) { 29 | // Get job. 30 | job, ok := options["job"] 31 | if !ok { 32 | return "", fmt.Errorf("job options is required") 33 | } 34 | 35 | // Validate labels. 36 | err := validateLabels(labels, "owner", "tier") 37 | if err != nil { 38 | return "", fmt.Errorf("invalid labels: %w", err) 39 | } 40 | 41 | // Sanitize filter. 42 | filter := options["filter"] 43 | if filter != "" { 44 | filter = strings.Trim(filter, "{}") 45 | filter = strings.Trim(filter, ",") 46 | filter = filter + "," 47 | match := filterRegex.MatchString(filter) 48 | if !match { 49 | return "", fmt.Errorf("invalid prometheus filter: %s", filter) 50 | } 51 | } 52 | 53 | // Create query. 54 | var b bytes.Buffer 55 | data := map[string]string{ 56 | "job": job, 57 | "filter": filter, 58 | } 59 | err = queryTpl.Execute(&b, data) 60 | if err != nil { 61 | return "", fmt.Errorf("could not execute template: %w", err) 62 | } 63 | 64 | return b.String(), nil 65 | } 66 | 67 | // validateLabels will check the labels exist. 68 | func validateLabels(labels map[string]string, requiredKeys ...string) error { 69 | for _, k := range requiredKeys { 70 | v, ok := labels[k] 71 | if !ok || (ok && v == "") { 72 | return fmt.Errorf("%q label is required", k) 73 | } 74 | } 75 | 76 | return nil 77 | } 78 | -------------------------------------------------------------------------------- /examples/raw-home-wifi.yml: -------------------------------------------------------------------------------- 1 | # This example shows another, less accurate but simpler, way of creating the home wifi SLO. 2 | # 3 | # The metrics already give us a ratio for each wifi connection satisfaction; instead of counting 4 | # good and bad events as connections with a minimum satisfaction ratio, we will calculate the average of all 5 | # connection satisfaction ratios over the time window. 6 | # So we can't use the `events` SLI because we are not going to divide bad and total events. 7 | # 8 | # - `wifi-client-satisfaction` 9 | #   - This SLO warns us about the average wifi connection satisfaction.
10 | #   - SLI error: Calculated internally by Ubiquiti's metrics; we use the ratio directly. 11 | #   - SLO objective (95%): We expect the average wifi connection satisfaction to be >=95% 12 | # 13 | # `sloth generate -i ./examples/raw-home-wifi.yml` 14 | # 15 | version: "prometheus/v1" 16 | service: "home-wifi" 17 | labels: 18 | cluster: "valhalla" 19 | component: "ubiquiti" 20 | context: "home" 21 | slos: 22 | - name: "wifi-client-satisfaction" 23 | objective: 95 24 | description: "Warn us about the average wifi connection satisfaction." 25 | sli: 26 | raw: 27 | # Get the average satisfaction ratio and subtract it from 1 (max good) to get the error ratio. 28 | error_ratio_query: | 29 | 1 - ( 30 | sum(sum_over_time(unifipoller_client_satisfaction_ratio[{{.window}}])) 31 | / 32 | sum(count_over_time(unifipoller_client_satisfaction_ratio[{{.window}}])) 33 | ) 34 | alerting: 35 | name: WifiClientSatisfaction 36 | page_alert: 37 | labels: 38 | severity: home 39 | ticket_alert: 40 | labels: 41 | severity: warning 42 | -------------------------------------------------------------------------------- /examples/windows/7d.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: sloth.slok.dev/v1 2 | kind: AlertWindows 3 | spec: 4 | sloPeriod: 7d 5 | page: 6 | quick: 7 | errorBudgetPercent: 8 8 | shortWindow: 5m 9 | longWindow: 1h 10 | slow: 11 | errorBudgetPercent: 12.5 12 | shortWindow: 30m 13 | longWindow: 6h 14 | ticket: 15 | quick: 16 | errorBudgetPercent: 20 17 | shortWindow: 2h 18 | longWindow: 1d 19 | slow: 20 | errorBudgetPercent: 42 21 | shortWindow: 6h 22 | longWindow: 3d 23 | -------------------------------------------------------------------------------- /examples/windows/custom-30d.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: sloth.slok.dev/v1 2 | kind: AlertWindows 3 | spec: 4 | sloPeriod: 30d 5 | page: 6 | quick: 7 | errorBudgetPercent: 1 8 | shortWindow: 2m 9 | longWindow: 30m 10 | slow: 11 | errorBudgetPercent: 2 12 | shortWindow: 15m 13 | longWindow: 3h 14 | ticket: 15 | quick: 16 | errorBudgetPercent: 5 17 | shortWindow: 1h 18 | longWindow: 12h 19 | slow: 20 | errorBudgetPercent: 5 21 | shortWindow: 3h 22 | longWindow: 36h 23 | -------------------------------------------------------------------------------- /internal/alert/alert.go: -------------------------------------------------------------------------------- 1 | package alert 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "time" 7 | ) 8 | 9 | // Severity is the type of alert. 10 | type Severity int 11 | 12 | const ( 13 | UnknownAlertSeverity Severity = iota 14 | PageAlertSeverity 15 | TicketAlertSeverity 16 | ) 17 | 18 | func (s Severity) String() string { 19 | switch s { 20 | case PageAlertSeverity: 21 | return "page" 22 | case TicketAlertSeverity: 23 | return "ticket" 24 | default: 25 | return "unknown" 26 | } 27 | } 28 | 29 | // MWMBAlert represents a multiwindow, multi-burn rate alert. 30 | type MWMBAlert struct { 31 | ID string 32 | ShortWindow time.Duration 33 | LongWindow time.Duration 34 | BurnRateFactor float64 35 | ErrorBudget float64 36 | Severity Severity 37 | } 38 | 39 | // MWMBAlertGroup represents all the alerts of an SLO. 40 | // It's divided into two groups, each made of 2 alerts: 41 | // - Page & quick: Critical alerts that trigger in high rate burn in short term. 42 | // - Page & slow: Critical alerts that trigger in high-normal rate burn in medium term.
43 | // - Ticket & quick: Warning alerts that trigger in normal rate burn in medium term. 44 | // - Ticket & slow: Warning alerts that trigger in slow rate burn in long term. 45 | type MWMBAlertGroup struct { 46 | PageQuick MWMBAlert 47 | PageSlow MWMBAlert 48 | TicketQuick MWMBAlert 49 | TicketSlow MWMBAlert 50 | } 51 | 52 | // WindowsRepo knows how to retrieve windows based on the period of time. 53 | type WindowsRepo interface { 54 | GetWindows(ctx context.Context, period time.Duration) (*Windows, error) 55 | } 56 | 57 | // Generator knows how to generate all the required alerts based on an SLO. 58 | // The generated alerts are generic and don't depend on any specific SLO implementation. 59 | type Generator struct { 60 | windowsRepo WindowsRepo 61 | } 62 | 63 | func NewGenerator(windowsRepo WindowsRepo) Generator { 64 | return Generator{ 65 | windowsRepo: windowsRepo, 66 | } 67 | } 68 | 69 | type SLO struct { 70 | ID string 71 | TimeWindow time.Duration 72 | Objective float64 73 | } 74 | 75 | func (g Generator) GenerateMWMBAlerts(ctx context.Context, slo SLO) (*MWMBAlertGroup, error) { 76 | windows, err := g.windowsRepo.GetWindows(ctx, slo.TimeWindow) 77 | if err != nil { 78 | return nil, fmt.Errorf("the %s SLO period time window is not supported", slo.TimeWindow) 79 | } 80 | 81 | errorBudget := 100 - slo.Objective 82 | 83 | group := MWMBAlertGroup{ 84 | PageQuick: MWMBAlert{ 85 | ID: fmt.Sprintf("%s-page-quick", slo.ID), 86 | ShortWindow: windows.PageQuick.ShortWindow, 87 | LongWindow: windows.PageQuick.LongWindow, 88 | BurnRateFactor: windows.GetSpeedPageQuick(), 89 | ErrorBudget: errorBudget, 90 | Severity: PageAlertSeverity, 91 | }, 92 | PageSlow: MWMBAlert{ 93 | ID: fmt.Sprintf("%s-page-slow", slo.ID), 94 | ShortWindow: windows.PageSlow.ShortWindow, 95 | LongWindow: windows.PageSlow.LongWindow, 96 | BurnRateFactor: windows.GetSpeedPageSlow(), 97 | ErrorBudget: errorBudget, 98 | Severity: PageAlertSeverity, 99 | }, 100 | TicketQuick: MWMBAlert{ 101 | ID: fmt.Sprintf("%s-ticket-quick", slo.ID), 102 | ShortWindow: windows.TicketQuick.ShortWindow, 103 | LongWindow: windows.TicketQuick.LongWindow, 104 | BurnRateFactor: windows.GetSpeedTicketQuick(), 105 | ErrorBudget: errorBudget, 106 | Severity: TicketAlertSeverity, 107 | }, 108 | TicketSlow: MWMBAlert{ 109 | ID: fmt.Sprintf("%s-ticket-slow", slo.ID), 110 | ShortWindow: windows.TicketSlow.ShortWindow, 111 | LongWindow: windows.TicketSlow.LongWindow, 112 | BurnRateFactor: windows.GetSpeedTicketSlow(), 113 | ErrorBudget: errorBudget, 114 | Severity: TicketAlertSeverity, 115 | }, 116 | } 117 | 118 | return &group, nil 119 | } 120 | -------------------------------------------------------------------------------- /internal/alert/windows/google-28d.yaml: -------------------------------------------------------------------------------- 1 | # Common and safe 4-week windows. 2 | # 3 | # Numbers obtained from https://sre.google/workbook/alerting-on-slos/#recommended_parameters_for_an_slo_based_a.
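# Conceptual note (added for clarity, not part of the spec): each burn-rate factor follows from
# these numbers as roughly errorBudgetPercent/100 * sloPeriod / longWindow.
# E.g. for the page-quick alert below: 0.02 * 672h / 1h = 13.44x the steady error budget burn rate.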
4 | apiVersion: "sloth.slok.dev/v1" 5 | kind: "AlertWindows" 6 | spec: 7 | sloPeriod: 28d 8 | page: 9 | quick: 10 | errorBudgetPercent: 2 11 | shortWindow: 5m 12 | longWindow: 1h 13 | slow: 14 | errorBudgetPercent: 5 15 | shortWindow: 30m 16 | longWindow: 6h 17 | ticket: 18 | quick: 19 | errorBudgetPercent: 10 20 | shortWindow: 2h 21 | longWindow: 1d 22 | slow: 23 | errorBudgetPercent: 10 24 | shortWindow: 6h 25 | longWindow: 3d -------------------------------------------------------------------------------- /internal/alert/windows/google-30d.yaml: -------------------------------------------------------------------------------- 1 | # Common and safe month windows. 2 | # 3 | # Numbers obtained from https://sre.google/workbook/alerting-on-slos/#recommended_parameters_for_an_slo_based_a. 4 | apiVersion: "sloth.slok.dev/v1" 5 | kind: "AlertWindows" 6 | spec: 7 | sloPeriod: 30d 8 | page: 9 | quick: 10 | errorBudgetPercent: 2 11 | shortWindow: 5m 12 | longWindow: 1h 13 | slow: 14 | errorBudgetPercent: 5 15 | shortWindow: 30m 16 | longWindow: 6h 17 | ticket: 18 | quick: 19 | errorBudgetPercent: 10 20 | shortWindow: 2h 21 | longWindow: 1d 22 | slow: 23 | errorBudgetPercent: 10 24 | shortWindow: 6h 25 | longWindow: 3d 26 | -------------------------------------------------------------------------------- /internal/app/generate/noop.go: -------------------------------------------------------------------------------- 1 | package generate 2 | 3 | import ( 4 | "context" 5 | 6 | "github.com/prometheus/prometheus/model/rulefmt" 7 | "github.com/slok/sloth/internal/alert" 8 | "github.com/slok/sloth/internal/info" 9 | "github.com/slok/sloth/internal/prometheus" 10 | ) 11 | 12 | type noopSLIRecordingRulesGenerator bool 13 | 14 | const NoopSLIRecordingRulesGenerator = noopSLIRecordingRulesGenerator(false) 15 | 16 | func (noopSLIRecordingRulesGenerator) GenerateSLIRecordingRules(ctx context.Context, slo prometheus.SLO, alerts alert.MWMBAlertGroup) ([]rulefmt.Rule, error) { 17 | return nil, nil 18 | } 19 | 20 | type noopMetadataRecordingRulesGenerator bool 21 | 22 | const NoopMetadataRecordingRulesGenerator = noopMetadataRecordingRulesGenerator(false) 23 | 24 | func (noopMetadataRecordingRulesGenerator) GenerateMetadataRecordingRules(ctx context.Context, info info.Info, slo prometheus.SLO, alerts alert.MWMBAlertGroup) ([]rulefmt.Rule, error) { 25 | return nil, nil 26 | } 27 | 28 | type noopSLOAlertRulesGenerator bool 29 | 30 | const NoopSLOAlertRulesGenerator = noopSLOAlertRulesGenerator(false) 31 | 32 | func (noopSLOAlertRulesGenerator) GenerateSLOAlertRules(ctx context.Context, slo prometheus.SLO, alerts alert.MWMBAlertGroup) ([]rulefmt.Rule, error) { 33 | return nil, nil 34 | } 35 | -------------------------------------------------------------------------------- /internal/app/kubecontroller/retriever.go: -------------------------------------------------------------------------------- 1 | package kubecontroller 2 | 3 | import ( 4 | "context" 5 | 6 | "github.com/spotahome/kooper/v2/controller" 7 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 8 | "k8s.io/apimachinery/pkg/labels" 9 | "k8s.io/apimachinery/pkg/runtime" 10 | "k8s.io/apimachinery/pkg/watch" 11 | "k8s.io/client-go/tools/cache" 12 | 13 | slothv1 "github.com/slok/sloth/pkg/kubernetes/api/sloth/v1" 14 | ) 15 | 16 | // RetrieverKubernetesRepository is the service to manage k8s resources by the Kubernetes controller retrievers. 
17 | type RetrieverKubernetesRepository interface { 18 | ListPrometheusServiceLevels(ctx context.Context, ns string, opts metav1.ListOptions) (*slothv1.PrometheusServiceLevelList, error) 19 | WatchPrometheusServiceLevels(ctx context.Context, ns string, opts metav1.ListOptions) (watch.Interface, error) 20 | } 21 | 22 | // NewPrometheusServiceLevelsRetriver returns the retriever for Prometheus service level events. 23 | func NewPrometheusServiceLevelsRetriver(ns string, labelSelector labels.Selector, repo RetrieverKubernetesRepository) controller.Retriever { 24 | return controller.MustRetrieverFromListerWatcher(&cache.ListWatch{ 25 | ListFunc: func(options metav1.ListOptions) (runtime.Object, error) { 26 | options.LabelSelector = labelSelector.String() 27 | return repo.ListPrometheusServiceLevels(context.Background(), ns, options) 28 | }, 29 | WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) { 30 | options.LabelSelector = labelSelector.String() 31 | return repo.WatchPrometheusServiceLevels(context.Background(), ns, options) 32 | }, 33 | }) 34 | } 35 | -------------------------------------------------------------------------------- /internal/info/info.go: -------------------------------------------------------------------------------- 1 | package info 2 | 3 | var ( 4 | // Version is the app version. 5 | Version = "dev" 6 | ) 7 | 8 | type Mode string 9 | 10 | const ( 11 | ModeTest = "test" 12 | ModeCLIGenPrometheus = "cli-gen-prom" 13 | ModeCLIGenKubernetes = "cli-gen-k8s" 14 | ModeCLIGenOpenSLO = "cli-gen-openslo" 15 | ModeControllerGenKubernetes = "ctrl-gen-k8s" 16 | ) 17 | 18 | // Info is the app and request information used by the SLO generators. 19 | type Info struct { 20 | Version string 21 | Mode Mode 22 | Spec string 23 | } 24 | -------------------------------------------------------------------------------- /internal/k8sprometheus/helpers.go: -------------------------------------------------------------------------------- 1 | package k8sprometheus 2 | 3 | func mergeLabels(ms ...map[string]string) map[string]string { 4 | res := map[string]string{} 5 | for _, m := range ms { 6 | for k, v := range m { 7 | res[k] = v 8 | } 9 | } 10 | 11 | return res 12 | } 13 | -------------------------------------------------------------------------------- /internal/k8sprometheus/k8sprometheusmock/prometheus_rules_ensurer.go: -------------------------------------------------------------------------------- 1 | // Code generated by mockery v2.46.3. DO NOT EDIT. 2 | 3 | package k8sprometheusmock 4 | 5 | import ( 6 | context "context" 7 | 8 | mock "github.com/stretchr/testify/mock" 9 | 10 | v1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" 11 | ) 12 | 13 | // PrometheusRulesEnsurer is an autogenerated mock type for the PrometheusRulesEnsurer type 14 | type PrometheusRulesEnsurer struct { 15 | mock.Mock 16 | } 17 | 18 | // EnsurePrometheusRule provides a mock function with given fields: ctx, pr 19 | func (_m *PrometheusRulesEnsurer) EnsurePrometheusRule(ctx context.Context, pr *v1.PrometheusRule) error { 20 | ret := _m.Called(ctx, pr) 21 | 22 | if len(ret) == 0 { 23 | panic("no return value specified for EnsurePrometheusRule") 24 | } 25 | 26 | var r0 error 27 | if rf, ok := ret.Get(0).(func(context.Context, *v1.PrometheusRule) error); ok { 28 | r0 = rf(ctx, pr) 29 | } else { 30 | r0 = ret.Error(0) 31 | } 32 | 33 | return r0 34 | } 35 | 36 | // NewPrometheusRulesEnsurer creates a new instance of PrometheusRulesEnsurer.
It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. 37 | // The first argument is typically a *testing.T value. 38 | func NewPrometheusRulesEnsurer(t interface { 39 | mock.TestingT 40 | Cleanup(func()) 41 | }) *PrometheusRulesEnsurer { 42 | mock := &PrometheusRulesEnsurer{} 43 | mock.Mock.Test(t) 44 | 45 | t.Cleanup(func() { mock.AssertExpectations(t) }) 46 | 47 | return mock 48 | } 49 | -------------------------------------------------------------------------------- /internal/k8sprometheus/model.go: -------------------------------------------------------------------------------- 1 | package k8sprometheus 2 | 3 | import ( 4 | "github.com/go-playground/validator/v10" 5 | 6 | "github.com/slok/sloth/internal/prometheus" 7 | ) 8 | 9 | // K8sMeta is the simplified Kubernetes metadata. 10 | type K8sMeta struct { 11 | Kind string `validate:"required"` 12 | APIVersion string `validate:"required"` 13 | Name string `validate:"required"` 14 | UID string 15 | Namespace string 16 | Annotations map[string]string 17 | Labels map[string]string 18 | } 19 | 20 | // SLOGroup is a Kubernetes SLO group. It is created based on a regular Prometheus 21 | // SLO model and Kubernetes data. 22 | type SLOGroup struct { 23 | K8sMeta K8sMeta 24 | prometheus.SLOGroup 25 | } 26 | 27 | // Validate validates the SLO. 28 | func (s SLOGroup) Validate() error { 29 | err := modelSpecValidate.Struct(s.K8sMeta) 30 | if err != nil { 31 | return err 32 | } 33 | 34 | err = s.SLOGroup.Validate() 35 | if err != nil { 36 | return err 37 | } 38 | 39 | return nil 40 | } 41 | 42 | var modelSpecValidate = func() *validator.Validate { 43 | return validator.New() 44 | }() 45 | -------------------------------------------------------------------------------- /internal/k8sprometheus/model_test.go: -------------------------------------------------------------------------------- 1 | package k8sprometheus_test 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | "time" 7 | 8 | "github.com/stretchr/testify/assert" 9 | 10 | "github.com/slok/sloth/internal/k8sprometheus" 11 | "github.com/slok/sloth/internal/prometheus" 12 | ) 13 | 14 | func getGoodSLOGroup() k8sprometheus.SLOGroup { 15 | return k8sprometheus.SLOGroup{ 16 | K8sMeta: k8sprometheus.K8sMeta{ 17 | Kind: "PrometheusServiceLevel", 18 | APIVersion: "sloth.slok.dev/v1", 19 | Name: "test", 20 | Namespace: "test-ns", 21 | }, 22 | SLOGroup: prometheus.SLOGroup{SLOs: []prometheus.SLO{ 23 | getGoodSLO("slo1"), 24 | getGoodSLO("slo2"), 25 | }, 26 | }, 27 | } 28 | } 29 | 30 | func getGoodSLO(name string) prometheus.SLO { 31 | return prometheus.SLO{ 32 | ID: fmt.Sprintf("%s-id", name), 33 | Name: name, 34 | Service: "test-svc", 35 | TimeWindow: 30 * 24 * time.Hour, 36 | SLI: prometheus.SLI{ 37 | Events: &prometheus.SLIEvents{ 38 | ErrorQuery: `sum(rate(grpc_server_handled_requests_count{job="myapp",code=~"Internal|Unavailable"}[{{ .window }}]))`, 39 | TotalQuery: `sum(rate(grpc_server_handled_requests_count{job="myapp"}[{{ .window }}]))`, 40 | }, 41 | }, 42 | Objective: 99.99, 43 | Labels: map[string]string{ 44 | "owner": "myteam", 45 | "category": "test", 46 | }, 47 | InfoLabels: map[string]string{ 48 | "foo": "bar", 49 | }, 50 | PageAlertMeta: prometheus.AlertMeta{ 51 | Disable: false, 52 | Name: "testAlert", 53 | Labels: map[string]string{ 54 | "tier": "1", 55 | "severity": "slack", 56 | "channel": "#a-myteam", 57 | }, 58 | Annotations: map[string]string{ 59 | "message": "This is very important.", 60 | "runbook": "http://whatever.com", 61 | }, 62 | },
63 | TicketAlertMeta: prometheus.AlertMeta{ 64 | Disable: false, 65 | Name: "testAlert", 66 | Labels: map[string]string{ 67 | "tier": "1", 68 | "severity": "slack", 69 | "channel": "#a-not-so-important", 70 | }, 71 | Annotations: map[string]string{ 72 | "message": "This is not very important.", 73 | "runbook": "http://whatever.com", 74 | }, 75 | }, 76 | } 77 | } 78 | 79 | func TestModelValidationSpec(t *testing.T) { 80 | tests := map[string]struct { 81 | slos func() k8sprometheus.SLOGroup 82 | expErrMessage string 83 | }{ 84 | "Correct SLOs should not fail.": { 85 | slos: getGoodSLOGroup, 86 | }, 87 | 88 | "Kind is required.": { 89 | slos: func() k8sprometheus.SLOGroup { 90 | sg := getGoodSLOGroup() 91 | sg.K8sMeta.Kind = "" 92 | return sg 93 | }, 94 | expErrMessage: "Key: 'K8sMeta.Kind' Error:Field validation for 'Kind' failed on the 'required' tag", 95 | }, 96 | 97 | "APIVersion is required.": { 98 | slos: func() k8sprometheus.SLOGroup { 99 | sg := getGoodSLOGroup() 100 | sg.K8sMeta.APIVersion = "" 101 | return sg 102 | }, 103 | expErrMessage: "Key: 'K8sMeta.APIVersion' Error:Field validation for 'APIVersion' failed on the 'required' tag", 104 | }, 105 | 106 | "Name is required.": { 107 | slos: func() k8sprometheus.SLOGroup { 108 | sg := getGoodSLOGroup() 109 | sg.K8sMeta.Name = "" 110 | return sg 111 | }, 112 | expErrMessage: "Key: 'K8sMeta.Name' Error:Field validation for 'Name' failed on the 'required' tag", 113 | }, 114 | 115 | "SLO validation is executed correctly and fails if SLOs fail.": { 116 | slos: func() k8sprometheus.SLOGroup { 117 | sg := getGoodSLOGroup() 118 | sg.SLOs[0].ID = "" 119 | return sg 120 | }, 121 | expErrMessage: "Key: 'SLOGroup.SLOs[0].ID' Error:Field validation for 'ID' failed on the 'required' tag", 122 | }, 123 | } 124 | 125 | for name, test := range tests { 126 | t.Run(name, func(t *testing.T) { 127 | assert := assert.New(t) 128 | 129 | slos := test.slos() 130 | err := slos.Validate() 131 | 132 | if test.expErrMessage != "" { 133 | assert.Error(err) 134 | assert.Equal(test.expErrMessage, err.Error()) 135 | } else { 136 | assert.NoError(err) 137 | } 138 | }) 139 | } 140 | } 141 | -------------------------------------------------------------------------------- /internal/log/log.go: -------------------------------------------------------------------------------- 1 | package log 2 | 3 | import "context" 4 | 5 | // Kv is a helper type for structured logging fields usage. 6 | type Kv = map[string]interface{} 7 | 8 | // Logger is the interface that the loggers used by the library must implement. 9 | type Logger interface { 10 | Infof(format string, args ...interface{}) 11 | Warningf(format string, args ...interface{}) 12 | Errorf(format string, args ...interface{}) 13 | Debugf(format string, args ...interface{}) 14 | WithValues(values map[string]interface{}) Logger 15 | WithCtxValues(ctx context.Context) Logger 16 | SetValuesOnCtx(parent context.Context, values map[string]interface{}) context.Context 17 | } 18 | 19 | // Noop logger doesn't log anything.
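// It is handy as a safe default when the caller doesn't provide a logger.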
20 | const Noop = noop(0) 21 | 22 | type noop int 23 | 24 | func (n noop) Infof(format string, args ...interface{}) {} 25 | func (n noop) Warningf(format string, args ...interface{}) {} 26 | func (n noop) Errorf(format string, args ...interface{}) {} 27 | func (n noop) Debugf(format string, args ...interface{}) {} 28 | func (n noop) WithValues(map[string]interface{}) Logger { return n } 29 | func (n noop) WithCtxValues(context.Context) Logger { return n } 30 | func (n noop) SetValuesOnCtx(parent context.Context, values Kv) context.Context { return parent } 31 | 32 | type contextKey string 33 | 34 | // contextLogValuesKey is used as a unique key to store log values in the context. 35 | const contextLogValuesKey = contextKey("internal-log") 36 | 37 | // CtxWithValues returns a copy of parent in which the key values passed have been 38 | // stored, ready to be used with log.Logger. 39 | func CtxWithValues(parent context.Context, kv Kv) context.Context { 40 | // Maybe we have values already set. 41 | oldValues, ok := parent.Value(contextLogValuesKey).(Kv) 42 | if !ok { 43 | oldValues = Kv{} 44 | } 45 | 46 | // Copy old and received values into the new kv. 47 | newValues := Kv{} 48 | for k, v := range oldValues { 49 | newValues[k] = v 50 | } 51 | for k, v := range kv { 52 | newValues[k] = v 53 | } 54 | 55 | return context.WithValue(parent, contextLogValuesKey, newValues) 56 | } 57 | 58 | // ValuesFromCtx gets the log Key values from a context. 59 | func ValuesFromCtx(ctx context.Context) Kv { 60 | values, ok := ctx.Value(contextLogValuesKey).(Kv) 61 | if !ok { 62 | return Kv{} 63 | } 64 | 65 | return values 66 | } 67 | -------------------------------------------------------------------------------- /internal/log/logrus/logrus.go: -------------------------------------------------------------------------------- 1 | package logrus 2 | 3 | import ( 4 | "context" 5 | 6 | "github.com/sirupsen/logrus" 7 | 8 | "github.com/slok/sloth/internal/log" 9 | ) 10 | 11 | type logger struct { 12 | *logrus.Entry 13 | } 14 | 15 | // NewLogrus returns a new log.Logger for a logrus implementation. 16 | func NewLogrus(l *logrus.Entry) log.Logger { 17 | return logger{Entry: l} 18 | } 19 | 20 | func (l logger) WithValues(kv log.Kv) log.Logger { 21 | newLogger := l.Entry.WithFields(kv) 22 | return NewLogrus(newLogger) 23 | } 24 | 25 | func (l logger) WithCtxValues(ctx context.Context) log.Logger { 26 | return l.WithValues(log.ValuesFromCtx(ctx)) 27 | } 28 | 29 | func (l logger) SetValuesOnCtx(parent context.Context, values log.Kv) context.Context { 30 | return log.CtxWithValues(parent, values) 31 | } 32 | -------------------------------------------------------------------------------- /internal/prometheus/alert_rules.go: -------------------------------------------------------------------------------- 1 | package prometheus 2 | 3 | import ( 4 | "bytes" 5 | "context" 6 | "fmt" 7 | "text/template" 8 | 9 | "github.com/prometheus/prometheus/model/rulefmt" 10 | 11 | "github.com/slok/sloth/internal/alert" 12 | ) 13 | 14 | // alertGenFunc knows how to generate an SLO alert rule for one severity from its quick and slow alerts. 15 | type alertGenFunc func(slo SLO, sloAlert AlertMeta, quick, slow alert.MWMBAlert) (*rulefmt.Rule, error) 16 | 17 | type sloAlertRulesGenerator struct { 18 | alertGenFunc alertGenFunc 19 | } 20 | 21 | // SLOAlertRulesGenerator knows how to generate the SLO prometheus alert rules 22 | // from an SLO.
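// It emits at most two rules per SLO: one page alert and one ticket alert, each
// skipped when its alert metadata is disabled.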
23 | var SLOAlertRulesGenerator = sloAlertRulesGenerator{alertGenFunc: defaultSLOAlertGenerator} 24 | 25 | func (s sloAlertRulesGenerator) GenerateSLOAlertRules(ctx context.Context, slo SLO, alerts alert.MWMBAlertGroup) ([]rulefmt.Rule, error) { 26 | rules := []rulefmt.Rule{} 27 | 28 | // Generate Page alerts. 29 | if !slo.PageAlertMeta.Disable { 30 | rule, err := s.alertGenFunc(slo, slo.PageAlertMeta, alerts.PageQuick, alerts.PageSlow) 31 | if err != nil { 32 | return nil, fmt.Errorf("could not create page alert: %w", err) 33 | } 34 | 35 | rules = append(rules, *rule) 36 | } 37 | 38 | // Generate Ticket alerts. 39 | if !slo.TicketAlertMeta.Disable { 40 | rule, err := s.alertGenFunc(slo, slo.TicketAlertMeta, alerts.TicketQuick, alerts.TicketSlow) 41 | if err != nil { 42 | return nil, fmt.Errorf("could not create ticket alert: %w", err) 43 | } 44 | 45 | rules = append(rules, *rule) 46 | } 47 | 48 | return rules, nil 49 | } 50 | 51 | func defaultSLOAlertGenerator(slo SLO, sloAlert AlertMeta, quick, slow alert.MWMBAlert) (*rulefmt.Rule, error) { 52 | // Generate the filter labels based on the SLO ids. 53 | metricFilter := labelsToPromFilter(slo.GetSLOIDPromLabels()) 54 | 55 | // Render the alert template. 56 | tplData := struct { 57 | MetricFilter string 58 | ErrorBudgetRatio float64 59 | QuickShortMetric string 60 | QuickShortBurnFactor float64 61 | QuickLongMetric string 62 | QuickLongBurnFactor float64 63 | SlowShortMetric string 64 | SlowShortBurnFactor float64 65 | SlowQuickMetric string 66 | SlowQuickBurnFactor float64 67 | WindowLabel string 68 | }{ 69 | MetricFilter: metricFilter, 70 | ErrorBudgetRatio: quick.ErrorBudget / 100, // Either quick or slow works because they are the same. 71 | QuickShortMetric: slo.GetSLIErrorMetric(quick.ShortWindow), 72 | QuickShortBurnFactor: quick.BurnRateFactor, 73 | QuickLongMetric: slo.GetSLIErrorMetric(quick.LongWindow), 74 | QuickLongBurnFactor: quick.BurnRateFactor, 75 | SlowShortMetric: slo.GetSLIErrorMetric(slow.ShortWindow), 76 | SlowShortBurnFactor: slow.BurnRateFactor, 77 | SlowQuickMetric: slo.GetSLIErrorMetric(slow.LongWindow), 78 | SlowQuickBurnFactor: slow.BurnRateFactor, 79 | WindowLabel: sloWindowLabelName, 80 | } 81 | var expr bytes.Buffer 82 | err := mwmbAlertTpl.Execute(&expr, tplData) 83 | if err != nil { 84 | return nil, fmt.Errorf("could not render alert expression: %w", err) 85 | } 86 | 87 | // Add specific annotations. 88 | severity := quick.Severity.String() // Either quick or slow works because they are the same. 89 | extraAnnotations := map[string]string{ 90 | "title": fmt.Sprintf("(%s) {{$labels.%s}} {{$labels.%s}} SLO error budget burn rate is too fast.", severity, sloServiceLabelName, sloNameLabelName), 91 | "summary": fmt.Sprintf("{{$labels.%s}} {{$labels.%s}} SLO error budget burn rate is over expected.", sloServiceLabelName, sloNameLabelName), 92 | } 93 | 94 | // Add specific labels. We don't add the labels from the recording rules because they will be 95 | // inherited by the alerts; this way we avoid warnings about overridden labels. 96 | extraLabels := map[string]string{ 97 | sloSeverityLabelName: severity, 98 | } 99 | 100 | return &rulefmt.Rule{ 101 | Alert: sloAlert.Name, 102 | Expr: expr.String(), 103 | Annotations: mergeLabels(extraAnnotations, sloAlert.Annotations), 104 | Labels: mergeLabels(extraLabels, sloAlert.Labels), 105 | }, nil 106 | } 107 | 108 | // Multiburn multiwindow alert template.
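// Each long window is ANDed with its short window: the alert fires only while the SLI error
// ratio exceeds burnRateFactor * errorBudgetRatio over BOTH windows (the short window makes the
// alert resolve quickly once the burn stops), and the quick and slow pairs are ORed together.
// E.g. for a 30d 99.9% SLO, the page-quick branch fires while the error ratio over both 5m and
// 1h is above 14.4 * 0.001 (2% of the budget consumed in 1h => 0.02 * 720h / 1h = 14.4).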
109 | var mwmbAlertTpl = template.Must(template.New("mwmbAlertTpl").Option("missingkey=error").Parse(`(
110 | max({{ .QuickShortMetric }}{{ .MetricFilter }} > ({{ .QuickShortBurnFactor }} * {{ .ErrorBudgetRatio }})) without ({{ .WindowLabel }})
111 | and
112 | max({{ .QuickLongMetric }}{{ .MetricFilter }} > ({{ .QuickLongBurnFactor }} * {{ .ErrorBudgetRatio }})) without ({{ .WindowLabel }})
113 | )
114 | or
115 | (
116 | max({{ .SlowShortMetric }}{{ .MetricFilter }} > ({{ .SlowShortBurnFactor }} * {{ .ErrorBudgetRatio }})) without ({{ .WindowLabel }})
117 | and
118 | max({{ .SlowQuickMetric }}{{ .MetricFilter }} > ({{ .SlowQuickBurnFactor }} * {{ .ErrorBudgetRatio }})) without ({{ .WindowLabel }})
119 | )
120 | `))
121 | 
--------------------------------------------------------------------------------
/internal/prometheus/conventions.go:
--------------------------------------------------------------------------------
1 | package prometheus
2 | 
3 | const (
4 | 	// Metrics.
5 | 	sliErrorMetricFmt = "slo:sli_error:ratio_rate%s"
6 | 
7 | 	// Labels.
8 | 	sloNameLabelName      = "sloth_slo"
9 | 	sloIDLabelName        = "sloth_id"
10 | 	sloServiceLabelName   = "sloth_service"
11 | 	sloWindowLabelName    = "sloth_window"
12 | 	sloSeverityLabelName  = "sloth_severity"
13 | 	sloVersionLabelName   = "sloth_version"
14 | 	sloModeLabelName      = "sloth_mode"
15 | 	sloSpecLabelName      = "sloth_spec"
16 | 	sloObjectiveLabelName = "sloth_objective"
17 | )
18 | 
--------------------------------------------------------------------------------
/internal/prometheus/helpers.go:
--------------------------------------------------------------------------------
1 | package prometheus
2 | 
3 | import (
4 | 	"sort"
5 | 	"time"
6 | 
7 | 	prommodel "github.com/prometheus/common/model"
8 | 
9 | 	"github.com/slok/sloth/internal/alert"
10 | )
11 | 
12 | func mergeLabels(ms ...map[string]string) map[string]string {
13 | 	res := map[string]string{}
14 | 	for _, m := range ms {
15 | 		for k, v := range m {
16 | 			res[k] = v
17 | 		}
18 | 	}
19 | 
20 | 	return res
21 | }
22 | 
23 | func labelsToPromFilter(labels map[string]string) string {
24 | 	metricFilters := prommodel.LabelSet{}
25 | 	for k, v := range labels {
26 | 		metricFilters[prommodel.LabelName(k)] = prommodel.LabelValue(v)
27 | 	}
28 | 
29 | 	return metricFilters.String()
30 | }
31 | 
32 | // timeDurationToPromStr converts a duration into the Prometheus duration string format (e.g. "5m", "1h").
33 | func timeDurationToPromStr(t time.Duration) string {
34 | 	return prommodel.Duration(t).String()
35 | }
36 | 
37 | // getAlertGroupWindows gets all the time windows from a multiwindow multiburn alert group.
38 | func getAlertGroupWindows(alerts alert.MWMBAlertGroup) []time.Duration {
39 | 	// Use a map to avoid duplicated windows.
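	// Keys are the windows' string representations, so equal durations collapse
	// into a single entry regardless of which alert they come from.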
40 | windows := map[string]time.Duration{ 41 | alerts.PageQuick.ShortWindow.String(): alerts.PageQuick.ShortWindow, 42 | alerts.PageQuick.LongWindow.String(): alerts.PageQuick.LongWindow, 43 | alerts.PageSlow.ShortWindow.String(): alerts.PageSlow.ShortWindow, 44 | alerts.PageSlow.LongWindow.String(): alerts.PageSlow.LongWindow, 45 | alerts.TicketQuick.ShortWindow.String(): alerts.TicketQuick.ShortWindow, 46 | alerts.TicketQuick.LongWindow.String(): alerts.TicketQuick.LongWindow, 47 | alerts.TicketSlow.ShortWindow.String(): alerts.TicketSlow.ShortWindow, 48 | alerts.TicketSlow.LongWindow.String(): alerts.TicketSlow.LongWindow, 49 | } 50 | 51 | res := make([]time.Duration, 0, len(windows)) 52 | for _, w := range windows { 53 | res = append(res, w) 54 | } 55 | sort.SliceStable(res, func(i, j int) bool { return res[i] < res[j] }) 56 | 57 | return res 58 | } 59 | -------------------------------------------------------------------------------- /internal/prometheus/prometheusmock/file_manager.go: -------------------------------------------------------------------------------- 1 | // Code generated by mockery v2.46.3. DO NOT EDIT. 2 | 3 | package prometheusmock 4 | 5 | import ( 6 | context "context" 7 | 8 | mock "github.com/stretchr/testify/mock" 9 | 10 | regexp "regexp" 11 | ) 12 | 13 | // FileManager is an autogenerated mock type for the FileManager type 14 | type FileManager struct { 15 | mock.Mock 16 | } 17 | 18 | // FindFiles provides a mock function with given fields: ctx, root, matcher 19 | func (_m *FileManager) FindFiles(ctx context.Context, root string, matcher *regexp.Regexp) ([]string, error) { 20 | ret := _m.Called(ctx, root, matcher) 21 | 22 | if len(ret) == 0 { 23 | panic("no return value specified for FindFiles") 24 | } 25 | 26 | var r0 []string 27 | var r1 error 28 | if rf, ok := ret.Get(0).(func(context.Context, string, *regexp.Regexp) ([]string, error)); ok { 29 | return rf(ctx, root, matcher) 30 | } 31 | if rf, ok := ret.Get(0).(func(context.Context, string, *regexp.Regexp) []string); ok { 32 | r0 = rf(ctx, root, matcher) 33 | } else { 34 | if ret.Get(0) != nil { 35 | r0 = ret.Get(0).([]string) 36 | } 37 | } 38 | 39 | if rf, ok := ret.Get(1).(func(context.Context, string, *regexp.Regexp) error); ok { 40 | r1 = rf(ctx, root, matcher) 41 | } else { 42 | r1 = ret.Error(1) 43 | } 44 | 45 | return r0, r1 46 | } 47 | 48 | // ReadFile provides a mock function with given fields: ctx, path 49 | func (_m *FileManager) ReadFile(ctx context.Context, path string) ([]byte, error) { 50 | ret := _m.Called(ctx, path) 51 | 52 | if len(ret) == 0 { 53 | panic("no return value specified for ReadFile") 54 | } 55 | 56 | var r0 []byte 57 | var r1 error 58 | if rf, ok := ret.Get(0).(func(context.Context, string) ([]byte, error)); ok { 59 | return rf(ctx, path) 60 | } 61 | if rf, ok := ret.Get(0).(func(context.Context, string) []byte); ok { 62 | r0 = rf(ctx, path) 63 | } else { 64 | if ret.Get(0) != nil { 65 | r0 = ret.Get(0).([]byte) 66 | } 67 | } 68 | 69 | if rf, ok := ret.Get(1).(func(context.Context, string) error); ok { 70 | r1 = rf(ctx, path) 71 | } else { 72 | r1 = ret.Error(1) 73 | } 74 | 75 | return r0, r1 76 | } 77 | 78 | // NewFileManager creates a new instance of FileManager. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. 79 | // The first argument is typically a *testing.T value. 
80 | func NewFileManager(t interface {
81 | 	mock.TestingT
82 | 	Cleanup(func())
83 | }) *FileManager {
84 | 	mock := &FileManager{}
85 | 	mock.Mock.Test(t)
86 | 
87 | 	t.Cleanup(func() { mock.AssertExpectations(t) })
88 | 
89 | 	return mock
90 | }
91 | 
--------------------------------------------------------------------------------
/internal/prometheus/sli_plugin_test.go:
--------------------------------------------------------------------------------
1 | package prometheus_test
2 | 
3 | import (
4 | 	"context"
5 | 	"testing"
6 | 
7 | 	"github.com/stretchr/testify/assert"
8 | 	"github.com/stretchr/testify/mock"
9 | 	"github.com/stretchr/testify/require"
10 | 
11 | 	"github.com/slok/sloth/internal/prometheus"
12 | 	"github.com/slok/sloth/internal/prometheus/prometheusmock"
13 | )
14 | 
15 | func TestSLIPluginLoader(t *testing.T) {
16 | 	tests := map[string]struct {
17 | 		pluginSrc   string
18 | 		pluginID    string
19 | 		meta        map[string]string
20 | 		labels      map[string]string
21 | 		options     map[string]string
22 | 		expPluginID string
23 | 		expSLIQuery string
24 | 		expErrLoad  bool
25 | 		expErr      bool
26 | 	}{
27 | 		"Plugin without plugin ID should fail on load.": {
28 | 			pluginSrc: `
29 | package testplugin
30 | 
31 | import "context"
32 | 
33 | const SLIPluginVersion = "prometheus/v1"
34 | 
35 | func SLIPlugin(ctx context.Context, meta, labels, options map[string]string) (string, error) {
36 | 	return "test_query{}", nil
37 | }
38 | `,
39 | 			expErrLoad: true,
40 | 		},
41 | 
42 | 		"Basic plugin should load and return a correct SLI.": {
43 | 			pluginSrc: `
44 | package testplugin
45 | 
46 | import "context"
47 | 
48 | const (
49 | 	SLIPluginID      = "test_plugin"
50 | 	SLIPluginVersion = "prometheus/v1"
51 | )
52 | 
53 | 
54 | func SLIPlugin(ctx context.Context, meta, labels, options map[string]string) (string, error) {
55 | 	return "test_query{}", nil
56 | }
57 | `,
58 | 			expPluginID: "test_plugin",
59 | 			expSLIQuery: "test_query{}",
60 | 		},
61 | 
62 | 		"Plugin with meta and options should load and return a correct SLI.": {
63 | 			pluginSrc: `
64 | package testplugin
65 | 
66 | import "context"
67 | 
68 | import "fmt"
69 | 
70 | const (
71 | 	SLIPluginID      = "test_plugin"
72 | 	SLIPluginVersion = "prometheus/v1"
73 | )
74 | 
75 | func SLIPlugin(ctx context.Context, meta, labels, options map[string]string) (string, error) {
76 | 	return fmt.Sprintf("test_query{mk1=\"%s\",lk1=\"%s\",k1=\"%s\",k2=\"%s\"}", meta["mk1"], labels["lk1"], options["k1"], options["k2"]), nil
77 | }
78 | `,
79 | 			meta:        map[string]string{"mk1": "mv1"},
80 | 			labels:      map[string]string{"lk1": "lv1"},
81 | 			options:     map[string]string{"k1": "v1", "k2": "v2"},
82 | 			expSLIQuery: `test_query{mk1="mv1",lk1="lv1",k1="v1",k2="v2"}`,
83 | 			expPluginID: "test_plugin",
84 | 		},
85 | 
86 | 		"Plugin with error should return errors.": {
87 | 			pluginSrc: `
88 | package testplugin
89 | 
90 | import "context"
91 | 
92 | import "fmt"
93 | 
94 | const (
95 | 	SLIPluginID      = "test_plugin"
96 | 	SLIPluginVersion = "prometheus/v1"
97 | )
98 | 
99 | func SLIPlugin(ctx context.Context, meta, labels, options map[string]string) (string, error) {
100 | 	return "", fmt.Errorf("something")
101 | }
102 | `,
103 | 			meta:        map[string]string{"mk1": "mv1"},
104 | 			labels:      map[string]string{"lk1": "lv1"},
105 | 			options:     map[string]string{"k1": "v1", "k2": "v2"},
106 | 			expPluginID: "test_plugin",
107 | 			expErr:      true,
108 | 		},
109 | 	}
110 | 
111 | 	for name, test := range tests {
112 | 		t.Run(name, func(t *testing.T) {
113 | 			assert := assert.New(t)
114 | 			require := require.New(t)
115 | 
116 | 			// Mock the plugin files.
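			// FindFiles returns a single fake plugin path, and ReadFile serves this
			// test case's plugin source for that path.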
117 | 			mfm := &prometheusmock.FileManager{}
118 | 			mfm.On("FindFiles", mock.Anything, "./", mock.Anything).Once().Return([]string{"testplugin/test.go"}, nil)
119 | 			mfm.On("ReadFile", mock.Anything, "testplugin/test.go").Once().Return([]byte(test.pluginSrc), nil)
120 | 
121 | 			// Create repository and load plugins.
122 | 			config := prometheus.FileSLIPluginRepoConfig{
123 | 				FileManager: mfm,
124 | 				Paths:       []string{"./"},
125 | 			}
126 | 			repo, err := prometheus.NewFileSLIPluginRepo(config)
127 | 			if test.expErrLoad {
128 | 				assert.Error(err)
129 | 				return
130 | 			}
131 | 			assert.NoError(err)
132 | 
133 | 			// Get plugin.
134 | 			plugin, err := repo.GetSLIPlugin(context.TODO(), test.expPluginID)
135 | 			require.NoError(err)
136 | 
137 | 			// Check.
138 | 			assert.Equal(test.expPluginID, plugin.ID)
139 | 
140 | 			gotSLIQuery, err := plugin.Func(context.TODO(), test.meta, test.labels, test.options)
141 | 			if test.expErr {
142 | 				assert.Error(err)
143 | 			} else if assert.NoError(err) {
144 | 				assert.Equal(test.expSLIQuery, gotSLIQuery)
145 | 			}
146 | 		})
147 | 	}
148 | }
149 | 
--------------------------------------------------------------------------------
/internal/prometheus/spec.go:
--------------------------------------------------------------------------------
1 | package prometheus
2 | 
3 | import (
4 | 	"context"
5 | 	"fmt"
6 | 	"regexp"
7 | 	"time"
8 | 
9 | 	"gopkg.in/yaml.v2"
10 | 
11 | 	prometheusv1 "github.com/slok/sloth/pkg/prometheus/api/v1"
12 | 	prometheuspluginv1 "github.com/slok/sloth/pkg/prometheus/plugin/v1"
13 | )
14 | 
15 | type SLIPluginRepo interface {
16 | 	GetSLIPlugin(ctx context.Context, id string) (*SLIPlugin, error)
17 | }
18 | 
19 | // YAMLSpecLoader knows how to load YAML specs and convert them into a model.
20 | type YAMLSpecLoader struct {
21 | 	windowPeriod time.Duration
22 | 	pluginsRepo  SLIPluginRepo
23 | }
24 | 
25 | // NewYAMLSpecLoader returns a YAML spec loader.
26 | func NewYAMLSpecLoader(pluginsRepo SLIPluginRepo, windowPeriod time.Duration) YAMLSpecLoader {
27 | 	return YAMLSpecLoader{
28 | 		windowPeriod: windowPeriod,
29 | 		pluginsRepo:  pluginsRepo,
30 | 	}
31 | }
32 | 
33 | var specTypeV1Regex = regexp.MustCompile(`(?m)^version: +['"]?prometheus\/v1['"]? *$`)
34 | 
35 | func (y YAMLSpecLoader) IsSpecType(ctx context.Context, data []byte) bool {
36 | 	return specTypeV1Regex.Match(data)
37 | }
38 | 
39 | func (y YAMLSpecLoader) LoadSpec(ctx context.Context, data []byte) (*SLOGroup, error) {
40 | 	if len(data) == 0 {
41 | 		return nil, fmt.Errorf("spec is required")
42 | 	}
43 | 
44 | 	s := prometheusv1.Spec{}
45 | 	err := yaml.Unmarshal(data, &s)
46 | 	if err != nil {
47 | 		return nil, fmt.Errorf("could not unmarshal YAML spec correctly: %w", err)
48 | 	}
49 | 
50 | 	// Check version.
51 | 	if s.Version != prometheusv1.Version {
52 | 		return nil, fmt.Errorf("invalid spec version, should be %q", prometheusv1.Version)
53 | 	}
54 | 
55 | 	// Check that we have at least one SLO.
56 | if len(s.SLOs) == 0 { 57 | return nil, fmt.Errorf("at least one SLO is required") 58 | } 59 | 60 | m, err := y.mapSpecToModel(ctx, s) 61 | if err != nil { 62 | return nil, fmt.Errorf("could not map to model: %w", err) 63 | } 64 | 65 | return m, nil 66 | } 67 | 68 | func (y YAMLSpecLoader) mapSpecToModel(ctx context.Context, spec prometheusv1.Spec) (*SLOGroup, error) { 69 | models := make([]SLO, 0, len(spec.SLOs)) 70 | for _, specSLO := range spec.SLOs { 71 | 72 | slo := SLO{ 73 | ID: fmt.Sprintf("%s-%s", spec.Service, specSLO.Name), 74 | RuleGroupInterval: specSLO.Interval.RuleGroupInterval, 75 | SLIErrorRulesInterval: specSLO.Interval.SLIErrorRulesInterval, 76 | MetadataRulesInterval: specSLO.Interval.MetadataRulesInterval, 77 | AlertRulesInterval: specSLO.Interval.AlertRulesInterval, 78 | Name: specSLO.Name, 79 | Description: specSLO.Description, 80 | Service: spec.Service, 81 | TimeWindow: y.windowPeriod, 82 | Objective: specSLO.Objective, 83 | Labels: mergeLabels(spec.Labels, specSLO.Labels), 84 | PageAlertMeta: AlertMeta{Disable: true}, 85 | TicketAlertMeta: AlertMeta{Disable: true}, 86 | InfoLabels: specSLO.InfoLabels, 87 | } 88 | 89 | // Set SLIs. 90 | if specSLO.SLI.Events != nil { 91 | slo.SLI.Events = &SLIEvents{ 92 | ErrorQuery: specSLO.SLI.Events.ErrorQuery, 93 | TotalQuery: specSLO.SLI.Events.TotalQuery, 94 | } 95 | } 96 | 97 | if specSLO.SLI.Raw != nil { 98 | slo.SLI.Raw = &SLIRaw{ 99 | ErrorRatioQuery: specSLO.SLI.Raw.ErrorRatioQuery, 100 | } 101 | } 102 | 103 | if specSLO.SLI.Plugin != nil { 104 | plugin, err := y.pluginsRepo.GetSLIPlugin(ctx, specSLO.SLI.Plugin.ID) 105 | if err != nil { 106 | return nil, fmt.Errorf("could not get plugin: %w", err) 107 | } 108 | 109 | meta := map[string]string{ 110 | prometheuspluginv1.SLIPluginMetaService: spec.Service, 111 | prometheuspluginv1.SLIPluginMetaSLO: specSLO.Name, 112 | prometheuspluginv1.SLIPluginMetaObjective: fmt.Sprintf("%f", specSLO.Objective), 113 | } 114 | 115 | rawQuery, err := plugin.Func(ctx, meta, spec.Labels, specSLO.SLI.Plugin.Options) 116 | if err != nil { 117 | return nil, fmt.Errorf("plugin %q execution error: %w", specSLO.SLI.Plugin.ID, err) 118 | } 119 | 120 | slo.SLI.Raw = &SLIRaw{ 121 | ErrorRatioQuery: rawQuery, 122 | } 123 | } 124 | 125 | // Set alerts. 
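		// Page and ticket alerts start disabled (see the SLO defaults above) and are
		// only populated with their metadata when the spec doesn't disable them.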
126 | if !specSLO.Alerting.PageAlert.Disable { 127 | slo.PageAlertMeta = AlertMeta{ 128 | Name: specSLO.Alerting.Name, 129 | Labels: mergeLabels(specSLO.Alerting.Labels, specSLO.Alerting.PageAlert.Labels), 130 | Annotations: mergeLabels(specSLO.Alerting.Annotations, specSLO.Alerting.PageAlert.Annotations), 131 | } 132 | } 133 | 134 | if !specSLO.Alerting.TicketAlert.Disable { 135 | slo.TicketAlertMeta = AlertMeta{ 136 | Name: specSLO.Alerting.Name, 137 | Labels: mergeLabels(specSLO.Alerting.Labels, specSLO.Alerting.TicketAlert.Labels), 138 | Annotations: mergeLabels(specSLO.Alerting.Annotations, specSLO.Alerting.TicketAlert.Annotations), 139 | } 140 | } 141 | 142 | models = append(models, slo) 143 | } 144 | 145 | return &SLOGroup{SLOs: models}, nil 146 | } 147 | -------------------------------------------------------------------------------- /pkg/kubernetes/api/sloth/register.go: -------------------------------------------------------------------------------- 1 | package sloth 2 | 3 | const ( 4 | GroupName = "sloth.slok.dev" 5 | ) 6 | -------------------------------------------------------------------------------- /pkg/kubernetes/api/sloth/v1/doc.go: -------------------------------------------------------------------------------- 1 | // +k8s:deepcopy-gen=package 2 | // +groupName=sloth.slok.dev 3 | // +versionName=v1 4 | 5 | package v1 6 | -------------------------------------------------------------------------------- /pkg/kubernetes/api/sloth/v1/register.go: -------------------------------------------------------------------------------- 1 | package v1 2 | 3 | import ( 4 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 5 | "k8s.io/apimachinery/pkg/runtime" 6 | "k8s.io/apimachinery/pkg/runtime/schema" 7 | 8 | "github.com/slok/sloth/pkg/kubernetes/api/sloth" 9 | ) 10 | 11 | const ( 12 | version = "v1" 13 | ) 14 | 15 | // SchemeGroupVersion is group version used to register these objects. 16 | var SchemeGroupVersion = schema.GroupVersion{Group: sloth.GroupName, Version: version} 17 | 18 | // Kind takes an unqualified kind and returns back a Group qualified GroupKind. 19 | func Kind(kind string) schema.GroupKind { 20 | return VersionKind(kind).GroupKind() 21 | } 22 | 23 | // VersionKind takes an unqualified kind and returns back a Group qualified GroupVersionKind. 24 | func VersionKind(kind string) schema.GroupVersionKind { 25 | return SchemeGroupVersion.WithKind(kind) 26 | } 27 | 28 | // Resource takes an unqualified resource and returns a Group qualified GroupResource. 29 | func Resource(resource string) schema.GroupResource { 30 | return SchemeGroupVersion.WithResource(resource).GroupResource() 31 | } 32 | 33 | var ( 34 | // SchemeBuilder initializes a scheme builder. 35 | SchemeBuilder = runtime.NewSchemeBuilder(addKnownTypes) 36 | // AddToScheme is a global function that registers this API group & version to a scheme. 37 | AddToScheme = SchemeBuilder.AddToScheme 38 | ) 39 | 40 | // Adds the list of known types to Scheme. 41 | func addKnownTypes(scheme *runtime.Scheme) error { 42 | scheme.AddKnownTypes(SchemeGroupVersion, 43 | &PrometheusServiceLevel{}, 44 | &PrometheusServiceLevelList{}, 45 | ) 46 | metav1.AddToGroupVersion(scheme, SchemeGroupVersion) 47 | return nil 48 | } 49 | -------------------------------------------------------------------------------- /pkg/kubernetes/gen/clientset/versioned/clientset.go: -------------------------------------------------------------------------------- 1 | // Code generated by client-gen. DO NOT EDIT. 
2 | 3 | package versioned 4 | 5 | import ( 6 | "fmt" 7 | "net/http" 8 | 9 | slothv1 "github.com/slok/sloth/pkg/kubernetes/gen/clientset/versioned/typed/sloth/v1" 10 | discovery "k8s.io/client-go/discovery" 11 | rest "k8s.io/client-go/rest" 12 | flowcontrol "k8s.io/client-go/util/flowcontrol" 13 | ) 14 | 15 | type Interface interface { 16 | Discovery() discovery.DiscoveryInterface 17 | SlothV1() slothv1.SlothV1Interface 18 | } 19 | 20 | // Clientset contains the clients for groups. Each group has exactly one 21 | // version included in a Clientset. 22 | type Clientset struct { 23 | *discovery.DiscoveryClient 24 | slothV1 *slothv1.SlothV1Client 25 | } 26 | 27 | // SlothV1 retrieves the SlothV1Client 28 | func (c *Clientset) SlothV1() slothv1.SlothV1Interface { 29 | return c.slothV1 30 | } 31 | 32 | // Discovery retrieves the DiscoveryClient 33 | func (c *Clientset) Discovery() discovery.DiscoveryInterface { 34 | if c == nil { 35 | return nil 36 | } 37 | return c.DiscoveryClient 38 | } 39 | 40 | // NewForConfig creates a new Clientset for the given config. 41 | // If config's RateLimiter is not set and QPS and Burst are acceptable, 42 | // NewForConfig will generate a rate-limiter in configShallowCopy. 43 | // NewForConfig is equivalent to NewForConfigAndClient(c, httpClient), 44 | // where httpClient was generated with rest.HTTPClientFor(c). 45 | func NewForConfig(c *rest.Config) (*Clientset, error) { 46 | configShallowCopy := *c 47 | 48 | if configShallowCopy.UserAgent == "" { 49 | configShallowCopy.UserAgent = rest.DefaultKubernetesUserAgent() 50 | } 51 | 52 | // share the transport between all clients 53 | httpClient, err := rest.HTTPClientFor(&configShallowCopy) 54 | if err != nil { 55 | return nil, err 56 | } 57 | 58 | return NewForConfigAndClient(&configShallowCopy, httpClient) 59 | } 60 | 61 | // NewForConfigAndClient creates a new Clientset for the given config and http client. 62 | // Note the http client provided takes precedence over the configured transport values. 63 | // If config's RateLimiter is not set and QPS and Burst are acceptable, 64 | // NewForConfigAndClient will generate a rate-limiter in configShallowCopy. 65 | func NewForConfigAndClient(c *rest.Config, httpClient *http.Client) (*Clientset, error) { 66 | configShallowCopy := *c 67 | if configShallowCopy.RateLimiter == nil && configShallowCopy.QPS > 0 { 68 | if configShallowCopy.Burst <= 0 { 69 | return nil, fmt.Errorf("burst is required to be greater than 0 when RateLimiter is not set and QPS is set to greater than 0") 70 | } 71 | configShallowCopy.RateLimiter = flowcontrol.NewTokenBucketRateLimiter(configShallowCopy.QPS, configShallowCopy.Burst) 72 | } 73 | 74 | var cs Clientset 75 | var err error 76 | cs.slothV1, err = slothv1.NewForConfigAndClient(&configShallowCopy, httpClient) 77 | if err != nil { 78 | return nil, err 79 | } 80 | 81 | cs.DiscoveryClient, err = discovery.NewDiscoveryClientForConfigAndClient(&configShallowCopy, httpClient) 82 | if err != nil { 83 | return nil, err 84 | } 85 | return &cs, nil 86 | } 87 | 88 | // NewForConfigOrDie creates a new Clientset for the given config and 89 | // panics if there is an error in the config. 90 | func NewForConfigOrDie(c *rest.Config) *Clientset { 91 | cs, err := NewForConfig(c) 92 | if err != nil { 93 | panic(err) 94 | } 95 | return cs 96 | } 97 | 98 | // New creates a new Clientset for the given RESTClient. 
99 | func New(c rest.Interface) *Clientset { 100 | var cs Clientset 101 | cs.slothV1 = slothv1.New(c) 102 | 103 | cs.DiscoveryClient = discovery.NewDiscoveryClient(c) 104 | return &cs 105 | } 106 | -------------------------------------------------------------------------------- /pkg/kubernetes/gen/clientset/versioned/doc.go: -------------------------------------------------------------------------------- 1 | // Code generated by client-gen. DO NOT EDIT. 2 | 3 | // This package has the automatically generated clientset. 4 | package versioned 5 | -------------------------------------------------------------------------------- /pkg/kubernetes/gen/clientset/versioned/fake/clientset_generated.go: -------------------------------------------------------------------------------- 1 | // Code generated by client-gen. DO NOT EDIT. 2 | 3 | package fake 4 | 5 | import ( 6 | clientset "github.com/slok/sloth/pkg/kubernetes/gen/clientset/versioned" 7 | slothv1 "github.com/slok/sloth/pkg/kubernetes/gen/clientset/versioned/typed/sloth/v1" 8 | fakeslothv1 "github.com/slok/sloth/pkg/kubernetes/gen/clientset/versioned/typed/sloth/v1/fake" 9 | "k8s.io/apimachinery/pkg/runtime" 10 | "k8s.io/apimachinery/pkg/watch" 11 | "k8s.io/client-go/discovery" 12 | fakediscovery "k8s.io/client-go/discovery/fake" 13 | "k8s.io/client-go/testing" 14 | ) 15 | 16 | // NewSimpleClientset returns a clientset that will respond with the provided objects. 17 | // It's backed by a very simple object tracker that processes creates, updates and deletions as-is, 18 | // without applying any validations and/or defaults. It shouldn't be considered a replacement 19 | // for a real clientset and is mostly useful in simple unit tests. 20 | func NewSimpleClientset(objects ...runtime.Object) *Clientset { 21 | o := testing.NewObjectTracker(scheme, codecs.UniversalDecoder()) 22 | for _, obj := range objects { 23 | if err := o.Add(obj); err != nil { 24 | panic(err) 25 | } 26 | } 27 | 28 | cs := &Clientset{tracker: o} 29 | cs.discovery = &fakediscovery.FakeDiscovery{Fake: &cs.Fake} 30 | cs.AddReactor("*", "*", testing.ObjectReaction(o)) 31 | cs.AddWatchReactor("*", func(action testing.Action) (handled bool, ret watch.Interface, err error) { 32 | gvr := action.GetResource() 33 | ns := action.GetNamespace() 34 | watch, err := o.Watch(gvr, ns) 35 | if err != nil { 36 | return false, nil, err 37 | } 38 | return true, watch, nil 39 | }) 40 | 41 | return cs 42 | } 43 | 44 | // Clientset implements clientset.Interface. Meant to be embedded into a 45 | // struct to get a default implementation. This makes faking out just the method 46 | // you want to test easier. 47 | type Clientset struct { 48 | testing.Fake 49 | discovery *fakediscovery.FakeDiscovery 50 | tracker testing.ObjectTracker 51 | } 52 | 53 | func (c *Clientset) Discovery() discovery.DiscoveryInterface { 54 | return c.discovery 55 | } 56 | 57 | func (c *Clientset) Tracker() testing.ObjectTracker { 58 | return c.tracker 59 | } 60 | 61 | var ( 62 | _ clientset.Interface = &Clientset{} 63 | _ testing.FakeClient = &Clientset{} 64 | ) 65 | 66 | // SlothV1 retrieves the SlothV1Client 67 | func (c *Clientset) SlothV1() slothv1.SlothV1Interface { 68 | return &fakeslothv1.FakeSlothV1{Fake: &c.Fake} 69 | } 70 | -------------------------------------------------------------------------------- /pkg/kubernetes/gen/clientset/versioned/fake/doc.go: -------------------------------------------------------------------------------- 1 | // Code generated by client-gen. DO NOT EDIT. 
2 | 3 | // This package has the automatically generated fake clientset. 4 | package fake 5 | -------------------------------------------------------------------------------- /pkg/kubernetes/gen/clientset/versioned/fake/register.go: -------------------------------------------------------------------------------- 1 | // Code generated by client-gen. DO NOT EDIT. 2 | 3 | package fake 4 | 5 | import ( 6 | slothv1 "github.com/slok/sloth/pkg/kubernetes/api/sloth/v1" 7 | v1 "k8s.io/apimachinery/pkg/apis/meta/v1" 8 | runtime "k8s.io/apimachinery/pkg/runtime" 9 | schema "k8s.io/apimachinery/pkg/runtime/schema" 10 | serializer "k8s.io/apimachinery/pkg/runtime/serializer" 11 | utilruntime "k8s.io/apimachinery/pkg/util/runtime" 12 | ) 13 | 14 | var scheme = runtime.NewScheme() 15 | var codecs = serializer.NewCodecFactory(scheme) 16 | 17 | var localSchemeBuilder = runtime.SchemeBuilder{ 18 | slothv1.AddToScheme, 19 | } 20 | 21 | // AddToScheme adds all types of this clientset into the given scheme. This allows composition 22 | // of clientsets, like in: 23 | // 24 | // import ( 25 | // "k8s.io/client-go/kubernetes" 26 | // clientsetscheme "k8s.io/client-go/kubernetes/scheme" 27 | // aggregatorclientsetscheme "k8s.io/kube-aggregator/pkg/client/clientset_generated/clientset/scheme" 28 | // ) 29 | // 30 | // kclientset, _ := kubernetes.NewForConfig(c) 31 | // _ = aggregatorclientsetscheme.AddToScheme(clientsetscheme.Scheme) 32 | // 33 | // After this, RawExtensions in Kubernetes types will serialize kube-aggregator types 34 | // correctly. 35 | var AddToScheme = localSchemeBuilder.AddToScheme 36 | 37 | func init() { 38 | v1.AddToGroupVersion(scheme, schema.GroupVersion{Version: "v1"}) 39 | utilruntime.Must(AddToScheme(scheme)) 40 | } 41 | -------------------------------------------------------------------------------- /pkg/kubernetes/gen/clientset/versioned/scheme/doc.go: -------------------------------------------------------------------------------- 1 | // Code generated by client-gen. DO NOT EDIT. 2 | 3 | // This package contains the scheme of the automatically generated clientset. 4 | package scheme 5 | -------------------------------------------------------------------------------- /pkg/kubernetes/gen/clientset/versioned/scheme/register.go: -------------------------------------------------------------------------------- 1 | // Code generated by client-gen. DO NOT EDIT. 2 | 3 | package scheme 4 | 5 | import ( 6 | slothv1 "github.com/slok/sloth/pkg/kubernetes/api/sloth/v1" 7 | v1 "k8s.io/apimachinery/pkg/apis/meta/v1" 8 | runtime "k8s.io/apimachinery/pkg/runtime" 9 | schema "k8s.io/apimachinery/pkg/runtime/schema" 10 | serializer "k8s.io/apimachinery/pkg/runtime/serializer" 11 | utilruntime "k8s.io/apimachinery/pkg/util/runtime" 12 | ) 13 | 14 | var Scheme = runtime.NewScheme() 15 | var Codecs = serializer.NewCodecFactory(Scheme) 16 | var ParameterCodec = runtime.NewParameterCodec(Scheme) 17 | var localSchemeBuilder = runtime.SchemeBuilder{ 18 | slothv1.AddToScheme, 19 | } 20 | 21 | // AddToScheme adds all types of this clientset into the given scheme. 
This allows composition 22 | // of clientsets, like in: 23 | // 24 | // import ( 25 | // "k8s.io/client-go/kubernetes" 26 | // clientsetscheme "k8s.io/client-go/kubernetes/scheme" 27 | // aggregatorclientsetscheme "k8s.io/kube-aggregator/pkg/client/clientset_generated/clientset/scheme" 28 | // ) 29 | // 30 | // kclientset, _ := kubernetes.NewForConfig(c) 31 | // _ = aggregatorclientsetscheme.AddToScheme(clientsetscheme.Scheme) 32 | // 33 | // After this, RawExtensions in Kubernetes types will serialize kube-aggregator types 34 | // correctly. 35 | var AddToScheme = localSchemeBuilder.AddToScheme 36 | 37 | func init() { 38 | v1.AddToGroupVersion(Scheme, schema.GroupVersion{Version: "v1"}) 39 | utilruntime.Must(AddToScheme(Scheme)) 40 | } 41 | -------------------------------------------------------------------------------- /pkg/kubernetes/gen/clientset/versioned/typed/sloth/v1/doc.go: -------------------------------------------------------------------------------- 1 | // Code generated by client-gen. DO NOT EDIT. 2 | 3 | // This package has the automatically generated typed clients. 4 | package v1 5 | -------------------------------------------------------------------------------- /pkg/kubernetes/gen/clientset/versioned/typed/sloth/v1/fake/doc.go: -------------------------------------------------------------------------------- 1 | // Code generated by client-gen. DO NOT EDIT. 2 | 3 | // Package fake has the automatically generated clients. 4 | package fake 5 | -------------------------------------------------------------------------------- /pkg/kubernetes/gen/clientset/versioned/typed/sloth/v1/fake/fake_sloth_client.go: -------------------------------------------------------------------------------- 1 | // Code generated by client-gen. DO NOT EDIT. 2 | 3 | package fake 4 | 5 | import ( 6 | v1 "github.com/slok/sloth/pkg/kubernetes/gen/clientset/versioned/typed/sloth/v1" 7 | rest "k8s.io/client-go/rest" 8 | testing "k8s.io/client-go/testing" 9 | ) 10 | 11 | type FakeSlothV1 struct { 12 | *testing.Fake 13 | } 14 | 15 | func (c *FakeSlothV1) PrometheusServiceLevels(namespace string) v1.PrometheusServiceLevelInterface { 16 | return &FakePrometheusServiceLevels{c, namespace} 17 | } 18 | 19 | // RESTClient returns a RESTClient that is used to communicate 20 | // with API server by this client implementation. 21 | func (c *FakeSlothV1) RESTClient() rest.Interface { 22 | var ret *rest.RESTClient 23 | return ret 24 | } 25 | -------------------------------------------------------------------------------- /pkg/kubernetes/gen/clientset/versioned/typed/sloth/v1/generated_expansion.go: -------------------------------------------------------------------------------- 1 | // Code generated by client-gen. DO NOT EDIT. 2 | 3 | package v1 4 | 5 | type PrometheusServiceLevelExpansion interface{} 6 | -------------------------------------------------------------------------------- /pkg/kubernetes/gen/clientset/versioned/typed/sloth/v1/sloth_client.go: -------------------------------------------------------------------------------- 1 | // Code generated by client-gen. DO NOT EDIT. 
2 | 3 | package v1 4 | 5 | import ( 6 | "net/http" 7 | 8 | v1 "github.com/slok/sloth/pkg/kubernetes/api/sloth/v1" 9 | "github.com/slok/sloth/pkg/kubernetes/gen/clientset/versioned/scheme" 10 | rest "k8s.io/client-go/rest" 11 | ) 12 | 13 | type SlothV1Interface interface { 14 | RESTClient() rest.Interface 15 | PrometheusServiceLevelsGetter 16 | } 17 | 18 | // SlothV1Client is used to interact with features provided by the sloth.slok.dev group. 19 | type SlothV1Client struct { 20 | restClient rest.Interface 21 | } 22 | 23 | func (c *SlothV1Client) PrometheusServiceLevels(namespace string) PrometheusServiceLevelInterface { 24 | return newPrometheusServiceLevels(c, namespace) 25 | } 26 | 27 | // NewForConfig creates a new SlothV1Client for the given config. 28 | // NewForConfig is equivalent to NewForConfigAndClient(c, httpClient), 29 | // where httpClient was generated with rest.HTTPClientFor(c). 30 | func NewForConfig(c *rest.Config) (*SlothV1Client, error) { 31 | config := *c 32 | if err := setConfigDefaults(&config); err != nil { 33 | return nil, err 34 | } 35 | httpClient, err := rest.HTTPClientFor(&config) 36 | if err != nil { 37 | return nil, err 38 | } 39 | return NewForConfigAndClient(&config, httpClient) 40 | } 41 | 42 | // NewForConfigAndClient creates a new SlothV1Client for the given config and http client. 43 | // Note the http client provided takes precedence over the configured transport values. 44 | func NewForConfigAndClient(c *rest.Config, h *http.Client) (*SlothV1Client, error) { 45 | config := *c 46 | if err := setConfigDefaults(&config); err != nil { 47 | return nil, err 48 | } 49 | client, err := rest.RESTClientForConfigAndClient(&config, h) 50 | if err != nil { 51 | return nil, err 52 | } 53 | return &SlothV1Client{client}, nil 54 | } 55 | 56 | // NewForConfigOrDie creates a new SlothV1Client for the given config and 57 | // panics if there is an error in the config. 58 | func NewForConfigOrDie(c *rest.Config) *SlothV1Client { 59 | client, err := NewForConfig(c) 60 | if err != nil { 61 | panic(err) 62 | } 63 | return client 64 | } 65 | 66 | // New creates a new SlothV1Client for the given RESTClient. 67 | func New(c rest.Interface) *SlothV1Client { 68 | return &SlothV1Client{c} 69 | } 70 | 71 | func setConfigDefaults(config *rest.Config) error { 72 | gv := v1.SchemeGroupVersion 73 | config.GroupVersion = &gv 74 | config.APIPath = "/apis" 75 | config.NegotiatedSerializer = scheme.Codecs.WithoutConversion() 76 | 77 | if config.UserAgent == "" { 78 | config.UserAgent = rest.DefaultKubernetesUserAgent() 79 | } 80 | 81 | return nil 82 | } 83 | 84 | // RESTClient returns a RESTClient that is used to communicate 85 | // with API server by this client implementation. 
86 | func (c *SlothV1Client) RESTClient() rest.Interface {
87 | 	if c == nil {
88 | 		return nil
89 | 	}
90 | 	return c.restClient
91 | }
92 | 
--------------------------------------------------------------------------------
/pkg/prometheus/alertwindows/v1/README.md:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | # v1
4 | 
5 | ```go
6 | import "github.com/slok/sloth/pkg/prometheus/alertwindows/v1"
7 | ```
8 | 
9 | ## Index
10 | 
11 | - [Constants](<#constants>)
12 | - [type AlertWindows](<#type-alertwindows>)
13 | - [type PageWindow](<#type-pagewindow>)
14 | - [type QuickSlowWindow](<#type-quickslowwindow>)
15 | - [type Spec](<#type-spec>)
16 | - [type TicketWindow](<#type-ticketwindow>)
17 | - [type Window](<#type-window>)
18 | 
19 | 
20 | ## Constants
21 | 
22 | ```go
23 | const APIVersion = "sloth.slok.dev/v1"
24 | ```
25 | 
26 | ```go
27 | const Kind = "AlertWindows"
28 | ```
29 | 
30 | ## type [AlertWindows]()
31 | 
32 | ```go
33 | type AlertWindows struct {
34 |     Kind       string `yaml:"kind"`
35 |     APIVersion string `yaml:"apiVersion"`
36 |     Spec       Spec   `yaml:"spec"`
37 | }
38 | ```
39 | 
40 | ## type [PageWindow]()
41 | 
42 | PageWindow represents the configuration for page alerting.
43 | 
44 | ```go
45 | type PageWindow struct {
46 |     QuickSlowWindow `yaml:",inline"`
47 | }
48 | ```
49 | 
50 | ## type [QuickSlowWindow]()
51 | 
52 | ```go
53 | type QuickSlowWindow struct {
54 |     // Quick represents the windows for the quick alerting trigger.
55 |     Quick Window `yaml:"quick"`
56 |     // Slow represents the windows for the slow alerting trigger.
57 |     Slow Window `yaml:"slow"`
58 | }
59 | ```
60 | 
61 | ## type [Spec]()
62 | 
63 | Spec represents the root type of the alerting windows.
64 | 
65 | ```go
66 | type Spec struct {
67 |     // SLOPeriod is the full SLO period used for these windows.
68 |     SLOPeriod prometheusmodel.Duration `yaml:"sloPeriod"`
69 |     // Page represents the configuration for the page alerting windows.
70 |     Page PageWindow `yaml:"page"`
71 |     // Ticket represents the configuration for the ticket alerting windows.
72 |     Ticket TicketWindow `yaml:"ticket"`
73 | }
74 | ```
75 | 
76 | ## type [TicketWindow]()
77 | 
78 | TicketWindow represents the configuration for ticket alerting.
79 | 
80 | ```go
81 | type TicketWindow struct {
82 |     QuickSlowWindow `yaml:",inline"`
83 | }
84 | ```
85 | 
86 | ## type [Window]()
87 | 
88 | ```go
89 | type Window struct {
90 |     // ErrorBudgetPercent is the max error budget consumption allowed in the window.
91 |     ErrorBudgetPercent float64 `yaml:"errorBudgetPercent"`
92 |     // ShortWindow is the window used to stop alerting when a big chunk of error
93 |     // budget has been consumed but the errors have already stopped.
94 |     ShortWindow prometheusmodel.Duration `yaml:"shortWindow"`
95 |     // LongWindow is the window used to measure the error budget consumption over the whole window.
96 |     LongWindow prometheusmodel.Duration `yaml:"longWindow"`
97 | }
98 | ```
99 | 
100 | 
101 | 
102 | Generated by [gomarkdoc]()
103 | 
--------------------------------------------------------------------------------
/pkg/prometheus/alertwindows/v1/v1.go:
--------------------------------------------------------------------------------
1 | // Package v1 contains the API types to define custom SLO period alerting windows.
2 | 
3 | package v1
4 | 
5 | import prometheusmodel "github.com/prometheus/common/model"
6 | 
7 | const Kind = "AlertWindows"
8 | const APIVersion = "sloth.slok.dev/v1"
9 | 
10 | //go:generate gomarkdoc -o ./README.md ./
11 | 
12 | type AlertWindows struct {
13 | 	Kind       string `yaml:"kind"`
14 | 	APIVersion string `yaml:"apiVersion"`
15 | 	Spec       Spec   `yaml:"spec"`
16 | }
17 | 
18 | // Spec represents the root type of the alerting windows.
19 | type Spec struct {
20 | 	// SLOPeriod is the full SLO period used for these windows.
21 | 	SLOPeriod prometheusmodel.Duration `yaml:"sloPeriod"`
22 | 	// Page represents the configuration for the page alerting windows.
23 | 	Page PageWindow `yaml:"page"`
24 | 	// Ticket represents the configuration for the ticket alerting windows.
25 | 	Ticket TicketWindow `yaml:"ticket"`
26 | }
27 | 
28 | // PageWindow represents the configuration for page alerting.
29 | type PageWindow struct {
30 | 	QuickSlowWindow `yaml:",inline"`
31 | }
32 | 
33 | // TicketWindow represents the configuration for ticket alerting.
34 | type TicketWindow struct {
35 | 	QuickSlowWindow `yaml:",inline"`
36 | }
37 | 
38 | type QuickSlowWindow struct {
39 | 	// Quick represents the windows for the quick alerting trigger.
40 | 	Quick Window `yaml:"quick"`
41 | 	// Slow represents the windows for the slow alerting trigger.
42 | 	Slow Window `yaml:"slow"`
43 | }
44 | 
45 | type Window struct {
46 | 	// ErrorBudgetPercent is the max error budget consumption allowed in the window.
47 | 	ErrorBudgetPercent float64 `yaml:"errorBudgetPercent"`
48 | 	// ShortWindow is the window used to stop alerting when a big chunk of error
49 | 	// budget has been consumed but the errors have already stopped.
50 | 	ShortWindow prometheusmodel.Duration `yaml:"shortWindow"`
51 | 	// LongWindow is the window used to measure the error budget consumption over the whole window.
52 | 	LongWindow prometheusmodel.Duration `yaml:"longWindow"`
53 | }
54 | 
--------------------------------------------------------------------------------
/pkg/prometheus/plugin/v1/v1.go:
--------------------------------------------------------------------------------
1 | // Package plugin has all the API to load prometheus plugins using Yaegi.
2 | // It uses aliases and common types to ease dynamic plugin loading so plugins don't need
3 | // to import this package as a library (removing dependencies/external libs from plugins).
4 | //
5 | // We use map[string]string and let the plugin make the correct conversion of types because
6 | // dealing with interfaces on dynamic plugins can lead to bugs and unwanted behaviour, so we
7 | // play it safe.
8 | package plugin
9 | 
10 | import "context"
11 | 
12 | // Version is this plugin type version.
13 | const Version = "prometheus/v1"
14 | 
15 | // SLIPluginVersion is the version of the plugin (e.g: `prometheus/v1`).
16 | type SLIPluginVersion = string
17 | 
18 | // SLIPluginID is the ID of the plugin.
19 | type SLIPluginID = string
20 | 
21 | // Metadata keys.
22 | const (
23 | 	SLIPluginMetaService   = "service"
24 | 	SLIPluginMetaSLO       = "slo"
25 | 	SLIPluginMetaObjective = "objective"
26 | )
27 | 
28 | // SLIPlugin knows how to generate SLIs based on data options.
29 | // 30 | // This is the type the SLI plugins need to implement. 31 | type SLIPlugin = func(ctx context.Context, meta, labels, options map[string]string) (query string, err error) 32 | -------------------------------------------------------------------------------- /scripts/build/bin/build-all.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -o errexit 4 | set -o nounset 5 | 6 | # Build all. 7 | ostypes=("Linux" "Darwin" "Windows" "ARM") 8 | for ostype in "${ostypes[@]}" 9 | do 10 | ostype="${ostype}" ./scripts/build/bin/build.sh 11 | done 12 | 13 | # Create checksums. 14 | checksums_dir="./bin" 15 | cd ${checksums_dir} && sha256sum * > ./checksums.txt 16 | -------------------------------------------------------------------------------- /scripts/build/bin/build-raw.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -o errexit 4 | set -o nounset 5 | 6 | # Env vars that can be set. 7 | # - EXTENSION: The binary out extension. 8 | # - VERSION: Version for the binary. 9 | # - GOOS: OS compiling target 10 | # - GOARCH: Arch compiling target. 11 | # - GOARM: ARM version. 12 | 13 | version_path="github.com/slok/sloth/internal/info.Version" 14 | src=./cmd/sloth 15 | out=./bin/sloth 16 | 17 | # Prepare flags. 18 | final_out=${out}${EXTENSION:-} 19 | ldf_cmp="-s -w -extldflags '-static'" 20 | f_ver="-X ${version_path}=${VERSION:-dev}" 21 | 22 | # Build binary. 23 | echo "[*] Building binary at ${final_out} (GOOS=${GOOS:-}, GOARCH=${GOARCH:-}, GOARM=${GOARM:-}, VERSION=${VERSION:-}, EXTENSION=${EXTENSION:-})" 24 | CGO_ENABLED=0 go build -buildvcs=false -o ${final_out} --ldflags "${ldf_cmp} ${f_ver}" ${src} 25 | -------------------------------------------------------------------------------- /scripts/build/bin/build.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -o errexit 4 | set -o nounset 5 | 6 | build_script="./scripts/build/bin/build-raw.sh" 7 | ostype=${ostype:-"native"} 8 | 9 | echo "[+] Build OS type selected: ${ostype}" 10 | 11 | if [ $ostype == 'Linux' ]; then 12 | EXTENSION="-linux-amd64" GOOS="linux" GOARCH="amd64" ${build_script} 13 | elif [ $ostype == 'Darwin' ]; then 14 | EXTENSION="-darwin-amd64" GOOS="darwin" GOARCH="amd64" ${build_script} 15 | EXTENSION="-darwin-arm64" GOOS="darwin" GOARCH="arm64" ${build_script} 16 | elif [ $ostype == 'Windows' ]; then 17 | EXTENSION="-windows-amd64.exe" GOOS="windows" GOARCH="amd64" ${build_script} 18 | elif [ $ostype == 'ARM' ]; then 19 | EXTENSION="-linux-arm64" GOOS="linux" GOARCH="arm64" ${build_script} 20 | EXTENSION="-linux-arm-v7" GOOS="linux" GOARCH="arm" GOARM="7" ${build_script} 21 | else 22 | # Native. 23 | ${build_script} 24 | fi 25 | -------------------------------------------------------------------------------- /scripts/build/docker/build-image-dev.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | 3 | set -e 4 | 5 | 6 | [ -z "$VERSION" ] && echo "VERSION env var is required." && exit 1; 7 | [ -z "$IMAGE" ] && echo "IMAGE env var is required." && exit 1; 8 | [ -z "$DOCKER_FILE_PATH" ] && echo "DOCKER_FILE_PATH env var is required." && exit 1; 9 | 10 | # Build image. 11 | echo "Building dev image ${IMAGE}:${VERSION}..." 12 | docker build \ 13 | -t "${IMAGE}:${VERSION}" \ 14 | -f "${DOCKER_FILE_PATH}" . 
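
# Example invocation (illustrative values, not the repository's official ones):
#   VERSION=dev IMAGE=slok/sloth-dev DOCKER_FILE_PATH=./docker/dev/Dockerfile \
#     ./scripts/build/docker/build-image-dev.sh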
--------------------------------------------------------------------------------
/scripts/build/docker/build-image.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env sh
2 | 
3 | set -e
4 | 
5 | 
6 | [ -z "$VERSION" ] && echo "VERSION env var is required." && exit 1;
7 | [ -z "$IMAGE" ] && echo "IMAGE env var is required." && exit 1;
8 | [ -z "$DOCKER_FILE_PATH" ] && echo "DOCKER_FILE_PATH env var is required." && exit 1;
9 | 
10 | # By default use amd64 architecture.
11 | DEF_ARCH=amd64
12 | ARCH=${ARCH:-$DEF_ARCH}
13 | 
14 | IMAGE_TAG_ARCH="${IMAGE}:${VERSION}-${ARCH}"
15 | 
16 | # Build image.
17 | echo "Building image ${IMAGE_TAG_ARCH}..."
18 | docker build \
19 |     --build-arg VERSION="${VERSION}" \
20 |     --build-arg ARCH="${ARCH}" \
21 |     -t "${IMAGE_TAG_ARCH}" \
22 |     -f "${DOCKER_FILE_PATH}" .
--------------------------------------------------------------------------------
/scripts/build/docker/build-publish-image-all.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | set -o errexit
4 | set -o nounset
5 | 
6 | [ -z "$VERSION" ] && echo "VERSION env var is required." && exit 1;
7 | [ -z "$IMAGE" ] && echo "IMAGE env var is required." && exit 1;
8 | 
9 | # Build and publish images for all architectures.
10 | archs=("amd64" "arm64" "arm" "ppc64le" "s390x")
11 | for arch in "${archs[@]}"; do
12 |     ARCH="${arch}" ./scripts/build/docker/build-image.sh
13 |     ARCH="${arch}" ./scripts/build/docker/publish-image.sh
14 | done
15 | 
16 | IMAGE_TAG="${IMAGE}:${VERSION}"
17 | 
18 | # Create manifest to join all arch images under one virtual tag.
19 | MANIFEST="docker manifest create -a ${IMAGE_TAG}"
20 | for arch in "${archs[@]}"; do
21 |     MANIFEST="${MANIFEST} ${IMAGE_TAG}-${arch}"
22 | done
23 | eval "${MANIFEST}"
24 | 
25 | # Annotate each arch manifest to set which image is built for which CPU architecture.
26 | for arch in "${archs[@]}"; do
27 |     docker manifest annotate --arch "${arch}" "${IMAGE_TAG}" "${IMAGE_TAG}-${arch}"
28 | done
29 | 
30 | # Push virtual tag metadata.
31 | docker manifest push "${IMAGE_TAG}"
32 | 
33 | # Same as the regular virtual tag but for `:latest`.
34 | if [ ! -z "${TAG_IMAGE_LATEST:-}" ]; then
35 |     IMAGE_TAG_LATEST="${IMAGE}:latest"
36 | 
37 |     # Clean latest manifest in case there is one.
38 |     docker manifest rm ${IMAGE_TAG_LATEST} || true
39 | 
40 |     # Create manifest to join all arch images under one virtual tag.
41 |     MANIFEST_LATEST="docker manifest create -a ${IMAGE_TAG_LATEST}"
42 |     for arch in "${archs[@]}"; do
43 |         MANIFEST_LATEST="${MANIFEST_LATEST} ${IMAGE_TAG}-${arch}"
44 |     done
45 |     eval "${MANIFEST_LATEST}"
46 | 
47 |     # Annotate each arch manifest to set which image is built for which CPU architecture.
48 |     for arch in "${archs[@]}"; do
49 |         docker manifest annotate --arch "${arch}" "${IMAGE_TAG_LATEST}" "${IMAGE_TAG}-${arch}"
50 |     done
51 | 
52 |     # Push virtual tag metadata.
53 |     docker manifest push "${IMAGE_TAG_LATEST}"
54 | fi
--------------------------------------------------------------------------------
/scripts/build/docker/publish-image.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env sh
2 | 
3 | set -e
4 | 
5 | 
6 | [ -z "$VERSION" ] && echo "VERSION env var is required." && exit 1;
7 | [ -z "$IMAGE" ] && echo "IMAGE env var is required."
&& exit 1; 8 | 9 | DEF_ARCH=amd64 10 | ARCH=${ARCH:-$DEF_ARCH} 11 | 12 | IMAGE_TAG_ARCH="${IMAGE}:${VERSION}-${ARCH}" 13 | 14 | echo "Pushing image ${IMAGE_TAG_ARCH}..." 15 | docker push ${IMAGE_TAG_ARCH} 16 | -------------------------------------------------------------------------------- /scripts/check/check.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | 3 | set -o errexit 4 | set -o nounset 5 | 6 | golangci-lint run -------------------------------------------------------------------------------- /scripts/check/helm-test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | 3 | set -o errexit 4 | set -o nounset 5 | 6 | cd ./deploy/kubernetes/helm/sloth/tests 7 | go test -race -coverprofile=.test_coverage.txt $(go list ./... | grep -v /test/integration ) 8 | go tool cover -func=.test_coverage.txt | tail -n1 | awk '{print "Total test coverage: " $3}' -------------------------------------------------------------------------------- /scripts/check/integration-test-cli.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | 3 | set -o errexit 4 | set -o nounset 5 | 6 | go test -race -tags='integration' -v ./test/integration/prometheus/... -------------------------------------------------------------------------------- /scripts/check/integration-test-k8s.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | 3 | set -o errexit 4 | set -o nounset 5 | 6 | go test -race -tags='integration' -v ./test/integration/k8scontroller/... -------------------------------------------------------------------------------- /scripts/check/integration-test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | 3 | set -o errexit 4 | set -o nounset 5 | 6 | go test -race -tags='integration' -v ./test/integration/... -------------------------------------------------------------------------------- /scripts/check/unit-test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | 3 | set -o errexit 4 | set -o nounset 5 | 6 | go test -buildvcs=false -race -coverprofile=.test_coverage.txt $(go list ./... | grep -v /test/integration ) 7 | go tool cover -func=.test_coverage.txt | tail -n1 | awk '{print "Total test coverage: " $3}' 8 | -------------------------------------------------------------------------------- /scripts/deploygen.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # vim: ai:ts=8:sw=8:noet 3 | set -efCo pipefail 4 | export SHELLOPTS 5 | IFS=$'\t\n' 6 | 7 | command -v helm >/dev/null 2>&1 || { echo 'please install helm'; exit 1; } 8 | 9 | HELM_CHART_PATH="${HELM_CHART_PATH:-./deploy/kubernetes/helm/sloth}" 10 | [ -z "$HELM_CHART_PATH" ] && echo "HELM_CHART_PATH env is needed" && exit 1; 11 | 12 | GEN_PATH="${GEN_PATH:-./deploy/kubernetes/raw}" 13 | [ -z "$GEN_PATH" ] && echo "GEN_PATH env is needed" && exit 1; 14 | 15 | mkdir -p "${GEN_PATH}" 16 | 17 | echo "[*] Rendering chart without plugins..." 18 | rm "${GEN_PATH}/sloth.yaml" 19 | helm template sloth "${HELM_CHART_PATH}" \ 20 | --namespace "monitoring" \ 21 | --set "commonPlugins.enabled=false" > "${GEN_PATH}/sloth.yaml" 22 | 23 | echo "[*] Rendering chart with plugins..." 
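# noclobber (set -C) is enabled above, so the previous render must be removed before redirecting over it.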
24 | rm "${GEN_PATH}/sloth-with-common-plugins.yaml"
25 | helm template sloth "${HELM_CHART_PATH}" \
26 |     --namespace "monitoring" > "${GEN_PATH}/sloth-with-common-plugins.yaml"
--------------------------------------------------------------------------------
/scripts/deps.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env sh
2 | 
3 | set -o errexit
4 | set -o nounset
5 | 
6 | go mod tidy
--------------------------------------------------------------------------------
/scripts/examplesgen.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # vim: ai:ts=8:sw=8:noet
3 | set -efCo pipefail
4 | export SHELLOPTS
5 | IFS=$'\t\n'
6 | 
7 | command -v go >/dev/null 2>&1 || {
8 |     echo 'please install go'
9 |     exit 1
10 | }
11 | 
12 | SLOS_PATH="${SLOS_PATH:-./examples}"
13 | [ -z "$SLOS_PATH" ] && echo "SLOS_PATH env is needed" && exit 1
14 | 
15 | GEN_PATH="${GEN_PATH:-./examples/_gen}"
16 | [ -z "$GEN_PATH" ] && echo "GEN_PATH env is needed" && exit 1
17 | 
18 | mkdir -p "${GEN_PATH}"
19 | 
20 | # Rebuilding sloth (via go run) on each generation run is good enough here, and it
21 | # ensures the examples are generated with the current development version.
22 | go run ./cmd/sloth/ generate -i "${SLOS_PATH}" -o "${GEN_PATH}" -p "${SLOS_PATH}" --extra-labels "cmd=examplesgen.sh" -e "_gen|windows"
23 | 
--------------------------------------------------------------------------------
/scripts/gogen.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env sh
2 | 
3 | set -o errexit
4 | set -o nounset
5 | 
6 | go generate ./...
--------------------------------------------------------------------------------
/scripts/kubegen.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env sh
2 | 
3 | set -o errexit
4 | set -o nounset
5 | 
6 | IMAGE_CLI_GEN=quay.io/slok/kube-code-generator:v1.25.0
7 | IMAGE_CRD_GEN=quay.io/slok/kube-code-generator:v1.25.0
8 | ROOT_DIRECTORY=$(dirname "$(readlink -f "$0")")/../
9 | PROJECT_PACKAGE="github.com/slok/sloth"
10 | GEN_DIRECTORY="pkg/kubernetes/gen"
11 | 
12 | echo "Cleaning gen directory"
13 | rm -rf ./${GEN_DIRECTORY}
14 | 
15 | echo "Generating Kubernetes CRD clients..."
16 | docker run -it --rm \
17 |     -v ${ROOT_DIRECTORY}:/go/src/${PROJECT_PACKAGE} \
18 |     -e PROJECT_PACKAGE=${PROJECT_PACKAGE} \
19 |     -e CLIENT_GENERATOR_OUT=${PROJECT_PACKAGE}/pkg/kubernetes/gen \
20 |     -e APIS_ROOT=${PROJECT_PACKAGE}/pkg/kubernetes/api \
21 |     -e GROUPS_VERSION="sloth:v1" \
22 |     -e GENERATION_TARGETS="deepcopy,client" \
23 |     ${IMAGE_CLI_GEN}
24 | 
25 | echo "Generating Kubernetes CRD manifests..."
26 | docker run -it --rm \
27 |     -v ${ROOT_DIRECTORY}:/src \
28 |     -e GO_PROJECT_ROOT=/src \
29 |     -e CRD_FLAG="crd:crdVersions=v1,allowDangerousTypes=true" \
30 |     -e CRD_TYPES_PATH=/src/pkg/kubernetes/api \
31 |     -e CRD_OUT_PATH=/src/pkg/kubernetes/gen/crd \
32 |     ${IMAGE_CRD_GEN} update-crd.sh
33 | 
34 | echo "Copying crd to helm chart..."
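# Replace the chart's bundled CRDs with the freshly generated ones.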
35 | rm ./deploy/kubernetes/helm/sloth/crds/* 36 | cp "${GEN_DIRECTORY}/crd"/* deploy/kubernetes/helm/sloth/crds/ 37 | -------------------------------------------------------------------------------- /test/integration/crd/prometheus-operator-crd.yaml: -------------------------------------------------------------------------------- 1 | # https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.47.0/example/prometheus-operator-crd/monitoring.coreos.com_prometheusrules.yaml 2 | 3 | --- 4 | apiVersion: apiextensions.k8s.io/v1 5 | kind: CustomResourceDefinition 6 | metadata: 7 | annotations: 8 | controller-gen.kubebuilder.io/version: v0.4.1 9 | creationTimestamp: null 10 | name: prometheusrules.monitoring.coreos.com 11 | spec: 12 | group: monitoring.coreos.com 13 | names: 14 | kind: PrometheusRule 15 | listKind: PrometheusRuleList 16 | plural: prometheusrules 17 | singular: prometheusrule 18 | scope: Namespaced 19 | versions: 20 | - name: v1 21 | schema: 22 | openAPIV3Schema: 23 | description: PrometheusRule defines recording and alerting rules for a Prometheus instance 24 | properties: 25 | apiVersion: 26 | description: 'APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' 27 | type: string 28 | kind: 29 | description: 'Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' 30 | type: string 31 | metadata: 32 | type: object 33 | spec: 34 | description: Specification of desired alerting rule definitions for Prometheus. 35 | properties: 36 | groups: 37 | description: Content of Prometheus rule file 38 | items: 39 | description: 'RuleGroup is a list of sequentially evaluated recording and alerting rules. Note: PartialResponseStrategy is only used by ThanosRuler and will be ignored by Prometheus instances. Valid values for this field are ''warn'' or ''abort''. More info: https://github.com/thanos-io/thanos/blob/master/docs/components/rule.md#partial-response' 40 | properties: 41 | interval: 42 | type: string 43 | name: 44 | type: string 45 | partial_response_strategy: 46 | type: string 47 | rules: 48 | items: 49 | description: Rule describes an alerting or recording rule. 
50 | properties: 51 | alert: 52 | type: string 53 | annotations: 54 | additionalProperties: 55 | type: string 56 | type: object 57 | expr: 58 | anyOf: 59 | - type: integer 60 | - type: string 61 | x-kubernetes-int-or-string: true 62 | for: 63 | type: string 64 | labels: 65 | additionalProperties: 66 | type: string 67 | type: object 68 | record: 69 | type: string 70 | required: 71 | - expr 72 | type: object 73 | type: array 74 | required: 75 | - name 76 | - rules 77 | type: object 78 | type: array 79 | type: object 80 | required: 81 | - spec 82 | type: object 83 | served: true 84 | storage: true 85 | status: 86 | acceptedNames: 87 | kind: "" 88 | plural: "" 89 | conditions: [] 90 | storedVersions: [] 91 | -------------------------------------------------------------------------------- /test/integration/k8scontroller/plugin/plugin.go: -------------------------------------------------------------------------------- 1 | package availability 2 | 3 | import ( 4 | "bytes" 5 | "context" 6 | "fmt" 7 | "regexp" 8 | "strings" 9 | "text/template" 10 | ) 11 | 12 | const ( 13 | SLIPluginVersion = "prometheus/v1" 14 | SLIPluginID = "integration_test" 15 | ) 16 | 17 | var tpl = template.Must(template.New("").Parse(` 18 | sum(rate(integration_test{ {{.filter}}job="{{.job}}",code=~"(5..|429)" }[{{"{{.window}}"}}])) 19 | / 20 | sum(rate(integration_test{ {{.filter}}job="{{.job}}" }[{{"{{.window}}"}}]))`)) 21 | 22 | var filterRegex = regexp.MustCompile(`([^=]+="[^=,"]+",)+`) 23 | 24 | func SLIPlugin(ctx context.Context, meta, labels, options map[string]string) (string, error) { 25 | // Get job. 26 | job, ok := options["job"] 27 | if !ok { 28 | return "", fmt.Errorf("job options is required") 29 | } 30 | 31 | // Validate labels. 32 | err := validateLabels(labels, "owner", "tier") 33 | if err != nil { 34 | return "", fmt.Errorf("invalid labels: %w", err) 35 | } 36 | 37 | // Sanitize filter. 38 | filter := options["filter"] 39 | if filter != "" { 40 | filter = strings.Trim(filter, "{}") 41 | filter = strings.Trim(filter, ",") 42 | filter = filter + "," 43 | match := filterRegex.MatchString(filter) 44 | if !match { 45 | return "", fmt.Errorf("invalid prometheus filter: %s", filter) 46 | } 47 | } 48 | 49 | // Create query. 
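	// Note: the template escapes {{.window}} (via {{"{{.window}}"}}) so it reaches the
	// generated query literally, where Sloth later substitutes each SLI window.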
50 | 	var b bytes.Buffer
51 | 	data := map[string]string{
52 | 		"job":    job,
53 | 		"filter": filter,
54 | 	}
55 | 	err = tpl.Execute(&b, data)
56 | 	if err != nil {
57 | 		return "", fmt.Errorf("could not execute template: %w", err)
58 | 	}
59 | 
60 | 	return b.String(), nil
61 | }
62 | 
63 | func validateLabels(labels map[string]string, requiredKeys ...string) error {
64 | 	for _, k := range requiredKeys {
65 | 		v, ok := labels[k]
66 | 		if !ok || v == "" {
67 | 			return fmt.Errorf("%q label is required", k)
68 | 		}
69 | 	}
70 | 
71 | 	return nil
72 | }
73 | 
--------------------------------------------------------------------------------
/test/integration/k8scontroller/windows/7d.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: sloth.slok.dev/v1
2 | kind: AlertWindows
3 | spec:
4 |   sloPeriod: 7d
5 |   page:
6 |     quick:
7 |       errorBudgetPercent: 8
8 |       shortWindow: 5m
9 |       longWindow: 1h
10 |     slow:
11 |       errorBudgetPercent: 12.5
12 |       shortWindow: 30m
13 |       longWindow: 6h
14 |   ticket:
15 |     quick:
16 |       errorBudgetPercent: 20
17 |       shortWindow: 2h
18 |       longWindow: 24h
19 |     slow:
20 |       errorBudgetPercent: 42
21 |       shortWindow: 6h
22 |       longWindow: 72h
23 | 
--------------------------------------------------------------------------------
/test/integration/prometheus/generate_test.go:
--------------------------------------------------------------------------------
1 | package prometheus_test
2 | 
3 | import (
4 | 	"bytes"
5 | 	"context"
6 | 	"os"
7 | 	"testing"
8 | 	"text/template"
9 | 
10 | 	"github.com/stretchr/testify/assert"
11 | 	"github.com/stretchr/testify/require"
12 | 
13 | 	"github.com/slok/sloth/test/integration/prometheus"
14 | 	"github.com/slok/sloth/test/integration/testutils"
15 | )
16 | 
17 | type expectedOutLoader struct {
18 | 	version string
19 | }
20 | 
21 | func (e expectedOutLoader) mustLoadExp(path string) string {
22 | 	fileData, err := os.ReadFile(path)
23 | 	if err != nil {
24 | 		panic(err)
25 | 	}
26 | 
27 | 	tmpl := template.Must(template.New("").Parse(string(fileData)))
28 | 
29 | 	data := map[string]string{"version": e.version}
30 | 	var b bytes.Buffer
31 | 	err = tmpl.Execute(&b, data)
32 | 	if err != nil {
33 | 		panic(err)
34 | 	}
35 | 
36 | 	return b.String()
37 | }
38 | 
39 | func TestPrometheusGenerate(t *testing.T) {
40 | 	// Tests config.
41 | 	config := prometheus.NewConfig(t)
42 | 	version, err := testutils.SlothVersion(context.TODO(), config.Binary)
43 | 	require.NoError(t, err)
44 | 
45 | 	expectLoader := expectedOutLoader{version: version}
46 | 
47 | 	// Tests.
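	// The expected outputs are kept as Go templates (".yaml.tpl" files) rather
	// than plain YAML so that the "Code generated by Sloth (<version>)" header
	// can be rendered with the version of the binary under test before comparing.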
48 | tests := map[string]struct { 49 | genCmdArgs string 50 | expOut string 51 | expErr bool 52 | }{ 53 | "Generate should generate the correct rules for all the SLOs.": { 54 | genCmdArgs: "--input ./testdata/in-base.yaml", 55 | expOut: expectLoader.mustLoadExp("./testdata/out-base.yaml.tpl"), 56 | }, 57 | 58 | "Generate should generate the correct rules for all the SLOs (Kubernetes).": { 59 | genCmdArgs: "--input ./testdata/in-base-k8s.yaml", 60 | expOut: expectLoader.mustLoadExp("./testdata/out-base-k8s.yaml.tpl"), 61 | }, 62 | 63 | "Generate without alerts should generate the correct recording rules for all the SLOs.": { 64 | genCmdArgs: "--input ./testdata/in-base.yaml --disable-alerts", 65 | expOut: expectLoader.mustLoadExp("./testdata/out-base-no-alerts.yaml.tpl"), 66 | }, 67 | 68 | "Generate without recordings should generate the correct alert rules for all the SLOs.": { 69 | genCmdArgs: "--input ./testdata/in-base.yaml --disable-recordings", 70 | expOut: expectLoader.mustLoadExp("./testdata/out-base-no-recordings.yaml.tpl"), 71 | }, 72 | 73 | "Generate with extra labels should generate the correct rules for all the SLOs.": { 74 | genCmdArgs: "--input ./testdata/in-base.yaml -l exk1=exv1 -l exk2=exv2", 75 | expOut: expectLoader.mustLoadExp("./testdata/out-base-extra-labels.yaml.tpl"), 76 | }, 77 | 78 | "Generate with plugins should generate the correct rules for all the SLOs.": { 79 | genCmdArgs: "--input ./testdata/in-plugin.yaml", 80 | expOut: expectLoader.mustLoadExp("./testdata/out-plugin.yaml.tpl"), 81 | }, 82 | 83 | "Generate using multifile YAML in single file should generate the correct rules for all the SLOs.": { 84 | genCmdArgs: "--input ./testdata/in-multifile.yaml", 85 | expOut: expectLoader.mustLoadExp("./testdata/out-multifile.yaml.tpl"), 86 | }, 87 | 88 | "Generate using multifile YAML in single file should generate the correct rules for all the SLOs (Kubernetes).": { 89 | genCmdArgs: "--input ./testdata/in-multifile-k8s.yaml", 90 | expOut: expectLoader.mustLoadExp("./testdata/out-multifile-k8s.yaml.tpl"), 91 | }, 92 | 93 | "Generate using OpenSLO YAML should generate Prometheus rules.": { 94 | genCmdArgs: "--input ./testdata/in-openslo.yaml", 95 | expOut: expectLoader.mustLoadExp("./testdata/out-openslo.yaml.tpl"), 96 | }, 97 | 98 | "Generate using 28 day time window should generate Prometheus rules.": { 99 | genCmdArgs: "--default-slo-period 28d --input ./testdata/in-base.yaml", 100 | expOut: expectLoader.mustLoadExp("./testdata/out-base-28d.yaml.tpl"), 101 | }, 102 | 103 | "Generate using custom 7 day time window should generate Prometheus rules.": { 104 | genCmdArgs: "--default-slo-period 7d --input ./testdata/in-base.yaml --slo-period-windows-path ./windows", 105 | expOut: expectLoader.mustLoadExp("./testdata/out-base-custom-windows-7d.yaml.tpl"), 106 | }, 107 | 108 | "Generate using invalid version should fail.": { 109 | genCmdArgs: "--input ./testdata/in-invalid-version.yaml", 110 | expErr: true, 111 | }, 112 | } 113 | 114 | for name, test := range tests { 115 | t.Run(name, func(t *testing.T) { 116 | assert := assert.New(t) 117 | 118 | // Run with context to stop on test end. 
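			// Each case shells out to the real sloth binary and compares its
			// stdout byte-for-byte against the rendered expected-output fixture.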
119 | 			ctx, cancel := context.WithCancel(context.Background())
120 | 			defer cancel()
121 | 			out, _, err := prometheus.RunSlothGenerate(ctx, config, test.genCmdArgs)
122 | 
123 | 			if test.expErr {
124 | 				assert.Error(err)
125 | 			} else if assert.NoError(err) {
126 | 				assert.Equal(test.expOut, string(out))
127 | 			}
128 | 		})
129 | 	}
130 | }
131 | 
--------------------------------------------------------------------------------
/test/integration/prometheus/helpers.go:
--------------------------------------------------------------------------------
1 | package prometheus
2 | 
3 | import (
4 | 	"context"
5 | 	"fmt"
6 | 	"os"
7 | 	"os/exec"
8 | 	"testing"
9 | 
10 | 	"github.com/slok/sloth/test/integration/testutils"
11 | )
12 | 
13 | type Config struct {
14 | 	Binary string
15 | }
16 | 
17 | func (c *Config) defaults() error {
18 | 	if c.Binary == "" {
19 | 		c.Binary = "sloth"
20 | 	}
21 | 
22 | 	_, err := exec.LookPath(c.Binary)
23 | 	if err != nil {
24 | 		return fmt.Errorf("could not find sloth binary %q: %w", c.Binary, err)
25 | 	}
26 | 
27 | 	return nil
28 | }
29 | 
30 | // NewConfig prepares the configuration for integration tests; if the
31 | // configuration is not ready, it skips the test.
32 | func NewConfig(t *testing.T) Config {
33 | 	const (
34 | 		envSlothBin = "SLOTH_INTEGRATION_BINARY"
35 | 	)
36 | 
37 | 	c := Config{
38 | 		Binary: os.Getenv(envSlothBin),
39 | 	}
40 | 
41 | 	err := c.defaults()
42 | 	if err != nil {
43 | 		t.Skipf("Skipping due to invalid config: %s", err)
44 | 	}
45 | 
46 | 	return c
47 | }
48 | 
49 | func RunSlothGenerate(ctx context.Context, config Config, cmdArgs string) (stdout, stderr []byte, err error) {
50 | 	env := []string{
51 | 		fmt.Sprintf("SLOTH_SLI_PLUGINS_PATH=%s", "./"),
52 | 	}
53 | 
54 | 	return testutils.RunSloth(ctx, env, config.Binary, fmt.Sprintf("generate %s", cmdArgs), true)
55 | }
56 | 
57 | func RunSlothValidate(ctx context.Context, config Config, cmdArgs string) (stdout, stderr []byte, err error) {
58 | 	env := []string{
59 | 		fmt.Sprintf("SLOTH_SLI_PLUGINS_PATH=%s", "./"),
60 | 	}
61 | 
62 | 	return testutils.RunSloth(ctx, env, config.Binary, fmt.Sprintf("validate %s", cmdArgs), true)
63 | }
64 | 
--------------------------------------------------------------------------------
/test/integration/prometheus/plugin/plugin.go:
--------------------------------------------------------------------------------
1 | package availability
2 | 
3 | import (
4 | 	"bytes"
5 | 	"context"
6 | 	"fmt"
7 | 	"regexp"
8 | 	"strings"
9 | 	"text/template"
10 | )
11 | 
12 | const (
13 | 	SLIPluginVersion = "prometheus/v1"
14 | 	SLIPluginID      = "integration_test"
15 | )
16 | 
17 | var tpl = template.Must(template.New("").Parse(`
18 | sum(rate(integration_test{ {{.filter}}job="{{.job}}",code=~"(5..|429)" }[{{"{{.window}}"}}]))
19 | /
20 | sum(rate(integration_test{ {{.filter}}job="{{.job}}" }[{{"{{.window}}"}}]))`))
21 | 
22 | var filterRegex = regexp.MustCompile(`([^=]+="[^=,"]+",)+`)
23 | 
24 | func SLIPlugin(ctx context.Context, meta, labels, options map[string]string) (string, error) {
25 | 	// Get job.
26 | 	job, ok := options["job"]
27 | 	if !ok {
28 | 		return "", fmt.Errorf("job option is required")
29 | 	}
30 | 
31 | 	// Validate labels.
32 | 	err := validateLabels(labels, "owner", "tier")
33 | 	if err != nil {
34 | 		return "", fmt.Errorf("invalid labels: %w", err)
35 | 	}
36 | 
37 | 	// Sanitize filter.
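	// The optional filter is normalized so it can be prepended verbatim to the
	// label matchers in the query template: surrounding braces and commas are
	// stripped, a single trailing comma is re-added, and the result must match
	// the `label="value",` shape enforced by filterRegex.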
38 | 	filter := options["filter"]
39 | 	if filter != "" {
40 | 		filter = strings.Trim(filter, "{}")
41 | 		filter = strings.Trim(filter, ",")
42 | 		filter = filter + ","
43 | 		match := filterRegex.MatchString(filter)
44 | 		if !match {
45 | 			return "", fmt.Errorf("invalid prometheus filter: %s", filter)
46 | 		}
47 | 	}
48 | 
49 | 	// Create query.
50 | 	var b bytes.Buffer
51 | 	data := map[string]string{
52 | 		"job":    job,
53 | 		"filter": filter,
54 | 	}
55 | 	err = tpl.Execute(&b, data)
56 | 	if err != nil {
57 | 		return "", fmt.Errorf("could not execute template: %w", err)
58 | 	}
59 | 
60 | 	return b.String(), nil
61 | }
62 | 
63 | func validateLabels(labels map[string]string, requiredKeys ...string) error {
64 | 	for _, k := range requiredKeys {
65 | 		v, ok := labels[k]
66 | 		if !ok || v == "" {
67 | 			return fmt.Errorf("%q label is required", k)
68 | 		}
69 | 	}
70 | 
71 | 	return nil
72 | }
73 | 
--------------------------------------------------------------------------------
/test/integration/prometheus/testdata/in-base-k8s.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: sloth.slok.dev/v1
2 | kind: PrometheusServiceLevel
3 | metadata:
4 |   name: svc
5 |   namespace: test-ns
6 | spec:
7 |   service: "svc01"
8 |   labels:
9 |     global01k1: global01v1
10 |   slos:
11 |     - name: "slo1"
12 |       objective: 99.9
13 |       description: "This is SLO 01."
14 |       labels:
15 |         global02k1: global02v1
16 |       sli:
17 |         events:
18 |           errorQuery: sum(rate(http_request_duration_seconds_count{job="myservice",code=~"(5..|429)"}[{{.window}}]))
19 |           totalQuery: sum(rate(http_request_duration_seconds_count{job="myservice"}[{{.window}}]))
20 |       alerting:
21 |         name: myServiceAlert
22 |         labels:
23 |           alert01k1: "alert01v1"
24 |         annotations:
25 |           alert02k1: "alert02k2"
26 |         pageAlert:
27 |           labels:
28 |             alert03k1: "alert03v1"
29 |         ticketAlert:
30 |           labels:
31 |             alert04k1: "alert04v1"
32 |     - name: "slo02"
33 |       objective: 95
34 |       description: "This is SLO 02."
35 |       labels:
36 |         global03k1: global03v1
37 |       sli:
38 |         raw:
39 |           errorRatioQuery: |
40 |             sum(rate(http_request_duration_seconds_count{job="myservice",code=~"(5..|429)"}[{{.window}}]))
41 |             /
42 |             sum(rate(http_request_duration_seconds_count{job="myservice"}[{{.window}}]))
43 |       alerting:
44 |         pageAlert:
45 |           disable: true
46 |         ticketAlert:
47 |           disable: true
48 | 
--------------------------------------------------------------------------------
/test/integration/prometheus/testdata/in-base.yaml:
--------------------------------------------------------------------------------
1 | version: "prometheus/v1"
2 | service: "svc01"
3 | labels:
4 |   global01k1: global01v1
5 | slos:
6 |   - name: "slo1"
7 |     objective: 99.9
8 |     description: "This is SLO 01."
9 |     labels:
10 |       global02k1: global02v1
11 |     sli:
12 |       events:
13 |         error_query: sum(rate(http_request_duration_seconds_count{job="myservice",code=~"(5..|429)"}[{{.window}}]))
14 |         total_query: sum(rate(http_request_duration_seconds_count{job="myservice"}[{{.window}}]))
15 |     alerting:
16 |       name: myServiceAlert
17 |       labels:
18 |         alert01k1: "alert01v1"
19 |       annotations:
20 |         alert02k1: "alert02k2"
21 |       page_alert:
22 |         labels:
23 |           alert03k1: "alert03v1"
24 |       ticket_alert:
25 |         labels:
26 |           alert04k1: "alert04v1"
27 |   - name: "slo02"
28 |     objective: 95
29 |     description: "This is SLO 02."
30 | labels: 31 | global03k1: global03v1 32 | sli: 33 | raw: 34 | error_ratio_query: | 35 | sum(rate(http_request_duration_seconds_count{job="myservice",code=~"(5..|429)"}[{{.window}}])) 36 | / 37 | sum(rate(http_request_duration_seconds_count{job="myservice"}[{{.window}}])) 38 | alerting: 39 | page_alert: 40 | disable: true 41 | ticket_alert: 42 | disable: true 43 | -------------------------------------------------------------------------------- /test/integration/prometheus/testdata/in-invalid-version.yaml: -------------------------------------------------------------------------------- 1 | version: "prometheus/v999" 2 | service: "svc01" 3 | labels: 4 | global01k1: global01v1 5 | slos: 6 | - name: "slo1" 7 | objective: 99.9 8 | description: "This is SLO 01." 9 | labels: 10 | global02k1: global02v1 11 | sli: 12 | events: 13 | error_query: sum(rate(http_request_duration_seconds_count{job="myservice",code=~"(5..|429)"}[{{.window}}])) 14 | total_query: sum(rate(http_request_duration_seconds_count{job="myservice"}[{{.window}}])) 15 | alerting: 16 | name: myServiceAlert 17 | labels: 18 | alert01k1: "alert01v1" 19 | annotations: 20 | alert02k1: "alert02k2" 21 | page_alert: 22 | labels: 23 | alert03k1: "alert03v1" 24 | ticket_alert: 25 | labels: 26 | alert04k1: "alert04v1" 27 | - name: "slo02" 28 | objective: 95 29 | description: "This is SLO 02." 30 | labels: 31 | global03k1: global03v1 32 | sli: 33 | raw: 34 | error_ratio_query: | 35 | sum(rate(http_request_duration_seconds_count{job="myservice",code=~"(5..|429)"}[{{.window}}])) 36 | / 37 | sum(rate(http_request_duration_seconds_count{job="myservice"}[{{.window}}])) 38 | alerting: 39 | page_alert: 40 | disable: true 41 | ticket_alert: 42 | disable: true 43 | -------------------------------------------------------------------------------- /test/integration/prometheus/testdata/in-multifile-k8s.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: sloth.slok.dev/v1 3 | kind: PrometheusServiceLevel 4 | metadata: 5 | name: svc 6 | namespace: test-ns 7 | spec: 8 | service: "svc01" 9 | labels: 10 | global01k1: global01v1 11 | slos: 12 | - name: "slo1" 13 | objective: 99.9 14 | description: "This is SLO 01." 15 | labels: 16 | global02k1: global02v1 17 | sli: 18 | events: 19 | errorQuery: sum(rate(http_request_duration_seconds_count{job="myservice",code=~"(5..|429)"}[{{.window}}])) 20 | totalQuery: sum(rate(http_request_duration_seconds_count{job="myservice"}[{{.window}}])) 21 | alerting: 22 | name: myServiceAlert 23 | labels: 24 | alert01k1: "alert01v1" 25 | annotations: 26 | alert02k1: "alert02k2" 27 | pageAlert: 28 | labels: 29 | alert03k1: "alert03v1" 30 | ticketAlert: 31 | labels: 32 | alert04k1: "alert04v1" 33 | - name: "slo02" 34 | objective: 95 35 | description: "This is SLO 02." 36 | labels: 37 | global03k1: global03v1 38 | sli: 39 | raw: 40 | errorRatioQuery: | 41 | sum(rate(http_request_duration_seconds_count{job="myservice",code=~"(5..|429)"}[{{.window}}])) 42 | / 43 | sum(rate(http_request_duration_seconds_count{job="myservice"}[{{.window}}])) 44 | alerting: 45 | pageAlert: 46 | disable: true 47 | ticketAlert: 48 | disable: true 49 | 50 | --- 51 | apiVersion: sloth.slok.dev/v1 52 | kind: PrometheusServiceLevel 53 | metadata: 54 | name: svc-2 55 | namespace: test-ns-2 56 | spec: 57 | service: "svc02" 58 | labels: 59 | global01k1: global01v1 60 | slos: 61 | - name: "slo1" 62 | objective: 99.99 63 | description: "This is SLO 01." 
64 | labels: 65 | global02k1: global02v1 66 | sli: 67 | events: 68 | errorQuery: sum(rate(http_request_duration_seconds_count{job="myservice",code=~"(5..|429)"}[{{.window}}])) 69 | totalQuery: sum(rate(http_request_duration_seconds_count{job="myservice"}[{{.window}}])) 70 | alerting: 71 | name: myServiceAlert 72 | labels: 73 | alert01k1: "alert01v1" 74 | annotations: 75 | alert02k1: "alert02k2" 76 | pageAlert: 77 | labels: 78 | alert03k1: "alert03v1" 79 | ticketAlert: 80 | labels: 81 | alert04k1: "alert04v1" 82 | - name: "slo02" 83 | objective: 95 84 | description: "This is SLO 02." 85 | labels: 86 | global03k1: global03v1 87 | sli: 88 | raw: 89 | errorRatioQuery: | 90 | sum(rate(http_request_duration_seconds_count{job="myservice",code=~"(5..|429)"}[{{.window}}])) 91 | / 92 | sum(rate(http_request_duration_seconds_count{job="myservice"}[{{.window}}])) 93 | alerting: 94 | pageAlert: 95 | disable: true 96 | ticketAlert: 97 | disable: true 98 | -------------------------------------------------------------------------------- /test/integration/prometheus/testdata/in-multifile.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | version: "prometheus/v1" 3 | service: "svc01" 4 | labels: 5 | global01k1: global01v1 6 | slos: 7 | - name: "slo1" 8 | objective: 99.9 9 | description: "This is SLO 01." 10 | labels: 11 | global02k1: global02v1 12 | sli: 13 | events: 14 | error_query: sum(rate(http_request_duration_seconds_count{job="myservice",code=~"(5..|429)"}[{{.window}}])) 15 | total_query: sum(rate(http_request_duration_seconds_count{job="myservice"}[{{.window}}])) 16 | alerting: 17 | name: myServiceAlert 18 | labels: 19 | alert01k1: "alert01v1" 20 | annotations: 21 | alert02k1: "alert02k2" 22 | page_alert: 23 | labels: 24 | alert03k1: "alert03v1" 25 | ticket_alert: 26 | labels: 27 | alert04k1: "alert04v1" 28 | - name: "slo02" 29 | objective: 95 30 | description: "This is SLO 02." 31 | labels: 32 | global03k1: global03v1 33 | sli: 34 | raw: 35 | error_ratio_query: | 36 | sum(rate(http_request_duration_seconds_count{job="myservice",code=~"(5..|429)"}[{{.window}}])) 37 | / 38 | sum(rate(http_request_duration_seconds_count{job="myservice"}[{{.window}}])) 39 | alerting: 40 | page_alert: 41 | disable: true 42 | ticket_alert: 43 | disable: true 44 | 45 | --- 46 | version: "prometheus/v1" 47 | service: "svc02" 48 | labels: 49 | global01k1: global01v1 50 | slos: 51 | - name: "slo1" 52 | objective: 99.99 53 | description: "This is SLO 01." 54 | labels: 55 | global02k1: global02v1 56 | sli: 57 | events: 58 | error_query: sum(rate(http_request_duration_seconds_count{job="myservice",code=~"(5..|429)"}[{{.window}}])) 59 | total_query: sum(rate(http_request_duration_seconds_count{job="myservice"}[{{.window}}])) 60 | alerting: 61 | name: myServiceAlert 62 | labels: 63 | alert01k1: "alert01v1" 64 | annotations: 65 | alert02k1: "alert02k2" 66 | page_alert: 67 | labels: 68 | alert03k1: "alert03v1" 69 | ticket_alert: 70 | labels: 71 | alert04k1: "alert04v1" 72 | - name: "slo02" 73 | objective: 95 74 | description: "This is SLO 02." 
75 | labels: 76 | global03k1: global03v1 77 | sli: 78 | raw: 79 | error_ratio_query: | 80 | sum(rate(http_request_duration_seconds_count{job="myservice",code=~"(5..|429)"}[{{.window}}])) 81 | / 82 | sum(rate(http_request_duration_seconds_count{job="myservice"}[{{.window}}])) 83 | alerting: 84 | page_alert: 85 | disable: true 86 | ticket_alert: 87 | disable: true 88 | -------------------------------------------------------------------------------- /test/integration/prometheus/testdata/in-openslo.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: openslo/v1alpha 2 | kind: SLO 3 | metadata: 4 | name: slo1 5 | displayName: Integration test SLO1 6 | spec: 7 | service: svc01 8 | description: "this is SLO1." 9 | budgetingMethod: Occurrences 10 | objectives: 11 | - ratioMetrics: 12 | good: 13 | source: prometheus 14 | queryType: promql 15 | query: sum(rate(http_request_duration_seconds_count{job="myservice",code!~"(5..|429)"}[{{.window}}])) 16 | total: 17 | source: prometheus 18 | queryType: promql 19 | query: sum(rate(http_request_duration_seconds_count{job="myservice"}[{{.window}}])) 20 | target: 0.999 21 | timeWindows: 22 | - count: 30 23 | unit: Day 24 | -------------------------------------------------------------------------------- /test/integration/prometheus/testdata/in-plugin.yaml: -------------------------------------------------------------------------------- 1 | version: "prometheus/v1" 2 | service: "svc01" 3 | labels: 4 | owner: myteam 5 | tier: "2" 6 | slos: 7 | - name: "slo1" 8 | objective: 99.9 9 | description: "This is SLO 01." 10 | sli: 11 | plugin: 12 | id: integration_test 13 | options: 14 | job: svc01 15 | filter: guybrush="threepwood",melee="island" 16 | alerting: 17 | page_alert: 18 | disable: true 19 | ticket_alert: 20 | disable: true 21 | -------------------------------------------------------------------------------- /test/integration/prometheus/testdata/out-base-no-recordings.yaml.tpl: -------------------------------------------------------------------------------- 1 | 2 | --- 3 | # Code generated by Sloth ({{ .version }}): https://github.com/slok/sloth. 4 | # DO NOT EDIT. 5 | 6 | groups: 7 | - name: sloth-slo-alerts-svc01-slo1 8 | rules: 9 | - alert: myServiceAlert 10 | expr: | 11 | ( 12 | max(slo:sli_error:ratio_rate5m{sloth_id="svc01-slo1", sloth_service="svc01", sloth_slo="slo1"} > (14.4 * 0.0009999999999999432)) without (sloth_window) 13 | and 14 | max(slo:sli_error:ratio_rate1h{sloth_id="svc01-slo1", sloth_service="svc01", sloth_slo="slo1"} > (14.4 * 0.0009999999999999432)) without (sloth_window) 15 | ) 16 | or 17 | ( 18 | max(slo:sli_error:ratio_rate30m{sloth_id="svc01-slo1", sloth_service="svc01", sloth_slo="slo1"} > (6 * 0.0009999999999999432)) without (sloth_window) 19 | and 20 | max(slo:sli_error:ratio_rate6h{sloth_id="svc01-slo1", sloth_service="svc01", sloth_slo="slo1"} > (6 * 0.0009999999999999432)) without (sloth_window) 21 | ) 22 | labels: 23 | alert01k1: alert01v1 24 | alert03k1: alert03v1 25 | sloth_severity: page 26 | annotations: 27 | alert02k1: alert02k2 28 | summary: '{{"{{$labels.sloth_service}}"}} {{"{{$labels.sloth_slo}}"}} SLO error budget burn 29 | rate is over expected.' 30 | title: (page) {{"{{$labels.sloth_service}}"}} {{"{{$labels.sloth_slo}}"}} SLO error budget 31 | burn rate is too fast. 
32 | - alert: myServiceAlert 33 | expr: | 34 | ( 35 | max(slo:sli_error:ratio_rate2h{sloth_id="svc01-slo1", sloth_service="svc01", sloth_slo="slo1"} > (3 * 0.0009999999999999432)) without (sloth_window) 36 | and 37 | max(slo:sli_error:ratio_rate1d{sloth_id="svc01-slo1", sloth_service="svc01", sloth_slo="slo1"} > (3 * 0.0009999999999999432)) without (sloth_window) 38 | ) 39 | or 40 | ( 41 | max(slo:sli_error:ratio_rate6h{sloth_id="svc01-slo1", sloth_service="svc01", sloth_slo="slo1"} > (1 * 0.0009999999999999432)) without (sloth_window) 42 | and 43 | max(slo:sli_error:ratio_rate3d{sloth_id="svc01-slo1", sloth_service="svc01", sloth_slo="slo1"} > (1 * 0.0009999999999999432)) without (sloth_window) 44 | ) 45 | labels: 46 | alert01k1: alert01v1 47 | alert04k1: alert04v1 48 | sloth_severity: ticket 49 | annotations: 50 | alert02k1: alert02k2 51 | summary: '{{"{{$labels.sloth_service}}"}} {{"{{$labels.sloth_slo}}"}} SLO error budget burn 52 | rate is over expected.' 53 | title: (ticket) {{"{{$labels.sloth_service}}"}} {{"{{$labels.sloth_slo}}"}} SLO error budget 54 | burn rate is too fast. 55 | -------------------------------------------------------------------------------- /test/integration/prometheus/testdata/validate/bad/bad-aa.yaml: -------------------------------------------------------------------------------- 1 | version: "prometheus/v1" 2 | service: "svc01" 3 | labels: 4 | global01k1: global01v1 5 | slos: 6 | - name: "slo1" 7 | objective: 99.9 8 | description: "This is SLO 01." 9 | labels: 10 | global02k1: global02v1 11 | sli: 12 | events: 13 | error_query: sum(rate(http_request_duration_seconds_count{job="myservice",code=~"(5..|429)"}[{{.window}}])) 14 | total_query: sum(rate(http_request_duration_seconds_count{job="myservice"}[{{.window}}])) 15 | alerting: 16 | name: myServiceAlert 17 | labels: 18 | alert01k1: "alert01v1" 19 | annotations: 20 | alert02k1: "alert02k2" 21 | pageAlert: 22 | labels: 23 | alert03k1: "alert03v1" 24 | ticketAlert: 25 | labels: 26 | alert04k1: "alert04v1" 27 | - name: "slo02" 28 | objective: 101 # BAD! 29 | description: "This is SLO 02." 30 | labels: 31 | global03k1: global03v1 32 | sli: 33 | raw: 34 | error_ratio_query: | 35 | sum(rate(http_request_duration_seconds_count{job="myservice",code=~"(5..|429)"}[{{.window}}])) 36 | / 37 | sum(rate(http_request_duration_seconds_count{job="myservice"}[{{.window}}])) 38 | alerting: 39 | page_alert: 40 | disable: true 41 | ticket_alert: 42 | disable: true 43 | -------------------------------------------------------------------------------- /test/integration/prometheus/testdata/validate/bad/bad-ab.yaml: -------------------------------------------------------------------------------- 1 | version: "prometheus/v1" 2 | service: "svc01" 3 | labels: 4 | global01k1: global01v1 5 | slos: 6 | - name: "slo1" 7 | objective: 99.9 8 | description: "This is SLO 01." 9 | labels: 10 | global02k1: global02v1 11 | sli: 12 | events: 13 | error_query: sum(rate(http_request_duration_seconds_count{job="myservice",code=~"(5..|429)"}[{{.window}}])) 14 | total_query: sum(rate(http_request_duration_seconds_count{job="myservice"}[{{.window}}])) 15 | alerting: 16 | name: myServiceAlert 17 | labels: 18 | alert01k1: "alert01v1" 19 | annotations: 20 | alert02k1: "alert02k2" 21 | pageAlert: 22 | labels: 23 | alert03k1: "alert03v1" 24 | ticketAlert: 25 | labels: 26 | alert04k1: "alert04v1" 27 | - name: "slo02" 28 | objective: 101 # BAD! 29 | description: "This is SLO 02." 
30 | labels: 31 | global03k1: global03v1 32 | sli: 33 | raw: 34 | error_ratio_query: | 35 | sum(rate(http_request_duration_seconds_count{job="myservice",code=~"(5..|429)"}[{{.window}}])) 36 | / 37 | sum(rate(http_request_duration_seconds_count{job="myservice"}[{{.window}}])) 38 | alerting: 39 | page_alert: 40 | disable: true 41 | ticket_alert: 42 | disable: true 43 | -------------------------------------------------------------------------------- /test/integration/prometheus/testdata/validate/bad/bad-ba.yaml: -------------------------------------------------------------------------------- 1 | version: "prometheus/v1" 2 | service: "svc01" 3 | labels: 4 | global01k1: global01v1 5 | slos: 6 | - name: "slo1" 7 | objective: 99.9 8 | description: "This is SLO 01." 9 | labels: 10 | global02k1: global02v1 11 | sli: 12 | events: 13 | error_query: sum(rate(http_request_duration_seconds_count{job="myservice",code=~"(5..|429)"}[{{.window}}])) 14 | total_query: sum(rate(http_request_duration_seconds_count{job="myservice"}[{{.window}}])) 15 | alerting: 16 | name: myServiceAlert 17 | labels: 18 | alert01k1: "alert01v1" 19 | annotations: 20 | alert02k1: "alert02k2" 21 | pageAlert: 22 | labels: 23 | alert03k1: "alert03v1" 24 | ticketAlert: 25 | labels: 26 | alert04k1: "alert04v1" 27 | - name: "slo02" 28 | objective: 101 # BAD! 29 | description: "This is SLO 02." 30 | labels: 31 | global03k1: global03v1 32 | sli: 33 | raw: 34 | error_ratio_query: | 35 | sum(rate(http_request_duration_seconds_count{job="myservice",code=~"(5..|429)"}[{{.window}}])) 36 | / 37 | sum(rate(http_request_duration_seconds_count{job="myservice"}[{{.window}}])) 38 | alerting: 39 | page_alert: 40 | disable: true 41 | ticket_alert: 42 | disable: true 43 | -------------------------------------------------------------------------------- /test/integration/prometheus/testdata/validate/bad/bad-k8s.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: sloth.slok.dev/v1 2 | kind: PrometheusServiceLevel 3 | metadata: 4 | name: svc 5 | namespace: test-ns 6 | spec: 7 | service: "" # BAD! 8 | labels: 9 | global01k1: global01v1 10 | slos: 11 | - name: "slo1" 12 | objective: 99.9 13 | description: "This is SLO 01." 14 | labels: 15 | global02k1: global02v1 16 | sli: 17 | events: 18 | errorQuery: sum(rate(http_request_duration_seconds_count{job="myservice",code=~"(5..|429)"}[{{.window}}])) 19 | totalQuery: sum(rate(http_request_duration_seconds_count{job="myservice"}[{{.window}}])) 20 | alerting: 21 | name: myServiceAlert 22 | labels: 23 | alert01k1: "alert01v1" 24 | annotations: 25 | alert02k1: "alert02k2" 26 | pageAlert: 27 | labels: 28 | alert03k1: "alert03v1" 29 | ticketAlert: 30 | labels: 31 | alert04k1: "alert04v1" 32 | - name: "slo02" 33 | objective: 95 34 | description: "This is SLO 02." 
35 | labels: 36 | global03k1: global03v1 37 | sli: 38 | raw: 39 | errorRatioQuery: | 40 | sum(rate(http_request_duration_seconds_count{job="myservice",code=~"(5..|429)"}[{{.window}}])) 41 | / 42 | sum(rate(http_request_duration_seconds_count{job="myservice"}[{{.window}}])) 43 | alerting: 44 | pageAlert: 45 | disable: true 46 | ticketAlert: 47 | disable: true 48 | -------------------------------------------------------------------------------- /test/integration/prometheus/testdata/validate/bad/bad-multi-k8s.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: sloth.slok.dev/v1 3 | kind: PrometheusServiceLevel 4 | metadata: 5 | name: svc 6 | namespace: test-ns 7 | spec: 8 | service: "svc01" 9 | labels: 10 | global01k1: global01v1 11 | slos: 12 | - name: "slo1" 13 | objective: 99.9 14 | description: "This is SLO 01." 15 | labels: 16 | global02k1: global02v1 17 | sli: 18 | events: 19 | errorQuery: sum(rate(http_request_duration_seconds_count{job="myservice",code=~"(5..|429)"}[{{.window}}])) 20 | totalQuery: sum(rate(http_request_duration_seconds_count{job="myservice"}[{{.window}}])) 21 | alerting: 22 | name: "" # BAD! 23 | labels: 24 | alert01k1: "alert01v1" 25 | annotations: 26 | alert02k1: "alert02k2" 27 | pageAlert: 28 | labels: 29 | alert03k1: "alert03v1" 30 | ticketAlert: 31 | labels: 32 | alert04k1: "alert04v1" 33 | - name: "slo02" 34 | objective: 95 35 | description: "This is SLO 02." 36 | labels: 37 | global03k1: global03v1 38 | sli: 39 | raw: 40 | errorRatioQuery: | 41 | sum(rate(http_request_duration_seconds_count{job="myservice",code=~"(5..|429)"}[{{.window}}])) 42 | / 43 | sum(rate(http_request_duration_seconds_count{job="myservice"}[{{.window}}])) 44 | alerting: 45 | pageAlert: 46 | disable: true 47 | ticketAlert: 48 | disable: true 49 | 50 | --- 51 | apiVersion: sloth.slok.dev/v1 52 | kind: PrometheusServiceLevel 53 | metadata: 54 | name: svc-2 55 | namespace: test-ns-2 56 | spec: 57 | service: "svc02" 58 | labels: 59 | global01k1: global01v1 60 | slos: 61 | - name: "slo1" 62 | objective: 99.99 63 | description: "This is SLO 01." 64 | labels: 65 | global02k1: global02v1 66 | sli: 67 | events: 68 | errorQuery: sum(rate(http_request_duration_seconds_count{job="myservice",code=~"(5..|429)"}[{{.window}}])) 69 | totalQuery: sum(rate(http_request_duration_seconds_count{job="myservice"}[{{.window}}])) 70 | alerting: 71 | name: myServiceAlert 72 | labels: 73 | alert01k1: "alert01v1" 74 | annotations: 75 | alert02k1: "alert02k2" 76 | pageAlert: 77 | labels: 78 | alert03k1: "alert03v1" 79 | ticketAlert: 80 | labels: 81 | alert04k1: "alert04v1" 82 | - name: "slo02" 83 | objective: 95 84 | description: "This is SLO 02." 85 | labels: 86 | global03k1: global03v1 87 | sli: 88 | raw: 89 | errorRatioQuery: | 90 | sum(rate(http_request_duration_seconds_count{job="myservice",code=~"(5..|429)"}[{{.window}}])) 91 | / 92 | sum(rate(http_request_duration_seconds_count{job="myservice"}[{{.window}}])) 93 | alerting: 94 | pageAlert: 95 | disable: true 96 | ticketAlert: 97 | disable: true 98 | -------------------------------------------------------------------------------- /test/integration/prometheus/testdata/validate/bad/bad-multi.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | version: "prometheus/v1" 3 | service: "svc01" 4 | labels: 5 | global01k1: global01v1 6 | slos: 7 | - name: "slo1" 8 | objective: 99.9 9 | description: "This is SLO 01." 
10 | labels: 11 | global02k1: global02v1 12 | sli: 13 | events: 14 | error_query: sum(rate(http_request_duration_seconds_count{job="myservice",code=~"(5..|429)"}[{{.window}}])) 15 | total_query: sum(rate(http_request_duration_seconds_count{job="myservice"}[{{.window}}])) 16 | alerting: 17 | name: myServiceAlert 18 | labels: 19 | alert01k1: "alert01v1" 20 | annotations: 21 | alert02k1: "alert02k2" 22 | page_alert: 23 | labels: 24 | alert03k1: "alert03v1" 25 | ticket_alert: 26 | labels: 27 | alert04k1: "alert04v1" 28 | - name: "slo02" 29 | objective: 95 30 | description: "This is SLO 02." 31 | labels: 32 | global03k1: global03v1 33 | sli: 34 | raw: 35 | error_ratio_query: | 36 | sum(rate(http_request_duration_seconds_count{job="myservice",code=~"(5..|429)"}[{{.window}}])) 37 | / 38 | sum(rate(http_request_duration_seconds_count{job="myservice"}[{{.window}}])) 39 | alerting: 40 | page_alert: 41 | disable: true 42 | ticket_alert: 43 | disable: true 44 | 45 | --- 46 | version: "prometheus/v1" 47 | service: "svc02" 48 | labels: 49 | global01k1: global01v1 50 | slos: 51 | - name: "slo1" 52 | objective: 99.99 53 | description: "This is SLO 01." 54 | labels: 55 | global02k1: global02v1 56 | sli: 57 | events: 58 | error_query: sum(rate(http_request_duration_seconds_count{job="myservice",code=~"(5..|429)"}[{{.window}}])) 59 | total_query: sum(rate(http_request_duration_seconds_count{job="myservice"}[{{.window}}])) 60 | alerting: 61 | name: myServiceAlert 62 | labels: 63 | alert01k1: "alert01v1" 64 | annotations: 65 | alert02k1: "alert02k2" 66 | page_alert: 67 | labels: 68 | alert03k1: "alert03v1" 69 | ticket_alert: 70 | labels: 71 | alert04k1: "alert04v1" 72 | - name: "slo02" 73 | objective: 95 74 | description: "This is SLO 02." 75 | labels: 76 | global03k1: global03v1 77 | sli: {} # BAD! 78 | alerting: 79 | page_alert: 80 | disable: true 81 | ticket_alert: 82 | disable: true 83 | -------------------------------------------------------------------------------- /test/integration/prometheus/testdata/validate/bad/bad-openslo.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: openslo/v1alpha 2 | kind: SLO 3 | metadata: 4 | name: slo1 5 | displayName: Integration test SLO1 6 | spec: 7 | service: svc01 8 | description: "this is SLO1." 9 | budgetingMethod: Occurrences 10 | objectives: 11 | - ratioMetrics: 12 | good: 13 | source: prometheus 14 | queryType: promql 15 | query: sum(rate(http_request_duration_seconds_count{job="myservice",code!~"(5..|429)"}[{{.window}}])) 16 | total: 17 | source: prometheus 18 | queryType: promql 19 | query: sum(rate(http_request_duration_seconds_count{job="myservice"}[{{.window}}])) 20 | target: 0.999 21 | timeWindows: 22 | - count: 28 # BAD! 23 | unit: Day 24 | -------------------------------------------------------------------------------- /test/integration/prometheus/testdata/validate/good/good-aa.yaml: -------------------------------------------------------------------------------- 1 | version: "prometheus/v1" 2 | service: "svc01" 3 | labels: 4 | global01k1: global01v1 5 | slos: 6 | - name: "slo1" 7 | objective: 99.9 8 | description: "This is SLO 01." 
9 | labels: 10 | global02k1: global02v1 11 | sli: 12 | events: 13 | error_query: sum(rate(http_request_duration_seconds_count{job="myservice",code=~"(5..|429)"}[{{.window}}])) 14 | total_query: sum(rate(http_request_duration_seconds_count{job="myservice"}[{{.window}}])) 15 | alerting: 16 | name: myServiceAlert 17 | labels: 18 | alert01k1: "alert01v1" 19 | annotations: 20 | alert02k1: "alert02k2" 21 | pageAlert: 22 | labels: 23 | alert03k1: "alert03v1" 24 | ticketAlert: 25 | labels: 26 | alert04k1: "alert04v1" 27 | - name: "slo02" 28 | objective: 95 29 | description: "This is SLO 02." 30 | labels: 31 | global03k1: global03v1 32 | sli: 33 | raw: 34 | error_ratio_query: | 35 | sum(rate(http_request_duration_seconds_count{job="myservice",code=~"(5..|429)"}[{{.window}}])) 36 | / 37 | sum(rate(http_request_duration_seconds_count{job="myservice"}[{{.window}}])) 38 | alerting: 39 | page_alert: 40 | disable: true 41 | ticket_alert: 42 | disable: true 43 | -------------------------------------------------------------------------------- /test/integration/prometheus/testdata/validate/good/good-ab.yaml: -------------------------------------------------------------------------------- 1 | version: "prometheus/v1" 2 | service: "svc01" 3 | labels: 4 | global01k1: global01v1 5 | slos: 6 | - name: "slo1" 7 | objective: 99.9 8 | description: "This is SLO 01." 9 | labels: 10 | global02k1: global02v1 11 | sli: 12 | events: 13 | error_query: sum(rate(http_request_duration_seconds_count{job="myservice",code=~"(5..|429)"}[{{.window}}])) 14 | total_query: sum(rate(http_request_duration_seconds_count{job="myservice"}[{{.window}}])) 15 | alerting: 16 | name: myServiceAlert 17 | labels: 18 | alert01k1: "alert01v1" 19 | annotations: 20 | alert02k1: "alert02k2" 21 | pageAlert: 22 | labels: 23 | alert03k1: "alert03v1" 24 | ticketAlert: 25 | labels: 26 | alert04k1: "alert04v1" 27 | - name: "slo02" 28 | objective: 95 29 | description: "This is SLO 02." 30 | labels: 31 | global03k1: global03v1 32 | sli: 33 | raw: 34 | error_ratio_query: | 35 | sum(rate(http_request_duration_seconds_count{job="myservice",code=~"(5..|429)"}[{{.window}}])) 36 | / 37 | sum(rate(http_request_duration_seconds_count{job="myservice"}[{{.window}}])) 38 | alerting: 39 | page_alert: 40 | disable: true 41 | ticket_alert: 42 | disable: true 43 | -------------------------------------------------------------------------------- /test/integration/prometheus/testdata/validate/good/good-ba.yaml: -------------------------------------------------------------------------------- 1 | version: "prometheus/v1" 2 | service: "svc01" 3 | labels: 4 | global01k1: global01v1 5 | slos: 6 | - name: "slo1" 7 | objective: 99.9 8 | description: "This is SLO 01." 9 | labels: 10 | global02k1: global02v1 11 | sli: 12 | events: 13 | error_query: sum(rate(http_request_duration_seconds_count{job="myservice",code=~"(5..|429)"}[{{.window}}])) 14 | total_query: sum(rate(http_request_duration_seconds_count{job="myservice"}[{{.window}}])) 15 | alerting: 16 | name: myServiceAlert 17 | labels: 18 | alert01k1: "alert01v1" 19 | annotations: 20 | alert02k1: "alert02k2" 21 | pageAlert: 22 | labels: 23 | alert03k1: "alert03v1" 24 | ticketAlert: 25 | labels: 26 | alert04k1: "alert04v1" 27 | - name: "slo02" 28 | objective: 95 29 | description: "This is SLO 02." 
30 | labels: 31 | global03k1: global03v1 32 | sli: 33 | raw: 34 | error_ratio_query: | 35 | sum(rate(http_request_duration_seconds_count{job="myservice",code=~"(5..|429)"}[{{.window}}])) 36 | / 37 | sum(rate(http_request_duration_seconds_count{job="myservice"}[{{.window}}])) 38 | alerting: 39 | page_alert: 40 | disable: true 41 | ticket_alert: 42 | disable: true 43 | -------------------------------------------------------------------------------- /test/integration/prometheus/testdata/validate/good/good-k8s.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: sloth.slok.dev/v1 2 | kind: PrometheusServiceLevel 3 | metadata: 4 | name: svc 5 | namespace: test-ns 6 | spec: 7 | service: "svc01" 8 | labels: 9 | global01k1: global01v1 10 | slos: 11 | - name: "slo1" 12 | objective: 99.9 13 | description: "This is SLO 01." 14 | labels: 15 | global02k1: global02v1 16 | sli: 17 | events: 18 | errorQuery: sum(rate(http_request_duration_seconds_count{job="myservice",code=~"(5..|429)"}[{{.window}}])) 19 | totalQuery: sum(rate(http_request_duration_seconds_count{job="myservice"}[{{.window}}])) 20 | alerting: 21 | name: myServiceAlert 22 | labels: 23 | alert01k1: "alert01v1" 24 | annotations: 25 | alert02k1: "alert02k2" 26 | pageAlert: 27 | labels: 28 | alert03k1: "alert03v1" 29 | ticketAlert: 30 | labels: 31 | alert04k1: "alert04v1" 32 | - name: "slo02" 33 | objective: 95 34 | description: "This is SLO 02." 35 | labels: 36 | global03k1: global03v1 37 | sli: 38 | raw: 39 | errorRatioQuery: | 40 | sum(rate(http_request_duration_seconds_count{job="myservice",code=~"(5..|429)"}[{{.window}}])) 41 | / 42 | sum(rate(http_request_duration_seconds_count{job="myservice"}[{{.window}}])) 43 | alerting: 44 | pageAlert: 45 | disable: true 46 | ticketAlert: 47 | disable: true 48 | -------------------------------------------------------------------------------- /test/integration/prometheus/testdata/validate/good/good-multi-k8s.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: sloth.slok.dev/v1 3 | kind: PrometheusServiceLevel 4 | metadata: 5 | name: svc 6 | namespace: test-ns 7 | spec: 8 | service: "svc01" 9 | labels: 10 | global01k1: global01v1 11 | slos: 12 | - name: "slo1" 13 | objective: 99.9 14 | description: "This is SLO 01." 15 | labels: 16 | global02k1: global02v1 17 | sli: 18 | events: 19 | errorQuery: sum(rate(http_request_duration_seconds_count{job="myservice",code=~"(5..|429)"}[{{.window}}])) 20 | totalQuery: sum(rate(http_request_duration_seconds_count{job="myservice"}[{{.window}}])) 21 | alerting: 22 | name: myServiceAlert 23 | labels: 24 | alert01k1: "alert01v1" 25 | annotations: 26 | alert02k1: "alert02k2" 27 | pageAlert: 28 | labels: 29 | alert03k1: "alert03v1" 30 | ticketAlert: 31 | labels: 32 | alert04k1: "alert04v1" 33 | - name: "slo02" 34 | objective: 95 35 | description: "This is SLO 02." 
36 | labels: 37 | global03k1: global03v1 38 | sli: 39 | raw: 40 | errorRatioQuery: | 41 | sum(rate(http_request_duration_seconds_count{job="myservice",code=~"(5..|429)"}[{{.window}}])) 42 | / 43 | sum(rate(http_request_duration_seconds_count{job="myservice"}[{{.window}}])) 44 | alerting: 45 | pageAlert: 46 | disable: true 47 | ticketAlert: 48 | disable: true 49 | 50 | --- 51 | apiVersion: sloth.slok.dev/v1 52 | kind: PrometheusServiceLevel 53 | metadata: 54 | name: svc-2 55 | namespace: test-ns-2 56 | spec: 57 | service: "svc02" 58 | labels: 59 | global01k1: global01v1 60 | slos: 61 | - name: "slo1" 62 | objective: 99.99 63 | description: "This is SLO 01." 64 | labels: 65 | global02k1: global02v1 66 | sli: 67 | events: 68 | errorQuery: sum(rate(http_request_duration_seconds_count{job="myservice",code=~"(5..|429)"}[{{.window}}])) 69 | totalQuery: sum(rate(http_request_duration_seconds_count{job="myservice"}[{{.window}}])) 70 | alerting: 71 | name: myServiceAlert 72 | labels: 73 | alert01k1: "alert01v1" 74 | annotations: 75 | alert02k1: "alert02k2" 76 | pageAlert: 77 | labels: 78 | alert03k1: "alert03v1" 79 | ticketAlert: 80 | labels: 81 | alert04k1: "alert04v1" 82 | - name: "slo02" 83 | objective: 95 84 | description: "This is SLO 02." 85 | labels: 86 | global03k1: global03v1 87 | sli: 88 | raw: 89 | errorRatioQuery: | 90 | sum(rate(http_request_duration_seconds_count{job="myservice",code=~"(5..|429)"}[{{.window}}])) 91 | / 92 | sum(rate(http_request_duration_seconds_count{job="myservice"}[{{.window}}])) 93 | alerting: 94 | pageAlert: 95 | disable: true 96 | ticketAlert: 97 | disable: true 98 | -------------------------------------------------------------------------------- /test/integration/prometheus/testdata/validate/good/good-multi.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | version: "prometheus/v1" 3 | service: "svc01" 4 | labels: 5 | global01k1: global01v1 6 | slos: 7 | - name: "slo1" 8 | objective: 99.9 9 | description: "This is SLO 01." 10 | labels: 11 | global02k1: global02v1 12 | sli: 13 | events: 14 | error_query: sum(rate(http_request_duration_seconds_count{job="myservice",code=~"(5..|429)"}[{{.window}}])) 15 | total_query: sum(rate(http_request_duration_seconds_count{job="myservice"}[{{.window}}])) 16 | alerting: 17 | name: myServiceAlert 18 | labels: 19 | alert01k1: "alert01v1" 20 | annotations: 21 | alert02k1: "alert02k2" 22 | page_alert: 23 | labels: 24 | alert03k1: "alert03v1" 25 | ticket_alert: 26 | labels: 27 | alert04k1: "alert04v1" 28 | - name: "slo02" 29 | objective: 95 30 | description: "This is SLO 02." 31 | labels: 32 | global03k1: global03v1 33 | sli: 34 | raw: 35 | error_ratio_query: | 36 | sum(rate(http_request_duration_seconds_count{job="myservice",code=~"(5..|429)"}[{{.window}}])) 37 | / 38 | sum(rate(http_request_duration_seconds_count{job="myservice"}[{{.window}}])) 39 | alerting: 40 | page_alert: 41 | disable: true 42 | ticket_alert: 43 | disable: true 44 | 45 | --- 46 | version: "prometheus/v1" 47 | service: "svc02" 48 | labels: 49 | global01k1: global01v1 50 | slos: 51 | - name: "slo1" 52 | objective: 99.99 53 | description: "This is SLO 01." 
54 | labels: 55 | global02k1: global02v1 56 | sli: 57 | events: 58 | error_query: sum(rate(http_request_duration_seconds_count{job="myservice",code=~"(5..|429)"}[{{.window}}])) 59 | total_query: sum(rate(http_request_duration_seconds_count{job="myservice"}[{{.window}}])) 60 | alerting: 61 | name: myServiceAlert 62 | labels: 63 | alert01k1: "alert01v1" 64 | annotations: 65 | alert02k1: "alert02k2" 66 | page_alert: 67 | labels: 68 | alert03k1: "alert03v1" 69 | ticket_alert: 70 | labels: 71 | alert04k1: "alert04v1" 72 | - name: "slo02" 73 | objective: 95 74 | description: "This is SLO 02." 75 | labels: 76 | global03k1: global03v1 77 | sli: 78 | raw: 79 | error_ratio_query: | 80 | sum(rate(http_request_duration_seconds_count{job="myservice",code=~"(5..|429)"}[{{.window}}])) 81 | / 82 | sum(rate(http_request_duration_seconds_count{job="myservice"}[{{.window}}])) 83 | alerting: 84 | page_alert: 85 | disable: true 86 | ticket_alert: 87 | disable: true 88 | -------------------------------------------------------------------------------- /test/integration/prometheus/testdata/validate/good/good-openslo.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: openslo/v1alpha 2 | kind: SLO 3 | metadata: 4 | name: slo1 5 | displayName: Integration test SLO1 6 | spec: 7 | service: svc01 8 | description: "this is SLO1." 9 | budgetingMethod: Occurrences 10 | objectives: 11 | - ratioMetrics: 12 | good: 13 | source: prometheus 14 | queryType: promql 15 | query: sum(rate(http_request_duration_seconds_count{job="myservice",code!~"(5..|429)"}[{{.window}}])) 16 | total: 17 | source: prometheus 18 | queryType: promql 19 | query: sum(rate(http_request_duration_seconds_count{job="myservice"}[{{.window}}])) 20 | target: 0.999 21 | timeWindows: 22 | - count: 30 23 | unit: Day 24 | -------------------------------------------------------------------------------- /test/integration/prometheus/validate_test.go: -------------------------------------------------------------------------------- 1 | package prometheus_test 2 | 3 | import ( 4 | "context" 5 | "testing" 6 | 7 | "github.com/stretchr/testify/assert" 8 | 9 | "github.com/slok/sloth/test/integration/prometheus" 10 | ) 11 | 12 | func TestPrometheusValidate(t *testing.T) { 13 | // Tests config. 14 | config := prometheus.NewConfig(t) 15 | 16 | // Tests. 
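	// Note: --fs-include and --fs-exclude take regular expressions matched
	// against the discovered spec file paths, and exclusion takes precedence
	// over inclusion (exercised by the last case below).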
17 | 	tests := map[string]struct {
18 | 		valCmdArgs string
19 | 		expErr     bool
20 | 	}{
21 | 		"Discovery of good specs should validate correctly.": {
22 | 			valCmdArgs: "--input ./testdata/validate/good",
23 | 		},
24 | 
25 | 		"Discovery of bad specs should validate with failures.": {
26 | 			valCmdArgs: "--input ./testdata/validate/bad",
27 | 			expErr:     true,
28 | 		},
29 | 
30 | 		"Discovery of all specs should validate with failures.": {
31 | 			valCmdArgs: "--input ./testdata/validate",
32 | 			expErr:     true,
33 | 		},
34 | 
35 | 		"Discovery of all specs excluding bad ones should validate correctly.": {
36 | 			valCmdArgs: "--input ./testdata/validate --fs-exclude bad",
37 | 		},
38 | 
39 | 		"Discovery of all specs including only good ones should validate correctly.": {
40 | 			valCmdArgs: "--input ./testdata/validate --fs-include good",
41 | 		},
42 | 
43 | 		"Discovery of no specs should fail.": {
44 | 			valCmdArgs: "--input ./testdata/validate --fs-exclude .*",
45 | 			expErr:     true,
46 | 		},
47 | 
48 | 		"Discovery of all specs excluding bad and including a bad one should validate correctly because exclude has preference.": {
49 | 			valCmdArgs: "--input ./testdata/validate --fs-exclude bad --fs-include .*-aa.*",
50 | 		},
51 | 	}
52 | 
53 | 	for name, test := range tests {
54 | 		t.Run(name, func(t *testing.T) {
55 | 			assert := assert.New(t)
56 | 
57 | 			// Run with context to stop on test end.
58 | 			ctx, cancel := context.WithCancel(context.Background())
59 | 			defer cancel()
60 | 
61 | 			_, _, err := prometheus.RunSlothValidate(ctx, config, test.valCmdArgs)
62 | 
63 | 			if test.expErr {
64 | 				assert.Error(err)
65 | 			} else {
66 | 				assert.NoError(err)
67 | 			}
68 | 		})
69 | 	}
70 | }
71 | 
--------------------------------------------------------------------------------
/test/integration/prometheus/windows/7d.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: sloth.slok.dev/v1
2 | kind: AlertWindows
3 | spec:
4 |   sloPeriod: 7d
5 |   page:
6 |     quick:
7 |       errorBudgetPercent: 8
8 |       shortWindow: 5m
9 |       longWindow: 1h
10 |     slow:
11 |       errorBudgetPercent: 12.5
12 |       shortWindow: 30m
13 |       longWindow: 6h
14 |   ticket:
15 |     quick:
16 |       errorBudgetPercent: 20
17 |       shortWindow: 2h
18 |       longWindow: 24h
19 |     slow:
20 |       errorBudgetPercent: 42
21 |       shortWindow: 6h
22 |       longWindow: 72h
23 | 
--------------------------------------------------------------------------------
/test/integration/testutils/cmd.go:
--------------------------------------------------------------------------------
1 | package testutils
2 | 
3 | import (
4 | 	"bytes"
5 | 	"context"
6 | 	"fmt"
7 | 	"os"
8 | 	"os/exec"
9 | 	"regexp"
10 | 	"strings"
11 | )
12 | 
13 | var multiSpaceRegex = regexp.MustCompile(" +")
14 | 
15 | // RunSloth executes a sloth command.
16 | func RunSloth(ctx context.Context, env []string, cmdApp, cmdArgs string, nolog bool) (stdout, stderr []byte, err error) {
17 | 	// Sanitize command.
18 | 	cmdArgs = strings.TrimSpace(cmdArgs)
19 | 	cmdArgs = multiSpaceRegex.ReplaceAllString(cmdArgs, " ")
20 | 
21 | 	// Split into args.
22 | 	args := strings.Split(cmdArgs, " ")
23 | 
24 | 	// Create command.
25 | 	var outData, errData bytes.Buffer
26 | 	cmd := exec.CommandContext(ctx, cmdApp, args...)
27 | 	cmd.Stdout = &outData
28 | 	cmd.Stderr = &errData
29 | 
30 | 	// Set env.
31 | 	newEnv := append([]string{}, env...)
32 | 	newEnv = append(newEnv, os.Environ()...)
33 | 	if nolog {
34 | 		newEnv = append(newEnv,
35 | 			"SLOTH_NO_LOG=true",
36 | 			"SLOTH_NO_COLOR=true",
37 | 		)
38 | 	}
39 | 	cmd.Env = newEnv
40 | 
41 | 	// Run.
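	// cmd.Run blocks until the command exits; a non-zero exit status surfaces
	// as an error (*exec.ExitError), which the integration tests treat as a
	// failed sloth run.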
42 | 	err = cmd.Run()
43 | 
44 | 	return outData.Bytes(), errData.Bytes(), err
45 | }
46 | 
47 | func SlothVersion(ctx context.Context, slothBinary string) (string, error) {
48 | 	stdout, stderr, err := RunSloth(ctx, []string{}, slothBinary, "version", false)
49 | 	if err != nil {
50 | 		return "", fmt.Errorf("could not obtain version: %s: %w", stderr, err)
51 | 	}
52 | 
53 | 	version := string(stdout)
54 | 	version = strings.TrimSpace(version)
55 | 
56 | 	return version, nil
57 | }
58 | 
--------------------------------------------------------------------------------
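For reference, a minimal sketch of how the helpers above compose into a new
integration test case. This is a hypothetical example, not part of the suite:
it assumes a sloth binary exposed through the SLOTH_INTEGRATION_BINARY
environment variable read by NewConfig, and skips itself otherwise.

package prometheus_test

import (
	"context"
	"testing"

	"github.com/slok/sloth/test/integration/prometheus"
)

// TestValidateGoodSpecs (hypothetical name) expects the known-good specs
// under ./testdata/validate/good to validate without error.
func TestValidateGoodSpecs(t *testing.T) {
	config := prometheus.NewConfig(t) // Skips the test if no binary is configured.

	_, _, err := prometheus.RunSlothValidate(context.Background(), config, "--input ./testdata/validate/good")
	if err != nil {
		t.Fatalf("good specs should validate without error: %v", err)
	}
}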