├── .circleci └── config.yml ├── .github └── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md ├── .gitignore ├── .golangci.yaml ├── .goreleaser.yml ├── CHANGELOG.md ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── Dockerfile ├── LICENSE ├── Makefile ├── README.md ├── cmd └── slo_exporter.go ├── docs ├── architecture.md ├── configuration.md ├── defining_new_slo.md ├── glossary.md ├── modules │ ├── dynamic_classifier.md │ ├── envoy_access_log_server.md │ ├── event_key_generator.md │ ├── event_metadata_renamer.md │ ├── kafka_ingester.md │ ├── metadata_classifier.md │ ├── prometheus_exporter.md │ ├── prometheus_ingester.md │ ├── relabel.md │ ├── slo_event_producer.md │ ├── statistical_classifier.md │ └── tailer.md └── operating.md ├── examples ├── README.md ├── all_in_one │ ├── README.md │ ├── docker-compose.yaml │ ├── example-domain-slo-conf.yaml │ ├── grafana │ │ └── provisioning │ │ │ ├── dashboards │ │ │ ├── SLO_Effective_Burn-rate.json │ │ │ ├── SLO_detailed.json │ │ │ ├── SLO_domains_overview.json │ │ │ ├── SLO_drilldown.json │ │ │ ├── dashboard.yml │ │ │ └── slo_exporter.json │ │ │ └── datasources │ │ │ └── datasource.yml │ ├── nginx │ │ └── conf │ │ │ └── nginx.conf │ ├── prometheus │ │ ├── alerts │ │ └── recording_rules │ └── slo-exporter │ │ └── conf │ │ ├── classification.csv │ │ ├── slo_exporter.yaml │ │ └── slo_rules.yaml ├── envoy_proxy │ ├── README.md │ ├── docker-compose.yaml │ ├── envoy │ │ └── envoy.yaml │ └── slo-exporter │ │ ├── slo_exporter.yaml │ │ └── slo_rules.yaml ├── kafka │ ├── README.md │ ├── docker-compose.yaml │ └── slo-exporter │ │ ├── slo_exporter.yaml │ │ └── slo_rules.yaml ├── nginx_proxy │ ├── README.md │ ├── classification.csv │ ├── slo_exporter.yaml │ ├── slo_rules.yaml │ └── test.log └── prometheus │ ├── README.md │ ├── exact_events_classification.csv │ ├── regexp_events_classification.csv │ ├── slo_exporter.yaml │ └── slo_rules.yaml ├── go.mod ├── go.sum ├── grafana_dashboards ├── README.md ├── 
SLO_Effective_Burn-rate.json ├── SLO_detailed.json ├── SLO_domains_overview.json ├── SLO_drilldown.json └── slo_exporter.json ├── kubernetes ├── README.md ├── slo-exporter-configmap.yaml ├── slo-exporter-service.yaml └── slo-exporter-statefulset.yaml ├── pkg ├── config │ └── config.go ├── dynamic_classifier │ ├── dynamic_classifier.go │ ├── dynamic_classifier_test.go │ ├── matcher.go │ ├── matcher_test.go │ ├── memory_exact_matcher.go │ ├── regexp_matcher.go │ └── testdata │ │ ├── TestClassificationByExactMatches.golden │ │ ├── TestClassificationByRegexpMatches.golden │ │ ├── TestLoadExactMatchesFromMultipleCSV.golden │ │ ├── TestLoadRegexpMatchesFromMultipleCSV.golden │ │ ├── TestMatcherExactDumpCSV.golden │ │ ├── TestMatcherRegexpDumpCSV.golden │ │ └── Test_DynamicClassifier_Classify_OverridesCacheFromConfig.golden ├── envoy_access_log_server │ ├── access_log_server.go │ ├── service_v3.go │ ├── service_v3_test.go │ └── util.go ├── event │ ├── raw.go │ ├── slo.go │ └── slo_classification.go ├── event_key_generator │ ├── event_key_generator.go │ └── event_key_generator_test.go ├── event_metadata_renamer │ ├── renamer.go │ └── renamer_test.go ├── kafka_ingester │ ├── kafka_ingester.go │ └── kafka_ingester_test.go ├── metadata_classifier │ ├── metadata_cassifier.go │ └── metadata_cassifier_test.go ├── pipeline │ ├── manager.go │ ├── manager_test.go │ ├── module.go │ └── module_test.go ├── prober │ ├── prober.go │ └── prober_test.go ├── prometheus_exporter │ ├── aggregating_counter.go │ ├── aggregating_counter_test.go │ ├── exemplars.go │ ├── prometheus_exporter.go │ └── prometheus_exporter_test.go ├── prometheus_ingester │ ├── headerRoundTripper.go │ ├── headerRoundTripper_test.go │ ├── prometheus_ingester.go │ ├── prometheus_ingester_test.go │ ├── query_executor.go │ └── query_executor_test.go ├── relabel │ ├── relabel.go │ └── relabel_test.go ├── slo_event_producer │ ├── config.go │ ├── config_test.go │ ├── event_evaluator.go │ ├── event_evaluator_test.go │ ├── 
operator.go │ ├── operator_test.go │ ├── rule.go │ ├── rule_test.go │ ├── slo_event_producer.go │ └── testdata │ │ ├── slo_rules_invalid.yaml.golden │ │ └── slo_rules_valid.yaml.golden ├── statistical_classifier │ ├── statistical_classifier.go │ ├── weighted_classifier.go │ └── weighted_classifier_test.go ├── storage │ ├── capped_container_test.go │ ├── container_test.go │ ├── in_memory.go │ └── interfaces.go ├── stringmap │ ├── stringmap.go │ ├── stringmap_benchmark_test.go │ └── stringmap_test.go └── tailer │ ├── tailer.go │ └── tailer_test.go ├── prometheus ├── alerts │ ├── error-budget.yaml │ ├── missing_all_data.yaml │ ├── missing_data.yaml │ ├── slo_burn_rate.yaml │ ├── slo_data_corrections.yaml │ └── slo_exporter_alerts.yaml └── recording_rules │ ├── burn-rate.yaml │ ├── error-budget.yaml │ ├── events-over-time-slo-exporter.yaml │ ├── events-over-time.yaml │ ├── example-domain.yaml │ ├── rate-coefficient.yaml │ └── slo_data_corrections.yaml ├── scripts ├── benchmark.sh └── generate_godoc.sh ├── test ├── Test_MetricsInitialization │ ├── README.md │ ├── classifications.csv │ ├── logs │ ├── metrics │ ├── slo_exporter.yaml │ └── slo_rules.yaml ├── Test_SloHeaders │ ├── README.md │ ├── classifications.csv │ ├── logs │ ├── metrics │ ├── slo_exporter.yaml │ └── slo_rules.yaml ├── Test_SloHeadersUpdateCache │ ├── README.md │ ├── classifications.csv │ ├── logs │ ├── metrics │ ├── slo_exporter.yaml │ └── slo_rules.yaml └── run_tests.sh └── tools └── slo-rules-generator ├── README.md ├── alerting.go ├── all-in-one-example-domain.yaml ├── class.go ├── domain.go ├── go.mod ├── go.sum ├── slo-domains.yaml.example ├── slo-rules-generator.go └── slo-rules-generator_test.go /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | version: 2.1 2 | orbs: 3 | go: circleci/go@1.7.3 4 | 5 | defaults: &defaults 6 | executor: 7 | name: go/default # Use the default executor from the orb 8 | tag: "1.23" 9 | 10 | jobs: 11 | lint: 12 | 
<<: *defaults 13 | steps: 14 | - checkout # checkout source code 15 | - go/load-cache # Load cached Go modules. 16 | - run: GOMAXPROCS=1 GOMEMLIMIT=1750MiB make lint 17 | - go/save-cache # Save Go modules to cache. 18 | 19 | test: 20 | <<: *defaults 21 | steps: 22 | - checkout # checkout source code 23 | - go/load-cache # Load cached Go modules. 24 | - run: make test-and-coverage 25 | - go/save-cache # Save Go modules to cache. 26 | 27 | test-release: 28 | <<: *defaults 29 | steps: 30 | - checkout 31 | - setup_remote_docker 32 | - run: make test-release 33 | 34 | build: 35 | <<: *defaults 36 | steps: 37 | - checkout # checkout source code 38 | - go/load-cache # Load cached Go modules. 39 | - run: make build 40 | - go/save-cache # Save Go modules to cache. 41 | - persist_to_workspace: 42 | root: . 43 | paths: 44 | - slo_exporter 45 | 46 | build_docker: 47 | <<: *defaults 48 | steps: 49 | - checkout 50 | - setup_remote_docker 51 | - attach_workspace: 52 | at: . 53 | - run: make docker 54 | 55 | release: 56 | <<: *defaults 57 | steps: 58 | - checkout # checkout source code 59 | - go/load-cache # Load cached Go modules. 
60 | - setup_remote_docker 61 | - run: | 62 | echo "${DOCKERHUB_PASSWORD}" | docker login -u="${DOCKERHUB_USER}" --password-stdin 63 | make release 64 | 65 | workflows: 66 | version: 2 67 | slo-exporter: 68 | jobs: 69 | - lint 70 | - test 71 | - test-release: 72 | filters: 73 | branches: 74 | only: master 75 | - build: 76 | filters: 77 | tags: 78 | ignore: /.*/ 79 | - build_docker: 80 | requires: 81 | - build 82 | filters: 83 | tags: 84 | ignore: /.*/ 85 | - release: 86 | filters: 87 | tags: 88 | only: /^v[0-9]+(\.[0-9]+){2}(-.+|[^-.]*)$/ 89 | branches: 90 | ignore: /.*/ 91 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve slo-exporter 4 | title: "[BUG]" 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 |
10 | Please read this before submitting a bug report 11 | 12 | * **Check the [debugging guide](docs/operating.md).** You might be able to find the cause of the problem and fix things yourself. Most importantly, check if you can reproduce the problem in the latest version of slo-exporter. 13 | * **Perform a [cursory search](https://github.com/search?q=+is%3Aissue+repo%3Aslo-exporter)** to see if the problem has already been reported. If it has **and the issue is still open**, add a comment to the existing issue instead of opening a new one. 14 | 15 | #### How Do I Submit A (Good) Bug Report? 16 | 17 | Explain the problem and include additional details to help maintainers reproduce the problem: 18 | 19 | * **Use a clear and descriptive title** for the issue to identify the problem. 20 | * **Describe the exact steps which reproduce the problem** in as many details as possible. For example, start by explaining how you started slo-exporter, e.g. which command exactly you used in the terminal. When listing steps, **don't just say what you did, but explain how you did it**. 21 | * **Provide specific examples to demonstrate the steps**. Include links to files or GitHub projects, or copy/pasteable snippets, which you use in those examples. If you're providing snippets in the issue, use [Markdown code blocks](https://help.github.com/articles/markdown-basics/#multiple-lines). 22 | * **Describe the behavior you observed after following the steps** and point out what exactly is the problem with that behavior. 23 | * **Explain which behavior you expected to see instead and why.** 24 | * **If you're reporting that slo-exporter crashed**, include a crash report with a stack trace from the operating system. 
Include the crash report in the issue in a [code block](https://help.github.com/articles/markdown-basics/#multiple-lines), a [file attachment](https://help.github.com/articles/file-attachments-on-issues-and-pull-requests/), or put it in a [gist](https://gist.github.com/) and provide link to that gist. 25 | * **If the problem is related to performance or memory**, include a [CPU profile capture](docs/operating.md#profiling) with your report. 26 | * **If the problem wasn't triggered by a specific action**, describe what you were doing before the problem happened and share more information using the guidelines below. 27 | 28 | Provide more context by answering these questions: 29 | 30 | * **Did the problem start happening recently** (e.g. after updating to a new version) or was this always a problem? 31 | * If the problem started happening recently, **can you reproduce the problem in an older version of slo-exporter?** What's the most recent version in which the problem doesn't happen? 32 | * **Can you reliably reproduce the issue?** If not, provide details about how often the problem happens and under which conditions it normally happens. 33 | 34 | Include details about your configuration and environment: 35 | 36 | * **Which version are you using?** You can get the exact version by running `slo-exporter --version` in your terminal. 37 | * **What's the name and version of the OS you're using**? 38 | * **Are you running slo-exporter in a virtual machine or container?** If so, which VM software are you using and which operating systems and versions are used for the host and the guest? 39 | * **What are your [local configuration files](docs/configuration.md) and environment variables?** `slo_exporter.yaml` and possibly others. 40 |
41 | --- 42 | 43 | #### Describe the bug 44 | FILL ME 45 | 46 | #### How to reproduce the bug 47 | FILL ME 48 | 49 | #### Expected behavior 50 | A clear and concise description of what you expected to happen. 51 | 52 | #### Additional context 53 | FILL ME 54 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: "[ENHANCEMENT]" 5 | labels: enhancement 6 | assignees: '' 7 | 8 | --- 9 | 10 |
Please read this before submitting a feature request. 11 | 12 | #### Before Submitting An Enhancement Suggestion 13 | 14 | * **Check the [debugging guide](/docs/operating.md)** for tips — you might discover that the enhancement is already available. Most importantly, check if you're using the latest version and if you can get the desired behavior by changing [configuration settings](/docs/configuration.md). 15 | * **Perform a [cursory search](https://github.com/search?q=+is%3Aissue+repo%3Aseznam%2Fslo-exporter)** to see if the enhancement has already been suggested. If it has, add a comment to the existing issue instead of opening a new one. 16 | 17 | #### How Do I Submit A (Good) Enhancement Suggestion? 18 | 19 | Enhancement suggestions are tracked as [GitHub issues](https://guides.github.com/features/issues/). Create an issue on this repository and provide the following information: 20 | 21 | * **Use a clear and descriptive title** for the issue to identify the suggestion. 22 | * **Provide a step-by-step description of the suggested enhancement** in as much detail as possible. 23 | * **Provide specific examples to demonstrate the steps**. Include copy/pasteable snippets which you use in those examples, as [Markdown code blocks](https://help.github.com/articles/markdown-basics/#multiple-lines). 24 | * **Describe the current behavior** and **explain which behavior you expected to see instead** and why. 25 | * **Explain why this enhancement would be useful** to most users. 26 | * **Specify which version you're using.** You can get the exact version by running `slo-exporter --version` in your terminal. 27 | * **Specify the name and version of the OS you're using.** 28 |
29 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | vendor/ 2 | 3 | slo_exporter 4 | dist 5 | tmp 6 | 7 | **/*.pos 8 | **/test_output/ 9 | -------------------------------------------------------------------------------- /.golangci.yaml: -------------------------------------------------------------------------------- 1 | linters: 2 | enable: 3 | - contextcheck 4 | - durationcheck 5 | - errcheck 6 | - errname 7 | - errorlint 8 | - gocritic 9 | - gofmt 10 | - gofumpt 11 | - goimports 12 | - gosimple 13 | - govet 14 | - ineffassign 15 | - misspell 16 | - nakedret 17 | - nilerr 18 | - nilnil 19 | - prealloc 20 | - predeclared 21 | - promlinter 22 | - revive 23 | - staticcheck 24 | - stylecheck 25 | - typecheck 26 | - unconvert 27 | - unparam 28 | - unused 29 | - usestdlibvars 30 | 31 | linters-settings: 32 | # I'm biased and I'm enabling more than 100 checks 33 | # Might be too much for you. See https://go-critic.com/overview.html 34 | gocritic: 35 | enabled-tags: 36 | - diagnostic 37 | - experimental 38 | - opinionated 39 | - performance 40 | - style 41 | disabled-checks: 42 | # These 3 will detect many cases, but they do sense 43 | # if it's performance oriented code 44 | - hugeParam 45 | - rangeExprCopy 46 | - rangeValCopy 47 | 48 | errcheck: 49 | # Report `a := b.(MyStruct)` when `a, ok := ...` should be. 50 | check-type-assertions: true # Default: false 51 | 52 | # Report skipped checks:`num, _ := strconv.Atoi(numStr)`. 53 | check-blank: true # Default: false 54 | 55 | # Function to skip. 
56 | exclude-functions: 57 | - io/ioutil.ReadFile 58 | - io.Copy(*bytes.Buffer) 59 | - io.Copy(os.Stdout) 60 | 61 | govet: 62 | disable: 63 | - fieldalignment # I'm ok to waste some bytes 64 | 65 | nakedret: 66 | max-func-lines: 1 # Default: 30 67 | 68 | issues: 69 | exclude: 70 | - "var-naming: don't use an underscore in package name" 71 | - "ST1003: should not use underscores in package names" 72 | -------------------------------------------------------------------------------- /.goreleaser.yml: -------------------------------------------------------------------------------- 1 | before: 2 | hooks: 3 | - go mod download 4 | - docker run --rm --privileged multiarch/qemu-user-static --reset -p yes 5 | 6 | builds: 7 | - main: ./cmd/slo_exporter.go 8 | binary: slo_exporter 9 | env: 10 | - CGO_ENABLED=0 11 | goos: 12 | - linux 13 | - windows 14 | - darwin 15 | goarch: 16 | - amd64 17 | - "386" 18 | - arm64 19 | 20 | ignore: 21 | - goos: darwin 22 | goarch: "386" 23 | 24 | source: 25 | enabled: true 26 | 27 | dockers: 28 | - goos: linux 29 | goarch: amd64 30 | image_templates: 31 | - seznam/slo-exporter:{{ .Tag }}-amd64 32 | - seznam/slo-exporter:v{{ .Major }}.{{ .Minor }}-amd64 33 | - seznam/slo-exporter:v{{ .Major }}-amd64 34 | - seznam/slo-exporter:latest-amd64 35 | use: buildx 36 | build_flag_templates: 37 | - --pull 38 | # Labels according to opencontainers label schema https://github.com/opencontainers/image-spec/blob/master/annotations.md 39 | - --label=org.opencontainers.image.created={{.Date}} 40 | - --label=org.opencontainers.image.revision={{.FullCommit}} 41 | - --label=org.opencontainers.image.version={{.Version}} 42 | 43 | - --label=org.opencontainers.image.title={{.ProjectName}} 44 | - --label=org.opencontainers.image.description=Tool to evaluate and generate standardizedSLO metrics from distinct data sources. 45 | - --label=org.opencontainers.image.vendor=Seznam, a.s. 
46 | - --label=org.opencontainers.image.authors=sklik.devops@firma.seznam.cz 47 | - --label=org.opencontainers.image.url={{.GitURL}} 48 | - --label=org.opencontainers.image.documentation={{.GitURL}} 49 | - "--platform=linux/amd64" 50 | - goos: linux 51 | goarch: arm64 52 | image_templates: 53 | - seznam/slo-exporter:{{ .Tag }}-arm64 54 | - seznam/slo-exporter:v{{ .Major }}.{{ .Minor }}-arm64 55 | - seznam/slo-exporter:v{{ .Major }}-arm64 56 | - seznam/slo-exporter:latest-arm64 57 | use: buildx 58 | build_flag_templates: 59 | - --pull 60 | # Labels according to opencontainers label schema https://github.com/opencontainers/image-spec/blob/master/annotations.md 61 | - --label=org.opencontainers.image.created={{.Date}} 62 | - --label=org.opencontainers.image.revision={{.FullCommit}} 63 | - --label=org.opencontainers.image.version={{.Version}} 64 | 65 | - --label=org.opencontainers.image.title={{.ProjectName}} 66 | - --label=org.opencontainers.image.description=Tool to evaluate and generate standardizedSLO metrics from distinct data sources. 67 | - --label=org.opencontainers.image.vendor=Seznam, a.s. 
68 | - --label=org.opencontainers.image.authors=sklik.devops@firma.seznam.cz 69 | - --label=org.opencontainers.image.url={{.GitURL}} 70 | - --label=org.opencontainers.image.documentation={{.GitURL}} 71 | - "--platform=linux/arm64" 72 | docker_manifests: 73 | - name_template: "seznam/slo-exporter:{{ .Tag }}" 74 | image_templates: 75 | - "seznam/slo-exporter:{{ .Tag }}-amd64" 76 | - "seznam/slo-exporter:{{ .Tag }}-arm64" 77 | - name_template: "seznam/slo-exporter:v{{ .Major }}.{{ .Minor }}" 78 | image_templates: 79 | - "seznam/slo-exporter:v{{ .Major }}.{{ .Minor }}-amd64" 80 | - "seznam/slo-exporter:v{{ .Major }}.{{ .Minor }}-arm64" 81 | - name_template: "seznam/slo-exporter:v{{ .Major }}" 82 | image_templates: 83 | - "seznam/slo-exporter:v{{ .Major }}-amd64" 84 | - "seznam/slo-exporter:v{{ .Major }}-arm64" 85 | - name_template: "seznam/slo-exporter:latest" 86 | image_templates: 87 | - "seznam/slo-exporter:latest-amd64" 88 | - "seznam/slo-exporter:latest-arm64" 89 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | In order to foster an inclusive, kind, harassment-free, and cooperative community, community enforces this code of conduct on the project. 4 | 5 | ## Summary 6 | 7 | Harassment in code and discussion or violation of physical boundaries is completely unacceptable anywhere in the project codebases, issue trackers, chatrooms, mailing lists, meetups, and other events. Violators will be warned by the core team. Repeat violations will result in being blocked or banned by the core team at or before the 3rd violation. 
8 | 9 | ## In detail 10 | 11 | Harassment includes offensive verbal comments related to gender identity, gender expression, sexual orientation, disability, physical appearance, body size, race, religion, sexual images, deliberate intimidation, stalking, sustained disruption, and unwelcome sexual attention. 12 | 13 | Individuals asked to stop any harassing behavior are expected to comply immediately. 14 | 15 | Maintainers are also subject to the anti-harassment policy. 16 | 17 | If anyone engages in harassing behavior, including maintainers, we may take appropriate action, up to and including warning the offender, deletion of comments, removal from the project’s codebase and communication systems, and escalation to GitHub support. 18 | 19 | If you are being harassed, notice that someone else is being harassed, or have any other concerns, please contact a member of the core team immediately. 20 | 21 | We expect everyone to follow these rules anywhere in the project codebases, issue trackers, chatrooms, and mailing lists. 22 | 23 | Finally, don’t forget that it is human to make mistakes! We all do. Let’s work together to help each other, resolve issues, and learn from the mistakes that we will all inevitably make from time to time. 24 | 25 | ## Thanks 26 | 27 | Thanks to the [thoughtbot team](https://thoughtbot.com/). 28 | 29 | ## (Code of conduct) license 30 | 31 | To the extent possible under law, the [thoughtbot team](https://thoughtbot.com/) has waived all copyright and related or neighboring rights to thoughtbot Code of Conduct. This work is published from the United States. 
32 | 33 | ![](https://licensebuttons.net/p/zero/1.0/88x31.png) 34 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | #### Table Of Contents 2 | 3 | [Code of Conduct](#code-of-conduct) 4 | 5 | [I just have a question](#i-just-have-a-question) 6 | 7 | [Your First Code Contribution](#your-first-code-contribution) 8 | 9 | [Pull Requests](#pull-requests) 10 | 11 | [Styleguides](#styleguides) 12 | 13 | ### Code of Conduct 14 | This project and everyone participating in it is governed by the [Code of Conduct](CODE_OF_CONDUCT.md). By participating, you are expected to uphold this code. 15 | 16 | 17 | ## I just have a question 18 | Please file an issue with the `question` label or contact us via [Slack](/README.md#community). 19 | 20 | ### Your First Code Contribution 21 | 22 | Unsure where to begin contributing to slo-exporter? You can start by looking through these `good-first-issue` and `help-wanted` issues: 23 | 24 | * [Good first issues](https://github.com/seznam/slo-exporter/labels/good%20first%20issue) - issues which should only require a few lines of code, and a test or two. 25 | * [Help wanted issues](https://github.com/seznam/slo-exporter/labels/help%20wanted) - issues which should be a bit more involved than `good-first-issues`. 26 | 27 | ### Pull Requests 28 | 29 | Please follow these steps to have your contribution considered by the maintainers: 30 | 31 | 2. Follow the [styleguides](#styleguides) 32 | 3. After you submit your pull request, verify that all [status checks](https://help.github.com/articles/about-status-checks/) are passing
What if the status checks are failing? If a status check is failing, and you believe that the failure is unrelated to your change, please leave a comment on the pull request explaining why you believe the failure is unrelated.
33 | 34 | While the prerequisites above must be satisfied prior to having your pull request reviewed, the reviewer(s) may ask you to complete additional design work, tests, or other changes before your pull request can be ultimately accepted. 35 | 36 | ## Styleguides 37 | 38 | ### Git Commit Messages 39 | 40 | * Use the present tense ("Add feature" not "Added feature") 41 | * Use the imperative mood ("Move cursor to..." not "Moves cursor to...") 42 | * Limit the first line to 72 characters or less 43 | * Reference issues and pull requests liberally after the first line 44 | 45 | ### Golang Styleguide 46 | 47 | Follow golang [revive](github.com/mgechev/revive) advices and make sure revive reports same or less issues. 48 | 49 | ### Documentation Styleguide 50 | 51 | * Use [Markdown](https://daringfireball.net/projects/markdown). 52 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM debian:stable-slim 2 | 3 | RUN apt-get update && apt-get install ca-certificates -y && apt-get clean 4 | 5 | COPY slo_exporter /slo_exporter/ 6 | COPY Dockerfile / 7 | 8 | WORKDIR /slo_exporter 9 | 10 | ENTRYPOINT ["/slo_exporter/slo_exporter"] 11 | 12 | CMD ["--help"] 13 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | #!/usr/bin/make -f  2 | SRC_DIR := $(shell dirname $(realpath $(lastword $(MAKEFILE_LIST)))) 3 | TMP_DIR ?= $(SRC_DIR)/tmp 4 | TMP_BIN_DIR ?= $(TMP_DIR)/bin 5 | 6 | GORELEASER_VERSION ?= v2.4.4 7 | 8 | .PHONY: all 9 | all: lint test-and-coverage build test-release 10 | 11 | $(TMP_DIR): 12 | mkdir -p $(TMP_DIR) 13 | 14 | $(TMP_BIN_DIR): 15 | mkdir -p $(TMP_BIN_DIR) 16 | 17 | GORELEASER ?= $(TMP_BIN_DIR)/goreleaser 18 | $(GORELEASER): $(TMP_BIN_DIR) 19 | @echo "Downloading goreleaser version $(GORELEASER_VERSION) to 
$(TMP_BIN_DIR) ..." 20 | @curl -sNL "https://github.com/goreleaser/goreleaser/releases/download/$(GORELEASER_VERSION)/goreleaser_Linux_x86_64.tar.gz" | tar -xzf - -C $(TMP_BIN_DIR) 21 | 22 | RELEASE_NOTES ?= $(TMP_DIR)/release_notes 23 | $(RELEASE_NOTES): $(TMP_DIR) 24 | @echo "Generating release notes to $(RELEASE_NOTES) ..." 25 | @csplit -q -n1 --suppress-matched -f $(TMP_DIR)/release-notes-part CHANGELOG.md '/## \[\s*v.*\]/' {1} 26 | @mv $(TMP_DIR)/release-notes-part1 $(RELEASE_NOTES) 27 | @rm $(TMP_DIR)/release-notes-part* 28 | 29 | .PHONY: golangci-lint 30 | golangci-lint: 31 | @echo "Downloading golangci-lint..." 32 | go install github.com/golangci/golangci-lint/cmd/golangci-lint@v1.61.0 33 | 34 | .PHONY: lint 35 | lint: golangci-lint 36 | golangci-lint run --timeout 10m 37 | 38 | .PHONY: lint-fix 39 | lint-fix: golangci-lint 40 | golangci-lint run --fix --timeout 10m 41 | 42 | SLO_EXPORTER_BIN ?= slo_exporter 43 | .PHONY: build 44 | build: 45 | GOOS=$(OS) GOARCH=$(ARCH) CGO_ENABLED=0 go build -o $(SLO_EXPORTER_BIN) -a $(SRC_DIR)/cmd/slo_exporter.go 46 | 47 | .PHONY: docker 48 | docker: build 49 | docker build -t slo_exporter . 50 | 51 | .PHONY: e2e-test 52 | e2e-test: build 53 | ./test/run_tests.sh 54 | 55 | .PHONY: test 56 | test: $(TMP_DIR) 57 | go test -v --race -coverprofile=$(TMP_DIR)/coverage.out $(shell go list $(SRC_DIR)/... | grep -v /vendor/) 58 | 59 | .PHONY: benchmark 60 | benchmark: clean 61 | ./scripts/benchmark.sh 62 | 63 | .PHONY: test-and-coverage 64 | test-and-coverage: test 65 | go tool cover -func $(TMP_DIR)/coverage.out 66 | 67 | .PHONY: cross-build 68 | cross-build: $(GORELEASER) 69 | $(GORELEASER) build --clean 70 | 71 | .PHONY: test-release 72 | test-release: $(RELEASE_NOTES) $(GORELEASER) 73 | $(GORELEASER) release --snapshot --clean --release-notes $(RELEASE_NOTES) 74 | 75 | .PHONY: release 76 | release: $(RELEASE_NOTES) $(GORELEASER) 77 | @echo "Releasing new version do GitHub and DockerHub using goreleaser..." 
78 | $(GORELEASER) release --clean --release-notes $(RELEASE_NOTES) 79 | 80 | .PHONY: clean 81 | clean: 82 | rm -rf dist $(TMP_DIR) $(SLO_EXPORTER_BIN) 83 | find . -type f -name "*.pos" -prune -exec rm -f {} \; 84 | find . -type d -name "test_output" -prune -exec rm -rf {} \; 85 | -------------------------------------------------------------------------------- /docs/architecture.md: -------------------------------------------------------------------------------- 1 | # Architecture 2 | SLO-exporter is written in Go and built using [the pipeline pattern](https://blog.golang.org/pipelines). 3 | 4 | The processed event is passed from one module to another to allow its modification or filtering 5 | for the final state to be reported as an SLI event. 6 | 7 | The flow of the processing pipeline can be dynamically set using a configuration file, so it can be used 8 | for various use cases and event types. 9 | 10 | ### Event Types 11 | Slo-exporter differentiates between two event types: 12 | 13 | ##### Raw 14 | This is an event which came from the data source. It has metadata and quantity, 15 | and you can set its event key, which will be in the resulting metrics and can be used for classification of the event. 16 | 17 | ##### SLO event 18 | Final event generated from the raw event. This event already has an evaluated result and classification 19 | and is then reported to output metrics. 20 | 21 | ### Module types 22 | There is a set of implemented modules, divided into three basic types based on their input/output. 23 | 24 | ##### `producer` 25 | Does not read any events but produces them. These modules serve as sources of the events. 26 | 27 | ##### `ingester` 28 | Reads events but does not produce any. These modules serve for reporting the SLO metrics to some external systems. 29 | 30 | ##### `processor` 31 | Combination of `producer` and `ingester`. It reads an event and produces a new or modified one. 
32 | -------------------------------------------------------------------------------- /docs/configuration.md: -------------------------------------------------------------------------------- 1 | # Configuration 2 | Slo exporter itself is configured using one [base YAML file](#base-config). 3 | Path to this file is configured using the `--config-file` flag. 4 | Additional configuration files might be needed by some modules 5 | depending on their needs and if they are used in the pipeline at all. 6 | 7 | #### ENV variables 8 | Every configuration option in the base YAML file can be overridden (the configuration MUST be present in the yaml config for the ENV to be loaded, it can even be an empty string) by using an ENV variable. 9 | The schema of ENV variable naming is the `SLO_EXPORTER_` prefix and then any key of the YAML 10 | structure in uppercase without any underscores. Underscores are used for separating nested structures. 11 | Example: `SLO_EXPORTER_WEBSERVERLISTENADDRESS=0.0.0.0:8080` or for module configuration `SLO_EXPORTER_TAILER_TAILEDFILE=access.log` 12 | 13 | #### CMD flags 14 | ```bash 15 | $ ./slo_exporter --help-long 16 | usage: slo_exporter --config-file=CONFIG-FILE [<flags>] 17 | 18 | Flags: 19 | --help Show context-sensitive help (also try --help-long and --help-man). 20 | --config-file=CONFIG-FILE SLO exporter configuration file. 21 | --log-level="info" Log level (error, warn, info, debug,trace). 22 | --log-format="text" Log format (text, json). 23 | --check-config Only check config file and exit with 0 if ok and other status code if not. 24 | ``` 25 | 26 | #### Processing pipeline 27 | Slo-exporter allows the pipeline structure to be composed dynamically, 28 | but there are a few basic rules it needs to follow: 29 | - [`producer`](#producers) can be only at the beginning of the pipeline (meaning only a single producer is allowed) 30 | - [`ingester`](#ingesters) module cannot be at the beginning of the pipeline. 
31 | - [`ingester`](#ingesters) module can only be linked to preceding [`producer`](architecture.md#producer) module. 32 | - The event type produced by the preceding module must match the event type ingested by the following one. 33 | 34 | 35 | ### Base config 36 | ```yaml 37 | # Address where the web interface should listen on. 38 | webServerListenAddress: "0.0.0.0:8080" 39 | # Maximum time to wait for all events to be processed after receiving SIGTERM or SIGINT. 40 | maximumGracefulShutdownDuration: "10s" 41 | # How long to wait after the processing pipeline has been shut down before stopping the HTTP server which serves the metrics. 42 | # Useful to make sure metrics are scraped by Prometheus. Ideally set it to Prometheus scrape interval + 1s or more. 43 | # Should be less or equal to afterPipelineShutdownDelay 44 | afterPipelineShutdownDelay: "1s" 45 | 46 | # Defines architecture of the pipeline how the event will be processed by the modules. 47 | pipeline: [] 48 | 49 | # Contains configuration for distinct pipeline module. 50 | modules: 51 | <moduleType>: <moduleConfig> 52 | ``` 53 | 54 | ### `moduleType`: 55 | 56 | ##### Producers: 57 | Only produces new events from the specified data source. 58 | - [`envoy_access_log_server`](modules/envoy_access_log_server.md) 59 | - [`tailer`](modules/tailer.md) 60 | - [`prometheusIngester`](modules/prometheus_ingester.md) 61 | - [`envoyAccessLogServer`](modules/envoy_access_log_server.md) 62 | - [`kafkaIngester`](modules/kafka_ingester.md) 63 | 64 | ##### Processors: 65 | Reads input events, does some processing based on the module type and produces a modified event. 
66 | - [`eventKeyGenerator`](modules/event_key_generator.md) 67 | - [`metadataClassifier`](modules/metadata_classifier.md) 68 | - [`relabel`](modules/relabel.md) 69 | - [`dynamicClassifier`](modules/dynamic_classifier.md) 70 | - [`statisticalClassifier`](modules/statistical_classifier.md) 71 | - [`sloEventProducer`](modules/slo_event_producer.md) 72 | 73 | ##### Ingesters: 74 | Only reads input events but does not produce any. 75 | - [`prometheusExporter`](modules/prometheus_exporter.md) 76 | 77 | Details on how they work and their `moduleConfig` can be found in their own 78 | linked documentation in the [docs/modules](modules) folder. 79 | 80 | #### Configuration examples 81 | Actual examples of usage with full configuration can be found in the [`examples/`](../examples) directory. 82 | 83 | #### Configuration testing 84 | If you want to verify that your configuration is valid, use the `--check-config` flag. 85 | Slo-exporter then just verifies if the configuration is valid and exits with status 0 if ok and 1 if not. 86 | -------------------------------------------------------------------------------- /docs/glossary.md: -------------------------------------------------------------------------------- 1 | ## Glossary 2 | Here we describe some of the terms used throughout the repository. We assume that you have read chapters on SLO from Google's [SRE book](https://landing.google.com/sre/sre-book/toc/) and [SRE workbook](https://landing.google.com/sre/workbook/toc/), so the main focus here is to describe the terms which are specific to slo-exporter. 3 | 4 | ### locality, namespace 5 | We use these labels internally to differentiate between individual K8S clusters (`locality`) and K8S namespaces (`namespace`). 6 | 7 | ### slo-domain 8 | This label groups slo-types and slo-classes into a single entity which shares the same error budget policy and stakeholders. An SLO domain usually contains multiple error budgets (equal to no. of slo-types * number of slo-classes for individual slo-types).
9 | 10 | ### slo-type 11 | Differentiates individual SLIs - e.g. freshness, availability, etc. Some of the SLIs may be represented by multiple slo-types, e.g. multiple percentiles of the latency SLI exposed as slo-types latency90 and latency99. 12 | 13 | ### slo-class 14 | Label which enables grouping of events from the same slo-domain and slo-type. It may serve multiple purposes, e.g. to 15 | - group events to the same classes of importance 16 | - group events which share the same SLO thresholds 17 | - group events with similar frequency of occurrence 18 | 19 | ### event_key 20 | The last level of SLO event's grouping. Its content depends on the desired level of SLO drilldown accuracy. It may contain the name of an RPC method, or the normalized path of an HTTP request together with the HTTP method (e.g. `GET:/campaigns/list`). See [architecture](./architecture.md) for details on SLO event's structure. 21 | 22 | ### Error budget policy 23 | A formal document which specifies actions which are to be triggered based on the current state of the error budget. Stopping all rollouts and shifting developers' focus to the service's stability when the error budget is depleted is the most common example. See [example error budget policy as published by Google](https://landing.google.com/sre/workbook/chapters/error-budget-policy/) -------------------------------------------------------------------------------- /docs/modules/dynamic_classifier.md: -------------------------------------------------------------------------------- 1 | # Dynamic classifier 2 | 3 | | | | 4 | |----------------|---------------------| 5 | | `moduleName` | `dynamicClassifier` | 6 | | Module type | `processor` | 7 | | Input event | `raw` | 8 | | Output event | `raw` | 9 | 10 | The SLO calculation is based on some domains and classes which group together 11 | events by their functionality but also priority or demands on their quality.
12 | 13 | This is called classification and for the SLO calculation you need to assign those events 14 | to their domains and classes. The information on how to classify them 15 | can be specified using CSV files or it can come along with the event. 16 | 17 | This module checks if the incoming event isn't already classified and if it isn't, it checks 18 | the CSV file specifications if they can classify the event and adds the classification if possible. 19 | 20 | The motivation behind this is that the application itself can have the classification defined in its code. 21 | Then it just passes it along with the event (HTTP request in headers for example) and there is no need 22 | to have the classification held centrally somewhere. 23 | 24 | There is one issue, for example when generating SLO events from a proxy log which proxies traffic to a web 25 | server sending those classifications along. If the application stops working, it won't send the 26 | classification, so we wouldn't know how to classify it. To mitigate this issue this module also 27 | caches all the classifications of input events which are already classified. 28 | This way it can classify the events even if the application goes down if they were called before. 29 | 30 | Also, this cache can be initialized with defined values on startup, so that we can correctly classify events even for applications which do not provide us with the classification themselves. 31 | 32 | 33 | #### `moduleConfig` 34 | ```yaml 35 | # Paths to CSV files containing exact match classification rules. 36 | exactMatchesCsvFiles: [] 37 | # Paths to CSV files containing regexp match classification rules. 38 | regexpMatchesCsvFiles: 39 | - "conf/userportal.csv" 40 | # Metadata key names of the event which will be added to the `events_processed_total` metric if the event cannot be classified.
41 | # Name of the resulting label will be converted to snake case and prefixed with `metadata_` 42 | unclassifiedEventMetadataKeys: 43 | - "userAgent" 44 | ``` 45 | 46 | ##### Example of the CSV with exact classification: 47 | ```csv 48 | test-domain,test-app,test-class,"GET:/testing-endpoint" 49 | ``` 50 | 51 | ##### Example of the CSV with regexp classification: 52 | ```csv 53 | test-domain,test-app,test-class,"/api/test/.*" 54 | test-domain,test-app,test-class-all,"/api/.*" 55 | ``` 56 | 57 | ##### CSV comments 58 | CSV configuration files support single line comments. Comment has to start with the `#` character with no leading whitespaces. 59 | Example: 60 | ```csv 61 | # Example of comment 62 | test-domain,test-app,test-class,"/api/test/.*" 63 | ``` 64 | 65 | 66 | -------------------------------------------------------------------------------- /docs/modules/event_key_generator.md: -------------------------------------------------------------------------------- 1 | # Event key generator 2 | 3 | | | | 4 | |----------------|---------------------| 5 | | `moduleName` | `eventKeyGenerator` | 6 | | Module type | `processor` | 7 | | Input event | `raw` | 8 | | Output event | `raw` | 9 | 10 | This module allows you to generate an identifier of the event type. 11 | It will join all values of specified event metadata keys (if found) using the separator 12 | and use it as the new identifier. 13 | 14 | `moduleConfig` 15 | ```yaml 16 | # Separator to be used to join the selected metadata values. 17 | filedSeparator: ":" 18 | # If the event key should be overwritten if it's already set for the input event. 19 | overrideExistingEventKey: true 20 | # Keys which values will be joined as the resulting eventKey in specified order 21 | metadataKeys: 22 | - 23 | ``` 24 | 25 | If given metadata_key is missing in the event's metadata, the empty value is not included in the resulting eventKey. 26 | 27 | E.g. 
given the following configuration: 28 | ``` 29 | metadataKeys: 30 | app: test_app 31 | name: test_name 32 | endpoint: test_endpoint 33 | ``` 34 | The following metadata `{'app': 'test_app', 'endpoint': 'test_endpoint'}` would result to event_key `test_app:test_endpoint`. 35 | 36 | -------------------------------------------------------------------------------- /docs/modules/event_metadata_renamer.md: -------------------------------------------------------------------------------- 1 | # Event metadata renamer 2 | 3 | *Module status is _experimental_, it may be modified or removed even in non-major release.* 4 | 5 | | | | 6 | |----------------|------------------------| 7 | | `moduleName` | `eventMetadataRenamer` | 8 | | Module type | `processor` | 9 | | Input event | `raw` | 10 | | Output event | `raw` | 11 | 12 | This module allows you to modify the event metadata by renaming its keys. Refusals of overriding an already existing _destination_ are reported as a Warning log as well as within exposed Prometheus' metric. 13 | 14 | `moduleConfig` 15 | ```yaml 16 | eventMetadataRenamerConfigs: 17 | - source: keyX 18 | destination: keyY 19 | ``` 20 | -------------------------------------------------------------------------------- /docs/modules/kafka_ingester.md: -------------------------------------------------------------------------------- 1 | # Kafka ingester 2 | 3 | | | | 4 | |----------------|-------------------------| 5 | | `moduleName` | `kafkaIngester` | 6 | | Module type | `producer` | 7 | | Output event | `raw` | 8 | 9 | Kafka ingester generates events from Kafka messages. 10 | 11 | `moduleConfig` 12 | ```yaml 13 | # Allow verbose logging of events within Kafka library. Global logger with its configured log level is used. 14 | logKafkaEvents: false 15 | # Allow logging of errors within Kafka library. Global logger with its configured log level is used. 16 | logKafkaErrors: true 17 | # List of Kafka brokers 18 | brokers: 19 | - # e.g. 
kafka-1.example.com:9092 20 | topic: "" 21 | groupId: "" 22 | # commitInterval indicates the interval at which offsets are committed to the broker. 23 | # If 0 (default), commits will be handled synchronously. 24 | commitInterval: # e.g. 0, 5s, 10m 25 | # retentionTime optionally sets the length of time the consumer group will be saved by the broker. 26 | # Default: 24h 27 | retentionTime: 28 | # fallbackStartOffset determines from whence the consumer group should begin consuming when it finds a partition without a committed offset. 29 | # Default: FirstOffset 30 | fallbackStartOffset: 31 | ``` 32 | 33 | 34 | For every received message from Kafka: 35 | - data in Key is ignored 36 | - data in Value is unmarshalled according to the schema version specified in Kafka message header `slo-exporter-schema-version` (defaults to `v1` if none specified). 37 | 38 | ### Supported data schemas 39 | #### `v1` 40 | ``` 41 | { 42 | "metadata": { 43 | "name": "eventName" 44 | ... 45 | }, 46 | # Defaults to 1 if none specified 47 | "quantity": "10", 48 | "slo_classification": { 49 | "app": "testApp", 50 | "class": "critical", 51 | "domain": "testDomain" 52 | } 53 | } 54 | ``` 55 | 56 | Strictly speaking, none of the keys is mandatory, however please note that: 57 | - Event with explicitly set quantity=0 is basically noop for Producer module. To give an example, prometheusExporter does not increment any SLO metric for such events. 58 | - Event with empty Metadata does not allow much logic in following modules. 59 | - In case you want to allow ingesting events without SLO classification, you need to make sure that all events are classified within rest of the SLO exporter pipeline. 
60 | -------------------------------------------------------------------------------- /docs/modules/metadata_classifier.md: -------------------------------------------------------------------------------- 1 | # Metadata classifier 2 | 3 | | | | 4 | |----------------|------------------------------| 5 | | `moduleName` | `metadataClassifier` | 6 | | Module type | `processor` | 7 | | Input event | `raw` | 8 | | Output event | `raw` | 9 | 10 | This module allows you to classify an event using its metadata. 11 | Specify keys whose values will be used as the corresponding slo classification items. 12 | If the key cannot be found, the original value of the classification will be left intact. 13 | By default, the module will override event classification. 14 | This can be disabled to classify it only if it wasn't classified before. 15 | 16 | `moduleConfig` 17 | ```yaml 18 | # Key of metadata value to be used as classification slo domain. 19 | sloDomainMetadataKey: 20 | # Key of metadata value to be used as classification slo class. 21 | sloClassMetadataKey: 22 | # Key of metadata value to be used as classification slo app. 23 | sloAppMetadataKey: 24 | # If classification of already classified event should be overwritten. 25 | overrideExistingValues: true 26 | ``` 27 | -------------------------------------------------------------------------------- /docs/modules/prometheus_exporter.md: -------------------------------------------------------------------------------- 1 | # Prometheus exporter 2 | 3 | | | | 4 | |----------------|----------------------| 5 | | `moduleName` | `prometheusExporter` | 6 | | Module type | `ingester` | 7 | | Input event | `SLO` | 8 | 9 | This module exposes the SLO metrics in Prometheus format, so they can be 10 | scraped, computed, visualized and alerted on. 11 | 12 | SLO is often computed over long time ranges such as 4 weeks. 13 | But on the other hand, for debugging it is essential to be able to distinguish which event type 14 | caused the issue.
To allow this, this exporter exposes a cascade of aggregated metrics (see the example below), 15 | from the highest level over the whole slo domain to the lowest granularity of each event type. 16 | 17 | This way the alerting and usual visualization can use the high level metrics, but in case of issues 18 | it's possible to drill down right to the root cause. 19 | 20 | The `normalizer` module is intended to mitigate possible issues with explosion of event type cardinality. 21 | But to make sure, if any unique event type slips through, to avoid the cardinality explosion, 22 | the module allows you to set a maximum limit of exposed event types. Any other new event type will be replaced with the configured placeholder. 23 | 24 | `moduleConfig` 25 | ```yaml 26 | # Name of the resulting counter metric to be exposed representing counter of slo events by its classification and result. 27 | metricName: "slo_events_total" 28 | # Limit of unique event keys, when exceeded, the event key in the label is replaced with placeholder. 29 | maximumUniqueEventKeys: 1000 30 | # Placeholder to replace new event keys when the limit is hit. 31 | ExceededKeyLimitPlaceholder: "cardinalityLimitExceeded" 32 | # *Experimental* List of original raw event metadata keys to be added as exemplar labels. 33 | ExemplarMetadataKeys: ["trace-id"] 34 | # Names of labels to be used for specific event information. 35 | labelNames: 36 | # Contains information about the event result (success, fail, ...). 37 | result: "result" 38 | # Domain of the SLO event. 39 | sloDomain: "slo_domain" 40 | # SLO class of the event. 41 | sloClass: "slo_class" 42 | # Application, to which the event belongs. 43 | sloApp: "slo_app" 44 | # Unique identifier of the event.
45 | # This label holds value of Key attribute of the input SLO event 46 | eventKey: "event_key" 47 | ``` 48 | 49 | ## Exposed metrics example 50 | Given the default configuration as specified above, the resulting exposed metrics will be as follows: 51 | ``` 52 | slo_domain:slo_events_total{result=~"success|fail",slo_domain="__domain_name__"} 53 | slo_domain_slo_class:slo_events_total{result=~"success|fail",slo_domain="__domain_name__",slo_class="__slo_class__"} 54 | slo_domain_slo_class_slo_app:slo_events_total{result=~"success|fail",slo_domain="__domain_name__",slo_class="__slo_class__",slo_app="__slo_app__"} 55 | slo_domain_slo_class_slo_app_event_key:slo_events_total{result=~"success|fail",slo_domain="__domain_name__",slo_class="__slo_class__",slo_app="__slo_app__",event_key="__event_key__"} 56 | ``` 57 | 58 | Each of the timeseries will have additional labels which are (optionally) specified in [sloEventProducer](./slo_event_producer.md) rules configuration (as `additional_metadata`) - for example slo_version, slo_type,... 59 | -------------------------------------------------------------------------------- /docs/modules/relabel.md: -------------------------------------------------------------------------------- 1 | # Relabel 2 | 3 | | | | 4 | |----------------|--------------| 5 | | `moduleName` | `relabel` | 6 | | Module type | `processor` | 7 | | Input event | `raw` | 8 | | Output event | `raw` | 9 | 10 | This module allows you to modify the event metadata or drop the event altogether. 11 | It uses native Prometheus `relabel_config` syntax. In this case metadata is referred to as labels. 12 | See [the upstream documentation](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config) 13 | for more info. Referenced metadata keys need to be valid Prometheus label names. 14 | 15 | 16 | `moduleConfig` 17 | ```yaml 18 | eventRelabelConfigs: 19 | - <relabel_config> 20 | ``` 21 | 22 | You can find some [examples here](/examples).
23 | -------------------------------------------------------------------------------- /docs/modules/statistical_classifier.md: -------------------------------------------------------------------------------- 1 | # Statistical classifier 2 | 3 | | | | 4 | |----------------|-------------------------| 5 | | `moduleName` | `statisticalClassifier` | 6 | | Module type | `processor` | 7 | | Input event | `raw` | 8 | | Output event | `raw` | 9 | 10 | This module observes the statistical distribution of all incoming, already classified events. 11 | This distribution is then used to classify incoming unclassified events. 12 | It produces only classified events; if any error or issue is encountered, the event is dropped. 13 | You can specify default weights which will be used if there were no events recently (at least for the interval specified in `historyWindowSize`) to calculate the weights from. 14 | 15 | This module allows you to ensure no events will be dropped just because they were not classified. 16 | Of course the precision is based on the previously observed data but it is still better than dropping the events completely. 17 | 18 | Applicable for example in the following cases: 19 | 20 | - Application usually sends its event identifier within HTTP headers. 21 | In cases where communication is interrupted in a way that this header is not sent 22 | (e.g. HTTP 5xx or 499 status codes), we have no way to identify (and thus classify) the event. 23 | 24 | 25 | `moduleConfig` 26 | ```yaml 27 | # Time interval from which to calculate the distribution used for classification. 28 | historyWindowSize: "30m" 29 | # How often the weights calculated over the historyWindowSize will be updated. 30 | historyWeightUpdateInterval: "1m" 31 | # Default weights to be used in case that there were no events recently to deduce the real weights.
32 | defaultWeights: 33 | - <classificationWeight> 34 | ``` 35 | 36 | `classificationWeight` 37 | ```yaml 38 | # Dimensionless number to be compared with other default weights. 39 | weight: 40 | # Classification to be guessed with the specified weight. 41 | classification: 42 | sloDomain: 43 | sloClass: 44 | ``` 45 | -------------------------------------------------------------------------------- /docs/modules/tailer.md: -------------------------------------------------------------------------------- 1 | # Tailer 2 | 3 | | | | 4 | |----------------|-------------| 5 | | `moduleName` | `tailer` | 6 | | Module type | `producer` | 7 | | Output event | `raw` | 8 | 9 | This module is able to tail a file and parse each line using a regular expression with named groups. 10 | Those group names are used as metadata keys of the produced event and values are the matching strings. 11 | 12 | It persists the last read position to a file, so it can continue if restarted. 13 | 14 | It can be used for example to tail a proxy log and create events from it 15 | so you can calculate SLO for your HTTP servers etc. 16 | 17 | `moduleConfig` 18 | ```yaml 19 | # Path to file to be processed. 20 | tailedFile: "/logs/access_log" 21 | # If tailed file should be followed for new lines once all current lines are processed. 22 | follow: true 23 | # If tailed file should be reopened. 24 | reopen: true 25 | # Path to file where to persist position of tailing. 26 | positionFile: "" 27 | # How often current position should be persisted to the position file. 28 | positionPersistenceInterval: "2s" 29 | # Defines RE which is used to parse the log line. 30 | # Currently known named groups which are used to extract information for generated Events are: 31 | # sloDomain - part of SLO classification for the given event. 32 | # sloApp - part of SLO classification for the given event. 33 | # sloClass - part of SLO classification for the given event. 34 | # All other named groups will be added to the request event as event.Metadata.
35 | loglineParseRegexp: '^(?P[A-Fa-f0-9.:]{4,50}) \S+ \S+ \[(?P