├── .bingo ├── .gitignore ├── README.md ├── Variables.mk ├── bingo.mod ├── bingo.sum ├── faillint.mod ├── faillint.sum ├── go.mod ├── goimports.mod ├── goimports.sum ├── gojq.mod ├── gojq.sum ├── gojsontoyaml.mod ├── gojsontoyaml.sum ├── golangci-lint.mod ├── golangci-lint.sum ├── jb.mod ├── jb.sum ├── jsonnet-deps.mod ├── jsonnet-deps.sum ├── jsonnet-lint.mod ├── jsonnet-lint.sum ├── jsonnet.mod ├── jsonnet.sum ├── jsonnetfmt.mod ├── jsonnetfmt.sum ├── promtool.mod ├── promtool.sum ├── variables.env ├── yq.mod └── yq.sum ├── .circleci └── config.yml ├── .github └── workflows │ └── ci.yml ├── .gitignore ├── .gitleaks.toml ├── CODEOWNERS ├── COPYRIGHT ├── LICENSE ├── Makefile ├── README.md ├── build_deploy.sh ├── configuration ├── observatorium │ ├── metric-federation-rules.libsonnet │ ├── queries-ruler.libsonnet │ ├── queries.libsonnet │ ├── rbac.go │ ├── ruler-remote-write.libsonnet │ ├── slo.go │ └── tenants.libsonnet ├── rhelemeter │ └── metrics.json ├── telemeter-rosa │ ├── README.md │ └── metrics.json └── telemeter │ └── metrics.json ├── crds ├── loki.grafana.com_alertingrules.libsonnet ├── loki.grafana.com_recordingrules.libsonnet └── observatorium-logs-crds-template.jsonnet ├── docs ├── observatorium-logs.png ├── observatorium.md ├── readme.md ├── rhelemeter.md ├── sop │ ├── observatorium.md │ ├── remote_write_load_shedding_sop.md │ └── tenant_removal_sop.md ├── telemeter.md └── telemeter.png ├── go.mod ├── go.sum ├── jsonnetfile.json ├── jsonnetfile.lock.json ├── lib └── k.libsonnet ├── loki-operational.json ├── magefiles ├── alertmanager.go ├── cache.go ├── gateway.go ├── lib.go ├── magefile.go ├── operator.go ├── secrets.go ├── servicemonitors.go ├── telemeter_rules.go ├── template.go └── thanos.go ├── mimic.go ├── observability ├── config.libsonnet ├── dashboards │ ├── observatorium-api-logs.libsonnet │ ├── observatorium-api.libsonnet │ ├── observatorium-gubernator.libsonnet │ ├── opentelemetry.libsonnet │ ├── rhobs-instance-utilization-overview.libsonnet │ ├── rules-objstore.libsonnet │ ├── slo.libsonnet │ ├── telemeter-canary.libsonnet │ ├── telemeter.libsonnet │ └── tracing.libsonnet ├── grafana-obs-logs.jsonnet ├── grafana.jsonnet ├── observatorium-logs │ ├── loki-overview.libsonnet │ └── loki-tenant-alerts.libsonnet ├── prometheus_rule_tests │ ├── observatorium-custom-metrics.prometheusrulestests.yaml │ ├── observatorium-tenants.prometheusrulestests.yaml │ ├── rhobs-slos-mst.prometheusrulestests.yaml │ └── rhobs-slos-telemeter.prometheusrulestests.yaml ├── prometheusrules.jsonnet └── utils.jsonnet ├── resources ├── observability │ ├── grafana │ │ ├── observatorium-logs │ │ │ ├── grafana-dashboards-rules-template.yaml │ │ │ └── grafana-dashboards-template.yaml │ │ └── observatorium │ │ │ ├── grafana-dashboard-observatorium-alertmanager-overview.configmap.yaml │ │ │ ├── grafana-dashboard-observatorium-api.configmap.yaml │ │ │ ├── grafana-dashboard-observatorium-gubernator.configmap.yaml │ │ │ ├── grafana-dashboard-observatorium-memcached-memcached-overview.configmap.yaml │ │ │ ├── grafana-dashboard-observatorium-thanos-compact.configmap.yaml │ │ │ ├── grafana-dashboard-observatorium-thanos-overview.configmap.yaml │ │ │ ├── grafana-dashboard-observatorium-thanos-query-frontend.configmap.yaml │ │ │ ├── grafana-dashboard-observatorium-thanos-query.configmap.yaml │ │ │ ├── grafana-dashboard-observatorium-thanos-receive-controller.configmap.yaml │ │ │ ├── grafana-dashboard-observatorium-thanos-receive.configmap.yaml │ │ │ ├── 
grafana-dashboard-observatorium-thanos-rule.configmap.yaml │ │ │ ├── grafana-dashboard-observatorium-thanos-store.configmap.yaml │ │ │ ├── grafana-dashboard-rhobs-instance-utilization-overview.configmap.yaml │ │ │ ├── grafana-dashboard-rules-objstore.configmap.yaml │ │ │ ├── grafana-dashboard-slo-mst-production.configmap.yaml │ │ │ ├── grafana-dashboard-slo-mst-stage.configmap.yaml │ │ │ ├── grafana-dashboard-slo-rhobsp02ue1-production.configmap.yaml │ │ │ ├── grafana-dashboard-slo-telemeter-production.configmap.yaml │ │ │ ├── grafana-dashboard-slo-telemeter-stage.configmap.yaml │ │ │ ├── grafana-dashboard-telemeter-canary.configmap.yaml │ │ │ ├── grafana-dashboard-telemeter.configmap.yaml │ │ │ ├── grafana-dashboard-tracing-jaeger.configmap.yaml │ │ │ └── grafana-dashboard-tracing-otel.configmap.yaml │ └── prometheusrules │ │ ├── observatorium-alertmanager-production.prometheusrules.yaml │ │ ├── observatorium-alertmanager-stage.prometheusrules.yaml │ │ ├── observatorium-custom-metrics-production.prometheusrules.yaml │ │ ├── observatorium-custom-metrics-stage.prometheusrules.yaml │ │ ├── observatorium-custom-metrics.prometheusrulestests.yaml │ │ ├── observatorium-gubernator-production.prometheusrules.yaml │ │ ├── observatorium-gubernator-stage.prometheusrules.yaml │ │ ├── observatorium-http-traffic-production.prometheusrules.yaml │ │ ├── observatorium-http-traffic-stage.prometheusrules.yaml │ │ ├── observatorium-obsctl-reloader-production.prometheusrules.yaml │ │ ├── observatorium-obsctl-reloader-stage.prometheusrules.yaml │ │ ├── observatorium-proactive-monitoring-production.prometheusrules.yaml │ │ ├── observatorium-proactive-monitoring-stage.prometheusrules.yaml │ │ ├── observatorium-tenants-production.prometheusrules.yaml │ │ ├── observatorium-tenants-stage.prometheusrules.yaml │ │ ├── observatorium-tenants.prometheusrulestests.yaml │ │ ├── observatorium-thanos-production.prometheusrules.yaml │ │ ├── observatorium-thanos-stage.prometheusrules.yaml │ │ ├── pyrra │ │ ├── mst-production-api-alerting-availability-slo.yaml │ │ ├── mst-production-api-alerting-notif-availability-slo.yaml │ │ ├── mst-production-api-logs-prom-tail-availability-slo.yaml │ │ ├── mst-production-api-logs-query-availability-slo.yaml │ │ ├── mst-production-api-logs-query-range-availability-slo.yaml │ │ ├── mst-production-api-logs-tail-availability-slo.yaml │ │ ├── mst-production-api-logs-write-availability-slo.yaml │ │ ├── mst-production-api-logs-write-latency-slo.yaml │ │ ├── mst-production-api-metrics-query-availability-slo.yaml │ │ ├── mst-production-api-metrics-query-range-availability-slo.yaml │ │ ├── mst-production-api-metrics-read-100M-latency-slo.yaml │ │ ├── mst-production-api-metrics-read-10M-latency-slo.yaml │ │ ├── mst-production-api-metrics-read-1M-latency-slo.yaml │ │ ├── mst-production-api-metrics-rule-query-availability-slo.yaml │ │ ├── mst-production-api-metrics-rule-read-100M-latency-slo.yaml │ │ ├── mst-production-api-metrics-rule-read-10M-latency-slo.yaml │ │ ├── mst-production-api-metrics-rule-read-1M-latency-slo.yaml │ │ ├── mst-production-api-metrics-write-availability-slo.yaml │ │ ├── mst-production-api-metrics-write-latency-slo.yaml │ │ ├── mst-production-api-rules-raw-read-availability-slo.yaml │ │ ├── mst-production-api-rules-raw-write-availability-slo.yaml │ │ ├── mst-production-api-rules-read-availability-slo.yaml │ │ ├── mst-production-api-rules-sync-availability-slo.yaml │ │ ├── mst-stage-api-alerting-availability-slo.yaml │ │ ├── mst-stage-api-alerting-notif-availability-slo.yaml │ │ 
├── mst-stage-api-logs-prom-tail-availability-slo.yaml │ │ ├── mst-stage-api-logs-query-availability-slo.yaml │ │ ├── mst-stage-api-logs-query-range-availability-slo.yaml │ │ ├── mst-stage-api-logs-tail-availability-slo.yaml │ │ ├── mst-stage-api-logs-write-availability-slo.yaml │ │ ├── mst-stage-api-logs-write-latency-slo.yaml │ │ ├── mst-stage-api-metrics-query-availability-slo.yaml │ │ ├── mst-stage-api-metrics-query-range-availability-slo.yaml │ │ ├── mst-stage-api-metrics-read-100M-latency-slo.yaml │ │ ├── mst-stage-api-metrics-read-10M-latency-slo.yaml │ │ ├── mst-stage-api-metrics-read-1M-latency-slo.yaml │ │ ├── mst-stage-api-metrics-rule-query-availability-slo.yaml │ │ ├── mst-stage-api-metrics-rule-read-100M-latency-slo.yaml │ │ ├── mst-stage-api-metrics-rule-read-10M-latency-slo.yaml │ │ ├── mst-stage-api-metrics-rule-read-1M-latency-slo.yaml │ │ ├── mst-stage-api-metrics-write-availability-slo.yaml │ │ ├── mst-stage-api-metrics-write-latency-slo.yaml │ │ ├── mst-stage-api-rules-raw-read-availability-slo.yaml │ │ ├── mst-stage-api-rules-raw-write-availability-slo.yaml │ │ ├── mst-stage-api-rules-read-availability-slo.yaml │ │ ├── mst-stage-api-rules-sync-availability-slo.yaml │ │ ├── rhelemeter-production-rhobs-rhelemeter-server-metrics-receive-availability-slo.yaml │ │ ├── rhelemeter-production-rhobs-rhelemeter-server-metrics-receive-latency-slo.yaml │ │ ├── rhelemeter-stage-rhobs-rhelemeter-server-metrics-receive-availability-slo.yaml │ │ ├── rhelemeter-stage-rhobs-rhelemeter-server-metrics-receive-latency-slo.yaml │ │ ├── rhobsp02ue1-production-api-alerting-availability-slo.yaml │ │ ├── rhobsp02ue1-production-api-alerting-notif-availability-slo.yaml │ │ ├── rhobsp02ue1-production-api-metrics-query-availability-slo.yaml │ │ ├── rhobsp02ue1-production-api-metrics-query-range-availability-slo.yaml │ │ ├── rhobsp02ue1-production-api-metrics-read-100M-latency-slo.yaml │ │ ├── rhobsp02ue1-production-api-metrics-read-10M-latency-slo.yaml │ │ ├── rhobsp02ue1-production-api-metrics-read-1M-latency-slo.yaml │ │ ├── rhobsp02ue1-production-api-metrics-rule-query-availability-slo.yaml │ │ ├── rhobsp02ue1-production-api-metrics-rule-read-100M-latency-slo.yaml │ │ ├── rhobsp02ue1-production-api-metrics-rule-read-10M-latency-slo.yaml │ │ ├── rhobsp02ue1-production-api-metrics-rule-read-1M-latency-slo.yaml │ │ ├── rhobsp02ue1-production-api-metrics-write-availability-slo.yaml │ │ ├── rhobsp02ue1-production-api-metrics-write-latency-slo.yaml │ │ ├── rhobsp02ue1-production-api-rules-raw-read-availability-slo.yaml │ │ ├── rhobsp02ue1-production-api-rules-raw-write-availability-slo.yaml │ │ ├── rhobsp02ue1-production-api-rules-read-availability-slo.yaml │ │ ├── rhobsp02ue1-production-api-rules-sync-availability-slo.yaml │ │ ├── telemeter-production-api-alerting-availability-slo.yaml │ │ ├── telemeter-production-api-alerting-notif-availability-slo.yaml │ │ ├── telemeter-production-api-metrics-query-availability-slo.yaml │ │ ├── telemeter-production-api-metrics-query-range-availability-slo.yaml │ │ ├── telemeter-production-api-metrics-read-100M-latency-slo.yaml │ │ ├── telemeter-production-api-metrics-read-10M-latency-slo.yaml │ │ ├── telemeter-production-api-metrics-read-1M-latency-slo.yaml │ │ ├── telemeter-production-api-metrics-rule-query-availability-slo.yaml │ │ ├── telemeter-production-api-metrics-rule-read-100M-latency-slo.yaml │ │ ├── telemeter-production-api-metrics-rule-read-10M-latency-slo.yaml │ │ ├── telemeter-production-api-metrics-rule-read-1M-latency-slo.yaml │ │ ├── 
telemeter-production-api-metrics-write-availability-slo.yaml │ │ ├── telemeter-production-api-metrics-write-latency-slo.yaml │ │ ├── telemeter-production-api-rules-raw-read-availability-slo.yaml │ │ ├── telemeter-production-api-rules-raw-write-availability-slo.yaml │ │ ├── telemeter-production-api-rules-read-availability-slo.yaml │ │ ├── telemeter-production-api-rules-sync-availability-slo.yaml │ │ ├── telemeter-production-rhobs-telemeter-server-metrics-receive-availability-slo.yaml │ │ ├── telemeter-production-rhobs-telemeter-server-metrics-receive-latency-slo.yaml │ │ ├── telemeter-production-rhobs-telemeter-server-metrics-upload-availability-slo.yaml │ │ ├── telemeter-production-rhobs-telemeter-server-metrics-upload-latency-slo.yaml │ │ ├── telemeter-staging-api-alerting-availability-slo.yaml │ │ ├── telemeter-staging-api-alerting-notif-availability-slo.yaml │ │ ├── telemeter-staging-api-metrics-query-availability-slo.yaml │ │ ├── telemeter-staging-api-metrics-query-range-availability-slo.yaml │ │ ├── telemeter-staging-api-metrics-read-100M-latency-slo.yaml │ │ ├── telemeter-staging-api-metrics-read-10M-latency-slo.yaml │ │ ├── telemeter-staging-api-metrics-read-1M-latency-slo.yaml │ │ ├── telemeter-staging-api-metrics-rule-query-availability-slo.yaml │ │ ├── telemeter-staging-api-metrics-rule-read-100M-latency-slo.yaml │ │ ├── telemeter-staging-api-metrics-rule-read-10M-latency-slo.yaml │ │ ├── telemeter-staging-api-metrics-rule-read-1M-latency-slo.yaml │ │ ├── telemeter-staging-api-metrics-write-availability-slo.yaml │ │ ├── telemeter-staging-api-metrics-write-latency-slo.yaml │ │ ├── telemeter-staging-api-rules-raw-read-availability-slo.yaml │ │ ├── telemeter-staging-api-rules-raw-write-availability-slo.yaml │ │ ├── telemeter-staging-api-rules-read-availability-slo.yaml │ │ ├── telemeter-staging-api-rules-sync-availability-slo.yaml │ │ ├── telemeter-staging-rhobs-telemeter-server-metrics-receive-availability-slo.yaml │ │ ├── telemeter-staging-rhobs-telemeter-server-metrics-receive-latency-slo.yaml │ │ ├── telemeter-staging-rhobs-telemeter-server-metrics-upload-availability-slo.yaml │ │ └── telemeter-staging-rhobs-telemeter-server-metrics-upload-latency-slo.yaml │ │ ├── rhobs-logs-mst-production.prometheusrules.yaml │ │ ├── rhobs-logs-mst-stage.prometheusrules.yaml │ │ ├── rhobs-slos-mst-production.prometheusrules.yaml │ │ ├── rhobs-slos-mst-stage.prometheusrules.yaml │ │ ├── rhobs-slos-mst.prometheusrulestests.yaml │ │ ├── rhobs-slos-rhelemeter-production.prometheusrules.yaml │ │ ├── rhobs-slos-rhelemeter-stage.prometheusrules.yaml │ │ ├── rhobs-slos-telemeter-production.prometheusrules.yaml │ │ ├── rhobs-slos-telemeter-stage.prometheusrules.yaml │ │ └── rhobs-slos-telemeter.prometheusrulestests.yaml ├── operations │ ├── bucket-inspect │ │ ├── README.md │ │ ├── cron-job-template.yaml │ │ ├── job-template.yaml │ │ └── s3-secret-template.yaml │ ├── bucket-replicate │ │ ├── README.md │ │ ├── cron-job-template.yaml │ │ ├── job-template.yaml │ │ ├── monitoring-template.yaml │ │ └── s3-secret-template.yaml │ └── rclone-bucket-replicate │ │ ├── README.md │ │ ├── job-template.env │ │ ├── job-template.yaml │ │ ├── monitoring-template.yaml │ │ ├── rclone-config-template.yaml │ │ ├── s3-secret-template.env │ │ └── s3-secret-template.yaml └── services │ ├── alertmanager │ ├── production │ │ ├── alertmanager-template.yaml │ │ └── service-monitor-template.yaml │ └── staging │ │ ├── alertmanager-template.yaml │ │ └── service-monitor-template.yaml │ ├── bundle │ ├── local │ │ ├── operator.yaml │ │ 
└── thanos-operator-crds.yaml │ ├── production │ │ ├── operator.yaml │ │ └── thanos-operator-crds.yaml │ └── staging │ │ ├── operator.yaml │ │ └── thanos-operator-crds.yaml │ ├── memcached │ ├── production │ │ ├── memcached-template.yaml │ │ └── service-monitor-memcached-template.yaml │ └── staging │ │ ├── memcached-template.yaml │ │ └── service-monitor-memcached-template.yaml │ ├── meta-monitoring │ ├── logging-template.yaml │ ├── profiling-template.yaml │ └── tracing-template.yaml │ ├── metric-federation-rule-template.yaml │ ├── objstore │ ├── local │ │ ├── thanos-default-secret.yaml │ │ └── thanos-telemeter-secret.yaml │ ├── production │ │ ├── thanos-default-secret.yaml │ │ └── thanos-telemeter-secret.yaml │ ├── staging │ │ ├── thanos-default-secret.yaml │ │ └── thanos-telemeter-secret.yaml │ └── thanos-object-store-secret.yaml │ ├── observatorium-api │ ├── production │ │ ├── observatorium-api-template.yaml │ │ └── service-monitor-observatorium-api-template.yaml │ └── staging │ │ ├── observatorium-api-template.yaml │ │ └── service-monitor-observatorium-api-template.yaml │ ├── observatorium-logs-template.yaml │ ├── observatorium-metrics-template.yaml │ ├── observatorium-template.yaml │ ├── observatorium-tenants-template.yaml │ ├── observatorium-traces-subscriptions-template.yaml │ ├── observatorium-traces-template.yaml │ ├── redis │ └── staging │ │ └── cache.yaml │ ├── rhelemeter-template.yaml │ ├── rhobs-thanos-operator │ ├── local │ │ ├── rhobs.yaml │ │ └── telemeter-rules.yaml │ ├── production │ │ └── rhobs.yaml │ └── staging │ │ ├── rhobs.yaml │ │ └── telemeter-rules.yaml │ ├── servicemonitors │ ├── local │ │ └── servicemonitors.yaml │ ├── production │ │ └── servicemonitors.yaml │ └── staging │ │ └── servicemonitors.yaml │ └── telemeter-template.yaml ├── services ├── components │ └── loki-caches.libsonnet ├── dex-template.jsonnet ├── metric-federation-rule-template.jsonnet ├── minio-template.jsonnet ├── observatorium-logs-template-overwrites.libsonnet ├── observatorium-logs-template.jsonnet ├── observatorium-logs.libsonnet ├── observatorium-metrics-template-overwrites.libsonnet ├── observatorium-metrics-template.jsonnet ├── observatorium-metrics.libsonnet ├── observatorium-template.jsonnet ├── observatorium-tenants-template.jsonnet ├── observatorium-traces-subscriptions-template.jsonnet ├── observatorium-traces-subscriptions.libsonnet ├── observatorium-traces-template.jsonnet ├── observatorium-traces.libsonnet ├── observatorium.libsonnet ├── prometheus │ ├── remote-write-proxy.libsonnet │ └── remote_write_proxy.conf ├── rhelemeter-template.jsonnet ├── rhelemeter.libsonnet ├── sidecars │ ├── jaeger-agent.libsonnet │ ├── oauth-proxy.libsonnet │ ├── opa-ams.libsonnet │ └── thanos-rule-syncer.libsonnet ├── telemeter-template.jsonnet └── telemeter.libsonnet ├── services_go ├── instances │ └── rhobs │ │ └── rhobs.go ├── observatorium │ ├── api.go │ ├── assets │ │ └── store-auto-shard-relabel-configMap.sh │ ├── cache.go │ ├── encoders.go │ ├── helpers.go │ ├── metrics.go │ ├── observatorium.go │ └── sidecars.go └── services.go ├── synchronize.sh └── tests ├── ci ├── README.md ├── ci_test.sh ├── env │ ├── dex.test.ci.env │ ├── logging.test.ci.env │ ├── minio.test.ci.env │ ├── observatorium-logs.test.ci.env │ ├── observatorium-metric-federation-rule.test.ci.env │ ├── observatorium-metrics.ci.env │ ├── observatorium-parca.test.ci.env │ ├── observatorium.test.ci.env │ ├── rhelemeter.test.ci.env │ └── telemeter.ci.env ├── manifests │ ├── observatorium-up-logs.yaml │ ├── 
observatorium-up-metrics.yaml │ ├── pre-requisites.yaml │ ├── rbac.yaml │ └── test-tenant.yaml └── rhobsci.png ├── deploy ├── README.md ├── env │ ├── logging.test.env │ ├── observatorium-jaeger.test.env │ ├── observatorium-logs.test.env │ ├── observatorium-metric-federation-rule.test.env │ ├── observatorium-metrics.test.env │ ├── observatorium-parca.test.env │ ├── observatorium.test.env │ ├── rhelemeter.test.env │ └── telemeter.test.env ├── launch.sh ├── manifests │ ├── clusterlogforwader.yaml │ ├── clusterlogging.yaml │ ├── dex-template.yaml │ ├── logging-operator.yaml │ ├── loki-operator.yaml │ ├── minio-template.yaml │ ├── observatorium-alertmanager-config-secret.yaml │ ├── observatorium-cluster-role-binding.yaml │ ├── observatorium-cluster-role.yaml │ ├── observatorium-logs-secret.yaml │ ├── observatorium-metrics-thanos-objectstorage-secret-template.yaml │ ├── observatorium-parca-secret.yaml │ ├── observatorium-rhobs-tenant-secret.yaml │ ├── observatorium-rules-objstore-secret.yaml │ ├── observatorium-service-account.yaml │ ├── observatorium-tools-network-policy.yaml │ ├── rhelemeter_certs │ │ ├── ca.crt │ │ ├── tls.crt │ │ └── tls.key │ └── telemeter-token-refersher-oidc-secret.yaml └── testdata │ └── client-info.json └── integration_tests ├── Dockerfile ├── README.md ├── build_deploy.sh ├── framework ├── .bingo │ ├── .gitignore │ ├── README.md │ ├── Variables.mk │ ├── go.mod │ ├── gojsontoyaml.mod │ ├── gojsontoyaml.sum │ ├── jsonnet-lint.mod │ ├── jsonnet-lint.sum │ ├── jsonnet.mod │ ├── jsonnet.sum │ ├── jsonnetfmt.mod │ ├── jsonnetfmt.sum │ └── variables.env ├── Dockerfile ├── Makefile ├── README.md ├── cmd │ └── rhobs-test │ │ └── main.go ├── examples │ └── manifests │ │ ├── dev │ │ ├── test-deployment-faulty.yaml │ │ ├── test-deployment.yaml │ │ ├── test-job.yaml │ │ └── test-rbac.yaml │ │ └── openshift │ │ ├── rhobs-rbac-template.yaml │ │ ├── rhobs-test-job-template.yaml │ │ ├── test-deployment-faulty-template.yaml │ │ └── test-deployment-template.yaml ├── go.mod ├── go.sum ├── integration-test.png ├── jsonnet │ ├── dev-manifests.jsonnet │ ├── job.libsonnet │ ├── ocp-manifests.jsonnet │ ├── rbac.libsonnet │ └── test-deployment.libsonnet └── pkg │ ├── client │ └── client.go │ ├── deployment │ ├── deployment.go │ └── deployment_test.go │ ├── logger │ └── logger.go │ ├── pod │ ├── pod.go │ └── pod_test.go │ ├── service │ ├── service.go │ └── service_test.go │ └── statefulset │ ├── statefulset.go │ └── statefulset_test.go ├── post-deploy-host-metering-job-template.yaml ├── post-deploy-logs-job-template.yaml ├── post-deploy-metrics-job-template.yaml └── runtest.sh /.bingo/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Ignore everything 3 | * 4 | 5 | # But not these files: 6 | !.gitignore 7 | !*.mod 8 | !*.sum 9 | !README.md 10 | !Variables.mk 11 | !variables.env 12 | 13 | *tmp.mod 14 | -------------------------------------------------------------------------------- /.bingo/README.md: -------------------------------------------------------------------------------- 1 | # Project Development Dependencies. 2 | 3 | This directory stores Go modules with pinned, buildable packages that are used within this repository, managed by https://github.com/bwplotka/bingo. 4 | 5 | * Run `bingo get` to install all tools; each has its own module file in this directory. 6 | * Run `bingo get NAME` to install only the tool whose `NAME.mod` module file lives in this directory (e.g. `bingo get promtool`).
7 | * For Makefile: Make sure to put `include .bingo/Variables.mk` in your Makefile, then use the `$(TOOL)` variable, where `TOOL` is the name of the `.bingo/TOOL.mod` file (e.g. `$(PROMTOOL)` for `.bingo/promtool.mod`). 8 | * For shell: Run `source .bingo/variables.env` to source all environment variables for each tool (see the shell sketch after the `.bingo` files below). 9 | * For go: Import `.bingo/variables.go` for variable names. 10 | * See https://github.com/bwplotka/bingo or run `bingo -h` for how to add, remove, or change binary dependencies. 11 | 12 | ## Requirements 13 | 14 | * Go 1.14+ 15 | -------------------------------------------------------------------------------- /.bingo/bingo.mod: -------------------------------------------------------------------------------- 1 | module _ // Auto generated by https://github.com/bwplotka/bingo. DO NOT EDIT 2 | 3 | go 1.15 4 | 5 | require github.com/bwplotka/bingo v0.8.0 6 | -------------------------------------------------------------------------------- /.bingo/faillint.mod: -------------------------------------------------------------------------------- 1 | module _ // Auto generated by https://github.com/bwplotka/bingo. DO NOT EDIT 2 | 3 | go 1.19 4 | 5 | require github.com/fatih/faillint v1.14.0 6 | -------------------------------------------------------------------------------- /.bingo/go.mod: -------------------------------------------------------------------------------- 1 | module _ // Fake go.mod auto-created by 'bingo' for go -moddir compatibility with non-Go projects. Commit this file, together with other .mod files. -------------------------------------------------------------------------------- /.bingo/goimports.mod: -------------------------------------------------------------------------------- 1 | module _ // Auto generated by https://github.com/bwplotka/bingo. DO NOT EDIT 2 | 3 | go 1.19 4 | 5 | require golang.org/x/tools v0.9.3 // cmd/goimports 6 | -------------------------------------------------------------------------------- /.bingo/goimports.sum: -------------------------------------------------------------------------------- 1 | golang.org/x/mod v0.8.0 h1:LUYupSeNrTNCGzR/hVBk2NHZO4hXcVaW1k4Qx7rjPx8= 2 | golang.org/x/mod v0.9.0 h1:KENHtAZL2y3NLMYZeHY9DW8HW8V+kQyJsY/V9JlKvCs= 3 | golang.org/x/mod v0.10.0 h1:lFO9qtOdlre5W1jxS3r/4szv2/6iXxScdzjoBMXNhYk= 4 | golang.org/x/mod v0.10.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= 5 | golang.org/x/sys v0.5.0 h1:MUK/U/4lj1t1oPg0HfuXDN/Z1wv31ZJ/YcPiGccS4DU= 6 | golang.org/x/sys v0.6.0 h1:MVltZSvRTcU2ljQOhs94SXPftV6DCNnZViHeQps87pQ= 7 | golang.org/x/sys v0.8.0 h1:EBmGv8NaZBZTWvrbjNoL6HVt+IVy3QDQpJs7VRIw3tU= 8 | golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 9 | golang.org/x/tools v0.6.0 h1:BOw41kyTf3PuCW1pVQf8+Cyg8pMlkYB1oo9iJ6D/lKM= 10 | golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= 11 | golang.org/x/tools v0.7.0 h1:W4OVu8VVOaIO0yzWMNdepAulS7YfoS3Zabrm8DOXXU4= 12 | golang.org/x/tools v0.7.0/go.mod h1:4pg6aUX35JBAogB10C9AtvVL+qowtN4pT3CGSQex14s= 13 | golang.org/x/tools v0.9.3 h1:Gn1I8+64MsuTb/HpH+LmQtNas23LhUVr3rYZ0eKuaMM= 14 | golang.org/x/tools v0.9.3/go.mod h1:owI94Op576fPu3cIGQeHs3joujW/2Oc6MtlxbF5dfNc= 15 | -------------------------------------------------------------------------------- /.bingo/gojq.mod: -------------------------------------------------------------------------------- 1 | module _ // Auto generated by https://github.com/bwplotka/bingo.
DO NOT EDIT 2 | 3 | go 1.15 4 | 5 | require github.com/itchyny/gojq v0.12.12 // cmd/gojq 6 | -------------------------------------------------------------------------------- /.bingo/gojsontoyaml.mod: -------------------------------------------------------------------------------- 1 | module _ // Auto generated by https://github.com/bwplotka/bingo. DO NOT EDIT 2 | 3 | go 1.14 4 | 5 | require github.com/brancz/gojsontoyaml v0.0.0-20200602132005-3697ded27e8c 6 | -------------------------------------------------------------------------------- /.bingo/gojsontoyaml.sum: -------------------------------------------------------------------------------- 1 | github.com/brancz/gojsontoyaml v0.0.0-20200602132005-3697ded27e8c h1:hb6WqfcKQZlNx/vahy51SaIvKnoXD5609Nm0PC4msEM= 2 | github.com/brancz/gojsontoyaml v0.0.0-20200602132005-3697ded27e8c/go.mod h1:+00lOjYXPgMfxHVPvg9GDtc3BX5Xh5aFpB4gMB8gfMo= 3 | github.com/ghodss/yaml v1.0.0 h1:wQHKEahhL6wmXdzwWG11gIVCkOv05bNOh+Rxn0yngAk= 4 | github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= 5 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 6 | gopkg.in/yaml.v2 v2.3.0 h1:clyUAQHOM3G0M3f5vQj7LuJrETvjVot3Z5el9nffUtU= 7 | gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 8 | -------------------------------------------------------------------------------- /.bingo/golangci-lint.mod: -------------------------------------------------------------------------------- 1 | module _ // Auto generated by https://github.com/bwplotka/bingo. DO NOT EDIT 2 | 3 | go 1.22.9 4 | 5 | toolchain go1.23.1 6 | 7 | require github.com/golangci/golangci-lint v1.63.4 // cmd/golangci-lint 8 | -------------------------------------------------------------------------------- /.bingo/jb.mod: -------------------------------------------------------------------------------- 1 | module _ // Auto generated by https://github.com/bwplotka/bingo. DO NOT EDIT 2 | 3 | go 1.15 4 | 5 | require github.com/jsonnet-bundler/jsonnet-bundler v0.5.1 // cmd/jb 6 | -------------------------------------------------------------------------------- /.bingo/jsonnet-deps.mod: -------------------------------------------------------------------------------- 1 | module _ // Auto generated by https://github.com/bwplotka/bingo. DO NOT EDIT 2 | 3 | go 1.15 4 | 5 | require github.com/google/go-jsonnet v0.19.1 // cmd/jsonnet-deps 6 | -------------------------------------------------------------------------------- /.bingo/jsonnet-lint.mod: -------------------------------------------------------------------------------- 1 | module _ // Auto generated by https://github.com/bwplotka/bingo. DO NOT EDIT 2 | 3 | go 1.16 4 | 5 | require github.com/google/go-jsonnet v0.19.1 // cmd/jsonnet-lint 6 | -------------------------------------------------------------------------------- /.bingo/jsonnet.mod: -------------------------------------------------------------------------------- 1 | module _ // Auto generated by https://github.com/bwplotka/bingo. DO NOT EDIT 2 | 3 | go 1.15 4 | 5 | require github.com/google/go-jsonnet v0.19.1 // cmd/jsonnet 6 | -------------------------------------------------------------------------------- /.bingo/jsonnetfmt.mod: -------------------------------------------------------------------------------- 1 | module _ // Auto generated by https://github.com/bwplotka/bingo. 
DO NOT EDIT 2 | 3 | go 1.15 4 | 5 | require github.com/google/go-jsonnet v0.19.1 // cmd/jsonnetfmt 6 | -------------------------------------------------------------------------------- /.bingo/promtool.mod: -------------------------------------------------------------------------------- 1 | module _ // Auto generated by https://github.com/bwplotka/bingo. DO NOT EDIT 2 | 3 | go 1.14 4 | 5 | replace k8s.io/klog => github.com/simonpasquier/klog-gokit v0.3.0 6 | 7 | replace k8s.io/klog/v2 => github.com/simonpasquier/klog-gokit/v3 v3.0.0 8 | 9 | exclude github.com/linode/linodego v1.0.0 10 | 11 | exclude github.com/grpc-ecosystem/grpc-gateway v1.14.7 12 | 13 | exclude google.golang.org/api v0.30.0 14 | 15 | require github.com/prometheus/prometheus v0.43.0 // cmd/promtool 16 | -------------------------------------------------------------------------------- /.bingo/variables.env: -------------------------------------------------------------------------------- 1 | # Auto generated binary variables helper managed by https://github.com/bwplotka/bingo v0.9. DO NOT EDIT. 2 | # All tools are designed to be built inside $GOBIN. 3 | # These variables will only work once 'bingo get' has been invoked, or if the tools were installed via the Makefile's Variables.mk. 4 | GOBIN=${GOBIN:=$(go env GOBIN)} 5 | 6 | if [ -z "$GOBIN" ]; then 7 | GOBIN="$(go env GOPATH)/bin" 8 | fi 9 | 10 | 11 | BINGO="${GOBIN}/bingo-v0.8.0" 12 | 13 | FAILLINT="${GOBIN}/faillint-v1.14.0" 14 | 15 | GOIMPORTS="${GOBIN}/goimports-v0.9.3" 16 | 17 | GOJQ="${GOBIN}/gojq-v0.12.12" 18 | 19 | GOJSONTOYAML="${GOBIN}/gojsontoyaml-v0.0.0-20200602132005-3697ded27e8c" 20 | 21 | GOLANGCI_LINT="${GOBIN}/golangci-lint-v1.63.4" 22 | 23 | JB="${GOBIN}/jb-v0.5.1" 24 | 25 | JSONNET_DEPS="${GOBIN}/jsonnet-deps-v0.19.1" 26 | 27 | JSONNET_LINT="${GOBIN}/jsonnet-lint-v0.19.1" 28 | 29 | JSONNET="${GOBIN}/jsonnet-v0.19.1" 30 | 31 | JSONNETFMT="${GOBIN}/jsonnetfmt-v0.19.1" 32 | 33 | PROMTOOL="${GOBIN}/promtool-v0.43.0" 34 | 35 | YQ="${GOBIN}/yq-v4.33.1" 36 | 37 | -------------------------------------------------------------------------------- /.bingo/yq.mod: -------------------------------------------------------------------------------- 1 | module _ // Auto generated by https://github.com/bwplotka/bingo.
DO NOT EDIT 2 | 3 | go 1.15 4 | 5 | require github.com/mikefarah/yq/v4 v4.33.1 6 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | workflow_dispatch: 9 | 10 | jobs: 11 | build: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v2 15 | - name: Set up Go 16 | uses: actions/setup-go@v2 17 | with: 18 | go-version: 1.23 19 | - name: Vendor 20 | run: make vendor_jsonnet 21 | - name: Build 22 | run: make grafana manifests prometheusrules 23 | - name: Format 24 | run: make format 25 | - name: Lint 26 | run: make lint 27 | - name: Validate 28 | run: make validate 29 | - name: Diff 30 | run: git diff --exit-code 31 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | vendor/ 2 | vendor_jsonnet/ 3 | .bin 4 | .idea 5 | .envrc 6 | resources/.tmp/ 7 | tmp/ 8 | .fleet 9 | .vscode 10 | .DS_Store 11 | -------------------------------------------------------------------------------- /.gitleaks.toml: -------------------------------------------------------------------------------- 1 | title = "gitleaks config" 2 | [allowlist] 3 | paths=[ 4 | '''dex-template.jsonnet''', 5 | '''observatorium-template.yaml''', 6 | '''dex-template.yaml''', 7 | ] -------------------------------------------------------------------------------- /CODEOWNERS: -------------------------------------------------------------------------------- 1 | - bwplotka 2 | - kakkoyun 3 | - squat 4 | - onprem 5 | - spaparaju -------------------------------------------------------------------------------- /COPYRIGHT: -------------------------------------------------------------------------------- 1 | Copyright (c) The Red Hat Monitoring Team 2 | Licensed under the Apache License 2.0. 
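The `.bingo` files above pin one buildable version per tool, and the CI workflow consumes them through `make`. A minimal shell sketch of the intended developer workflow; the formatting target and file path are illustrative, not commands taken from this repository's Makefile:

```sh
# Build and install every pinned tool into $GOBIN,
# one versioned binary per .bingo/*.mod file (e.g. jsonnet-v0.19.1).
bingo get

# Source the generated helper so each pinned binary is addressable by name.
source .bingo/variables.env

# Invoke the pinned version instead of whatever happens to be on $PATH;
# the jsonnet entrypoint below is an illustrative choice from this repo.
"$JSONNETFMT" -i services/telemeter-template.jsonnet
```

Because every contributor renders and formats with identical tool versions, the `Diff` step in the CI workflow above can simply fail on `git diff --exit-code`.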
-------------------------------------------------------------------------------- /build_deploy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # This is a placeholder file needed by the CI/CD templates 3 | # You can safely ignore this file :) 4 | exit 0 -------------------------------------------------------------------------------- /configuration/observatorium/metric-federation-rules.libsonnet: -------------------------------------------------------------------------------- 1 | { 2 | prometheus+:: { 3 | recordingrules+: { 4 | groups+: [ 5 | { 6 | name: 'rhacs.rules', 7 | interval: '1m', 8 | rules: [ 9 | { 10 | record: 'rhacs:rox_central_cluster_metrics_cpu_capacity:avg_over_time1h', 11 | expr: ||| 12 | rhacs:rox_central_cluster_metrics_cpu_capacity:avg_over_time1h 13 | |||, 14 | }, 15 | ], 16 | }, 17 | ], 18 | }, 19 | }, 20 | } 21 | -------------------------------------------------------------------------------- /configuration/observatorium/queries-ruler.libsonnet: -------------------------------------------------------------------------------- 1 | { 2 | queries: [ 3 | { 4 | name: 'rule-query-path-sli-1M-samples', 5 | query: 'avg_over_time(avalanche_metric_mmmmm_0_0{tenant_id="0fc2b00e-201b-4c17-b9f2-19d91adc4fd2"}[1h])', 6 | }, 7 | { 8 | name: 'rule-query-path-sli-10M-samples', 9 | query: 'avg_over_time(avalanche_metric_mmmmm_0_0{tenant_id="0fc2b00e-201b-4c17-b9f2-19d91adc4fd2"}[10h])', 10 | }, 11 | { 12 | name: 'rule-query-path-sli-100M-samples', 13 | query: 'avg_over_time(avalanche_metric_mmmmm_0_0{tenant_id="0fc2b00e-201b-4c17-b9f2-19d91adc4fd2"}[100h])', 14 | }, 15 | ], 16 | } 17 | -------------------------------------------------------------------------------- /configuration/observatorium/queries.libsonnet: -------------------------------------------------------------------------------- 1 | { 2 | queries: [ 3 | { 4 | name: 'query-path-sli-1M-samples', 5 | query: 'avg_over_time(avalanche_metric_mmmmm_0_0{tenant_id="0fc2b00e-201b-4c17-b9f2-19d91adc4fd2"}[1h])', 6 | }, 7 | { 8 | name: 'query-path-sli-10M-samples', 9 | query: 'avg_over_time(avalanche_metric_mmmmm_0_0{tenant_id="0fc2b00e-201b-4c17-b9f2-19d91adc4fd2"}[10h])', 10 | }, 11 | { 12 | name: 'query-path-sli-100M-samples', 13 | query: 'avg_over_time(avalanche_metric_mmmmm_0_0{tenant_id="0fc2b00e-201b-4c17-b9f2-19d91adc4fd2"}[100h])', 14 | }, 15 | ], 16 | } 17 | -------------------------------------------------------------------------------- /configuration/observatorium/tenants.libsonnet: -------------------------------------------------------------------------------- 1 | { 2 | tenants: [ 3 | { 4 | name: 'rhobs', 5 | id: '770c1124-6ae8-4324-a9d4-9ce08590094b', 6 | oidc: { 7 | clientID: 'test', 8 | clientSecret: 'ZXhhbXBsZS1hcHAtc2VjcmV0', 9 | issuerURL: 'http://dex.dex.svc.cluster.local:5556/dex', 10 | usernameClaim: 'email', 11 | }, 12 | }, 13 | { 14 | name: 'telemeter', 15 | id: 'FB870BF3-9F3A-44FF-9BF7-D7A047A52F43', 16 | oidc: { 17 | clientID: 'test', 18 | clientSecret: 'ZXhhbXBsZS1hcHAtc2VjcmV0', 19 | issuerURL: 'http://dex.dex.svc.cluster.local:5556/dex', 20 | usernameClaim: 'email', 21 | }, 22 | }, 23 | ], 24 | // Collect all tenants in a map for convenient access. 
25 | map:: { 26 | [tenant.name]: tenant 27 | for tenant in self.tenants 28 | }, 29 | } 30 | -------------------------------------------------------------------------------- /configuration/rhelemeter/metrics.json: -------------------------------------------------------------------------------- 1 | [ 2 | "{__name__=\"system_cpu_logical_count\"}" 3 | ] 4 | -------------------------------------------------------------------------------- /configuration/telemeter-rosa/README.md: -------------------------------------------------------------------------------- 1 | # ROSA HCP Billing Metrics 2 | A set of ROSA metrics is federated from OBO into `telemeter-staging`, `telemeter-int` and `telemeter-prod`. These metrics are used for Subwatch billing of ROSA clusters via telemetry. 3 | 4 | The remote-write config can be found [here](https://gitlab.cee.redhat.com/service/osd-fleet-manager/-/blob/main/config/resources/managed-cluster-monitoring-stack.yaml). *Do not modify* without express approval from the ROSA team in the #sd-rosa-hcp channel. -------------------------------------------------------------------------------- /configuration/telemeter-rosa/metrics.json: -------------------------------------------------------------------------------- 1 | [ 2 | "{__name__=\"hostedcluster:hypershift_cluster_vcpus:max\"}" 3 | ] 4 | -------------------------------------------------------------------------------- /crds/observatorium-logs-crds-template.jsonnet: -------------------------------------------------------------------------------- 1 | local ar = (import 'loki.grafana.com_alertingrules.libsonnet'); 2 | local rr = (import 'loki.grafana.com_recordingrules.libsonnet'); 3 | 4 | { 5 | local clusterRole = { 6 | apiVersion: 'rbac.authorization.k8s.io/v1', 7 | kind: 'ClusterRole', 8 | metadata: { 9 | name: 'observatorium-logs-edit', 10 | labels: { 11 | 'managed.openshift.io/aggregate-to-dedicated-admins': 'cluster', 12 | }, 13 | }, 14 | rules: [ 15 | { 16 | apiGroups: ['loki.grafana.com'], 17 | resources: ['alertingrules', 'recordingrules'], 18 | verbs: ['create', 'update', 'delete', 'patch', 'get', 'list', 'watch'], 19 | }, 20 | ], 21 | }, 22 | 23 | local withServedV1Beta1 = function(crd) crd { 24 | spec+: { 25 | conversion:: {}, 26 | versions: [ 27 | v + (if v.name == 'v1beta1' then { 28 | served: true, 29 | } else {}) 30 | for v in super.versions 31 | ], 32 | }, 33 | }, 34 | 35 | apiVersion: 'template.openshift.io/v1', 36 | kind: 'Template', 37 | metadata: { 38 | name: 'observatorium-logs-crds', 39 | }, 40 | objects: [ 41 | withServedV1Beta1(ar), 42 | withServedV1Beta1(rr), 43 | clusterRole, 44 | ], 45 | } 46 | -------------------------------------------------------------------------------- /docs/observatorium-logs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rhobs/configuration/6d2b136e48c291c1a415394fed40219188620665/docs/observatorium-logs.png -------------------------------------------------------------------------------- /docs/readme.md: -------------------------------------------------------------------------------- 1 | # Observatorium 2 | 3 | ## Table of Contents 4 | 5 | - [Observatorium](./observatorium.md) 6 | - [Observatorium Logs](./observatorium.md#observatorium-logs) 7 | - [Observatorium Metrics](./observatorium.md#observatorium-metrics) 8 | - [Telemeter](./telemeter.md) 9 | - [Rhelemeter](./rhelemeter.md) 10 | --------------------------------------------------------------------------------
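Jsonnet sources like the CRD template above are rendered into the YAML checked in under `resources/` using the pinned tools. A sketch of that render step under stated assumptions: the real targets and output paths are defined in the repository's Makefile, and the `-J vendor_jsonnet` search path assumes `make vendor_jsonnet` has already run, as in the CI workflow:

```sh
source .bingo/variables.env

# Evaluate the OpenShift Template to JSON, then convert it to YAML.
# The output filename is illustrative; the Makefile owns the real paths.
"$JSONNET" -J vendor_jsonnet crds/observatorium-logs-crds-template.jsonnet \
  | "$GOJSONTOYAML" > observatorium-logs-crds-template.yaml
```

The same pattern applies to the `services/*.jsonnet` entrypoints that produce the templates under `resources/services`.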
/docs/rhelemeter.md: -------------------------------------------------------------------------------- 1 | # rhelemeter 2 | 3 | Rhelemeter is a specialized instance of [Telemeter](telemeter.md). While Telemeter receives metrics from OCP clusters, Rhelemeter 4 | receives metrics from RHEL hosts. 5 | 6 | The source code and more details can be found at [its upstream repository](https://github.com/openshift/telemeter/tree/master/cmd/rhelemeter-server). 7 | -------------------------------------------------------------------------------- /docs/telemeter.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rhobs/configuration/6d2b136e48c291c1a415394fed40219188620665/docs/telemeter.png -------------------------------------------------------------------------------- /lib/k.libsonnet: -------------------------------------------------------------------------------- 1 | (import 'github.com/jsonnet-libs/k8s-libsonnet/1.26/main.libsonnet') 2 | -------------------------------------------------------------------------------- /mimic.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "github.com/bwplotka/mimic" 5 | cfgobservatorium "github.com/rhobs/configuration/configuration/observatorium" 6 | ) 7 | 8 | func main() { 9 | gen := mimic.New() 10 | 11 | defer gen.Generate() 12 | 13 | cfgobservatorium.GenSLO(gen.With("observability", "prometheusrules", "pyrra"), gen.With("observability", "prometheusrules")) 14 | 15 | cfgobservatorium.GenerateRBACFile(gen.With(".tmp", "tenants")) 16 | 17 | } 18 | -------------------------------------------------------------------------------- /observability/observatorium-logs/loki-tenant-alerts.libsonnet: -------------------------------------------------------------------------------- 1 | function(namespace) { 2 | prometheusAlerts+:: { 3 | groups+: [ 4 | { 5 | name: 'loki_tenant_alerts', 6 | rules: [ 7 | { 8 | alert: 'LokiTenantRateLimitWarning', 9 | expr: ||| 10 | sum by (namespace, tenant, reason) (sum_over_time(rate(loki_discarded_samples_total{namespace="%s"}[1m])[30m:1m])) 11 | > 100 12 | ||| % namespace, 13 | 'for': '15m', 14 | labels: { 15 | severity: 'medium', 16 | }, 17 | annotations: { 18 | message: ||| 19 | {{ $labels.tenant }} is experiencing rate limiting for reason '{{ $labels.reason }}': {{ printf "%.0f" $value }} 20 | |||, 21 | }, 22 | }, 23 | ], 24 | }, 25 | ], 26 | }, 27 | } 28 | -------------------------------------------------------------------------------- /observability/utils.jsonnet: -------------------------------------------------------------------------------- 1 | { 2 | instanceNamespace(name, metricsNamespace, upNamespace): if name == 'telemeter' then metricsNamespace else upNamespace, 3 | instance_name_filter: '/^rhobs.*|telemeter-prod-01-prometheus|app-sre-stage-01-prometheus/', 4 | } 5 | -------------------------------------------------------------------------------- /resources/observability/prometheusrules/observatorium-gubernator-production.prometheusrules.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | $schema: /openshift/prometheus-rule-1.yml 3 | apiVersion: monitoring.coreos.com/v1 4 | kind: PrometheusRule 5 | metadata: 6 | labels: 7 | prometheus: app-sre 8 | role: alert-rules 9 | name: observatorium-gubernator-production 10 | spec: 11 | groups: 12 | - name: gubernator-absent 13 | rules: 14 | - alert: gubernatorIsDown 15 | annotations: 16 | dashboard: 
https://grafana.app-sre.devshift.net/d/no-dashboard/gubernator-absent?orgId=1&refresh=10s&var-datasource={{$externalLabels.cluster}}-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m 17 | message: gubernator has disappeared from Prometheus target discovery. 18 | runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#gubernatorisdown 19 | expr: | 20 | absent(up{job="observatorium-gubernator"} == 1) 21 | for: 5m 22 | labels: 23 | service: telemeter 24 | severity: critical 25 | -------------------------------------------------------------------------------- /resources/observability/prometheusrules/observatorium-gubernator-stage.prometheusrules.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | $schema: /openshift/prometheus-rule-1.yml 3 | apiVersion: monitoring.coreos.com/v1 4 | kind: PrometheusRule 5 | metadata: 6 | labels: 7 | prometheus: app-sre 8 | role: alert-rules 9 | name: observatorium-gubernator-stage 10 | spec: 11 | groups: 12 | - name: gubernator-absent 13 | rules: 14 | - alert: gubernatorIsDown 15 | annotations: 16 | dashboard: https://grafana.app-sre.devshift.net/d/no-dashboard/gubernator-absent?orgId=1&refresh=10s&var-datasource={{$externalLabels.cluster}}-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m 17 | message: gubernator has disappeared from Prometheus target discovery. 18 | runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#gubernatorisdown 19 | expr: | 20 | absent(up{job="observatorium-gubernator"} == 1) 21 | for: 5m 22 | labels: 23 | service: telemeter 24 | severity: high 25 | -------------------------------------------------------------------------------- /resources/observability/prometheusrules/observatorium-http-traffic-production.prometheusrules.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | $schema: /openshift/prometheus-rule-1.yml 3 | apiVersion: monitoring.coreos.com/v1 4 | kind: PrometheusRule 5 | metadata: 6 | labels: 7 | prometheus: app-sre 8 | role: alert-rules 9 | name: observatorium-http-traffic-production 10 | spec: 11 | groups: 12 | - name: observatorium-http-traffic 13 | rules: 14 | - alert: ObservatoriumHttpTrafficErrorRateHigh 15 | annotations: 16 | dashboard: https://grafana.app-sre.devshift.net/d/no-dashboard/observatorium-http-traffic?orgId=1&refresh=10s&var-datasource={{$externalLabels.cluster}}-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m 17 | message: Observatorium route {{$labels.route}} is failing to handle {{$value | humanize}}% of requests.
18 | runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#observatoriumhttptrafficerrorratehigh 19 | expr: | 20 | (sum by (route) (rate(haproxy_backend_http_responses_total{route=~"observatorium.*|telemeter.*|infogw.*", code="5xx"} [5m])) / sum by (route) (rate(haproxy_backend_http_responses_total{route=~"observatorium.*|telemeter.*|infogw.*"}[5m]))) * 100 > 25 21 | labels: 22 | service: telemeter 23 | severity: medium 24 | -------------------------------------------------------------------------------- /resources/observability/prometheusrules/observatorium-http-traffic-stage.prometheusrules.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | $schema: /openshift/prometheus-rule-1.yml 3 | apiVersion: monitoring.coreos.com/v1 4 | kind: PrometheusRule 5 | metadata: 6 | labels: 7 | prometheus: app-sre 8 | role: alert-rules 9 | name: observatorium-http-traffic-stage 10 | spec: 11 | groups: 12 | - name: observatorium-http-traffic 13 | rules: 14 | - alert: ObservatoriumHttpTrafficErrorRateHigh 15 | annotations: 16 | dashboard: https://grafana.app-sre.devshift.net/d/no-dashboard/observatorium-http-traffic?orgId=1&refresh=10s&var-datasource={{$externalLabels.cluster}}-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m 17 | message: Observatorium route {{$labels.route}} is failing to handle {{$value | humanize}}% of requests. 18 | runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#observatoriumhttptrafficerrorratehigh 19 | expr: | 20 | (sum by (route) (rate(haproxy_backend_http_responses_total{route=~"observatorium.*|telemeter.*|infogw.*", code="5xx"} [5m])) / sum by (route) (rate(haproxy_backend_http_responses_total{route=~"observatorium.*|telemeter.*|infogw.*"}[5m]))) * 100 > 25 21 | labels: 22 | service: telemeter 23 | severity: medium 24 | -------------------------------------------------------------------------------- /resources/observability/prometheusrules/observatorium-proactive-monitoring-production.prometheusrules.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | $schema: /openshift/prometheus-rule-1.yml 3 | apiVersion: monitoring.coreos.com/v1 4 | kind: PrometheusRule 5 | metadata: 6 | labels: 7 | prometheus: app-sre 8 | role: alert-rules 9 | name: observatorium-proactive-monitoring-production 10 | spec: 11 | groups: 12 | - name: observatorium-proactive-monitoring 13 | rules: 14 | - alert: ObservatoriumProActiveMetricsQueryErrorRateHigh 15 | annotations: 16 | dashboard: https://grafana.app-sre.devshift.net/d/no-dashboard/observatorium-proactive-monitoring?orgId=1&refresh=10s&var-datasource={{$externalLabels.cluster}}-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m 17 | message: Observatorium metric queries {{$labels.job}} in {{$labels.namespace}} are failing to handle {{$value | humanize}}% of requests.
18 | runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#observatoriumproactivemetricsqueryerrorratehigh 19 | expr: | 20 | ( sum by (namespace, job, query) (rate(up_custom_query_errors_total[5m])) / sum by (namespace, job, query) (rate(up_custom_query_executed_total[5m]))) * 100 > 25 21 | labels: 22 | service: telemeter 23 | severity: medium 24 | -------------------------------------------------------------------------------- /resources/observability/prometheusrules/observatorium-proactive-monitoring-stage.prometheusrules.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | $schema: /openshift/prometheus-rule-1.yml 3 | apiVersion: monitoring.coreos.com/v1 4 | kind: PrometheusRule 5 | metadata: 6 | labels: 7 | prometheus: app-sre 8 | role: alert-rules 9 | name: observatorium-proactive-monitoring-stage 10 | spec: 11 | groups: 12 | - name: observatorium-proactive-monitoring 13 | rules: 14 | - alert: ObservatoriumProActiveMetricsQueryErrorRateHigh 15 | annotations: 16 | dashboard: https://grafana.app-sre.devshift.net/d/no-dashboard/observatorium-proactive-monitoring?orgId=1&refresh=10s&var-datasource={{$externalLabels.cluster}}-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m 17 | message: Observatorium metric queries {{$labels.job}} in {{$labels.namespace}} are failing to handle {{$value | humanize}}% of requests. 18 | runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#observatoriumproactivemetricsqueryerrorratehigh 19 | expr: | 20 | ( sum by (namespace, job, query) (rate(up_custom_query_errors_total[5m])) / sum by (namespace, job, query) (rate(up_custom_query_executed_total[5m]))) * 100 > 25 21 | labels: 22 | service: telemeter 23 | severity: medium 24 | -------------------------------------------------------------------------------- /resources/observability/prometheusrules/pyrra/mst-production-api-alerting-availability-slo.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1alpha1 2 | kind: ServiceLevelObjective 3 | metadata: 4 | annotations: 5 | pyrra.dev/dashboard: https://grafana.app-sre.devshift.net/d/283e7002d85c08126681241df2fdb22b/mst-production-slos?orgId=1&refresh=10s&var-datasource=telemeter-prod-01-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m 6 | pyrra.dev/message: API Thanos Rule is failing to send alerts to Alertmanager and 7 | is burning too much error budget to guarantee availability SLOs. 8 | pyrra.dev/runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#APIAlertmanagerAvailabilityErrorBudgetBurning 9 | creationTimestamp: null 10 | labels: 11 | instance: mst-production 12 | pyrra.dev/service: observatorium-api 13 | name: api-alerting-availability-slo 14 | spec: 15 | alerting: 16 | name: APIAlertmanagerAvailabilityErrorBudgetBurning 17 | description: API Thanos Rule is failing to send alerts to Alertmanager and is burning 18 | too much error budget to guarantee availability SLOs.
19 | indicator: 20 | ratio: 21 | errors: 22 | metric: thanos_alert_sender_alerts_dropped_total{container="thanos-rule", 23 | namespace="observatorium-mst-production", code=~"^5..$"} 24 | grouping: null 25 | total: 26 | metric: thanos_alert_sender_alerts_dropped_total{container="thanos-rule", 27 | namespace="observatorium-mst-production"} 28 | target: "99" 29 | window: 28d 30 | status: {} 31 | -------------------------------------------------------------------------------- /resources/observability/prometheusrules/pyrra/mst-production-api-logs-prom-tail-availability-slo.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1alpha1 2 | kind: ServiceLevelObjective 3 | metadata: 4 | annotations: 5 | pyrra.dev/dashboard: https://grafana.app-sre.devshift.net/d/283e7002d85c08126681241df2fdb22b/mst-production-slos?orgId=1&refresh=10s&var-datasource=telemeter-prod-01-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m 6 | pyrra.dev/message: API logs /prom_tail is burning too much error budget to guarantee 7 | availability SLOs. 8 | pyrra.dev/runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#APILogsPromTailAvailabilityErrorBudgetBurning 9 | creationTimestamp: null 10 | labels: 11 | instance: mst-production 12 | service: observatorium-api 13 | name: api-logs-prom-tail-availability-slo 14 | spec: 15 | alerting: 16 | name: APILogsPromTailAvailabilityErrorBudgetBurning 17 | description: API logs /prom_tail is burning too much error budget to guarantee availability 18 | SLOs. 19 | indicator: 20 | ratio: 21 | errors: 22 | metric: http_requests_total{job="observatorium-observatorium-mst-api", handler="prom_tail", 23 | group="logsv1", code=~"^5..$"} 24 | grouping: null 25 | total: 26 | metric: http_requests_total{job="observatorium-observatorium-mst-api", handler="prom_tail", 27 | group="logsv1"} 28 | target: "95" 29 | window: 28d 30 | status: {} 31 | -------------------------------------------------------------------------------- /resources/observability/prometheusrules/pyrra/mst-production-api-logs-query-availability-slo.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1alpha1 2 | kind: ServiceLevelObjective 3 | metadata: 4 | annotations: 5 | pyrra.dev/dashboard: https://grafana.app-sre.devshift.net/d/283e7002d85c08126681241df2fdb22b/mst-production-slos?orgId=1&refresh=10s&var-datasource=telemeter-prod-01-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m 6 | pyrra.dev/message: API logs /query, /labels, or /label_values handler is burning 7 | too much error budget to guarantee availability SLOs. 8 | pyrra.dev/runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#APILogsQueryAvailabilityErrorBudgetBurning 9 | creationTimestamp: null 10 | labels: 11 | instance: mst-production 12 | service: observatorium-api 13 | name: api-logs-query-availability-slo 14 | spec: 15 | alerting: 16 | name: APILogsQueryAvailabilityErrorBudgetBurning 17 | description: API logs /query, /labels, or /label_values handler is burning too much 18 | error budget to guarantee availability SLOs. 
19 | indicator: 20 | ratio: 21 | errors: 22 | metric: http_requests_total{job="observatorium-observatorium-mst-api", handler=~"query|label|labels|label_values", 23 | group="logsv1", code=~"^5..$"} 24 | grouping: null 25 | total: 26 | metric: http_requests_total{job="observatorium-observatorium-mst-api", handler=~"query|label|labels|label_values", 27 | group="logsv1"} 28 | target: "95" 29 | window: 28d 30 | status: {} 31 | -------------------------------------------------------------------------------- /resources/observability/prometheusrules/pyrra/mst-production-api-logs-query-range-availability-slo.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1alpha1 2 | kind: ServiceLevelObjective 3 | metadata: 4 | annotations: 5 | pyrra.dev/dashboard: https://grafana.app-sre.devshift.net/d/283e7002d85c08126681241df2fdb22b/mst-production-slos?orgId=1&refresh=10s&var-datasource=telemeter-prod-01-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m 6 | pyrra.dev/message: API logs /query_range handler is burning too much error budget 7 | to guarantee availability SLOs. 8 | pyrra.dev/runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#APILogsQueryRangeAvailabilityErrorBudgetBurning 9 | creationTimestamp: null 10 | labels: 11 | instance: mst-production 12 | service: observatorium-api 13 | name: api-logs-query-range-availability-slo 14 | spec: 15 | alerting: 16 | name: APILogsQueryRangeAvailabilityErrorBudgetBurning 17 | description: API logs /query_range handler is burning too much error budget to guarantee 18 | availability SLOs. 19 | indicator: 20 | ratio: 21 | errors: 22 | metric: http_requests_total{job="observatorium-observatorium-mst-api", handler="query_range", 23 | group="logsv1", code=~"^5..$"} 24 | grouping: null 25 | total: 26 | metric: http_requests_total{job="observatorium-observatorium-mst-api", handler="query_range", 27 | group="logsv1"} 28 | target: "95" 29 | window: 28d 30 | status: {} 31 | -------------------------------------------------------------------------------- /resources/observability/prometheusrules/pyrra/mst-production-api-logs-tail-availability-slo.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1alpha1 2 | kind: ServiceLevelObjective 3 | metadata: 4 | annotations: 5 | pyrra.dev/dashboard: https://grafana.app-sre.devshift.net/d/283e7002d85c08126681241df2fdb22b/mst-production-slos?orgId=1&refresh=10s&var-datasource=telemeter-prod-01-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m 6 | pyrra.dev/message: API logs /tail is burning too much error budget to guarantee 7 | availability SLOs. 8 | pyrra.dev/runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#APILogsTailAvailabilityErrorBudgetBurning 9 | creationTimestamp: null 10 | labels: 11 | instance: mst-production 12 | service: observatorium-api 13 | name: api-logs-tail-availability-slo 14 | spec: 15 | alerting: 16 | name: APILogsTailAvailabilityErrorBudgetBurning 17 | description: API logs /tail is burning too much error budget to guarantee availability 18 | SLOs. 
  indicator:
    ratio:
      errors:
        metric: http_requests_total{job="observatorium-observatorium-mst-api", handler="tail",
          group="logsv1", code=~"^5..$"}
      grouping: null
      total:
        metric: http_requests_total{job="observatorium-observatorium-mst-api", handler="tail",
          group="logsv1"}
  target: "95"
  window: 28d
status: {}
--------------------------------------------------------------------------------
/resources/observability/prometheusrules/pyrra/mst-production-api-logs-write-availability-slo.yaml:
--------------------------------------------------------------------------------
apiVersion: v1alpha1
kind: ServiceLevelObjective
metadata:
  annotations:
    pyrra.dev/dashboard: https://grafana.app-sre.devshift.net/d/283e7002d85c08126681241df2fdb22b/mst-production-slos?orgId=1&refresh=10s&var-datasource=telemeter-prod-01-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m
    pyrra.dev/message: API logs /push handler is burning too much error budget to
      guarantee availability SLOs.
    pyrra.dev/runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#APILogsPushAvailabilityErrorBudgetBurning
  creationTimestamp: null
  labels:
    instance: mst-production
    service: observatorium-api
  name: api-logs-write-availability-slo
spec:
  alerting:
    name: APILogsPushAvailabilityErrorBudgetBurning
  description: API logs /push handler is burning too much error budget to guarantee
    availability SLOs.
  indicator:
    ratio:
      errors:
        metric: http_requests_total{job="observatorium-observatorium-mst-api", handler="push",
          group="logsv1", code=~"^5..$"}
      grouping: null
      total:
        metric: http_requests_total{job="observatorium-observatorium-mst-api", handler="push",
          group="logsv1"}
  target: "95"
  window: 28d
status: {}
--------------------------------------------------------------------------------
/resources/observability/prometheusrules/pyrra/mst-production-api-logs-write-latency-slo.yaml:
--------------------------------------------------------------------------------
apiVersion: v1alpha1
kind: ServiceLevelObjective
metadata:
  annotations:
    pyrra.dev/dashboard: https://grafana.app-sre.devshift.net/d/283e7002d85c08126681241df2fdb22b/mst-production-slos?orgId=1&refresh=10s&var-datasource=telemeter-prod-01-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m
    pyrra.dev/message: API /push handler is burning too much error budget to guarantee
      latency SLOs.
    pyrra.dev/runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#APILogsPushLatencyErrorBudgetBurning
  creationTimestamp: null
  labels:
    instance: mst-production
    service: observatorium-api
  name: api-logs-write-latency-slo
spec:
  alerting:
    name: APILogsPushLatencyErrorBudgetBurning
  description: API /push handler is burning too much error budget to guarantee latency
    SLOs.
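  # Illustrative sketch, not part of the generated manifest: a Pyrra "latency"
  # indicator compares one histogram bucket against the matching _count
  # series. With le="5" and target "90", the objective is roughly
  #
  #   sum(rate(http_request_duration_seconds_bucket{..., le="5"}[window]))
  #     / sum(rate(http_request_duration_seconds_count{...}[window])) >= 0.90
  #
  # i.e. at least 90% of successful /push requests must complete within 5s.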
  indicator:
    latency:
      grouping: null
      success:
        metric: http_request_duration_seconds_bucket{job="observatorium-observatorium-mst-api",
          handler="push", group="logsv1", code=~"^2..$", le="5"}
      total:
        metric: http_request_duration_seconds_count{job="observatorium-observatorium-mst-api",
          handler="push", group="logsv1", code=~"^2..$"}
  target: "90"
  window: 28d
status: {}
--------------------------------------------------------------------------------
/resources/observability/prometheusrules/pyrra/mst-production-api-metrics-query-availability-slo.yaml:
--------------------------------------------------------------------------------
apiVersion: v1alpha1
kind: ServiceLevelObjective
metadata:
  annotations:
    pyrra.dev/dashboard: https://grafana.app-sre.devshift.net/d/283e7002d85c08126681241df2fdb22b/mst-production-slos?orgId=1&refresh=10s&var-datasource=telemeter-prod-01-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m
    pyrra.dev/message: API /query handler is burning too much error budget to guarantee
      availability SLOs.
    pyrra.dev/runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#APIMetricsQueryAvailabilityErrorBudgetBurning
  creationTimestamp: null
  labels:
    instance: mst-production
    pyrra.dev/service: observatorium-api
  name: api-metrics-query-availability-slo
spec:
  alerting:
    name: APIMetricsQueryAvailabilityErrorBudgetBurning
  description: API /query handler is burning too much error budget to guarantee availability
    SLOs.
  indicator:
    ratio:
      errors:
        metric: http_requests_total{job="observatorium-observatorium-mst-api", handler="query",
          group="metricsv1", code=~"^5..$"}
      grouping: null
      total:
        metric: http_requests_total{job="observatorium-observatorium-mst-api", handler="query",
          group="metricsv1"}
  target: "99"
  window: 28d
status: {}
--------------------------------------------------------------------------------
/resources/observability/prometheusrules/pyrra/mst-production-api-metrics-query-range-availability-slo.yaml:
--------------------------------------------------------------------------------
apiVersion: v1alpha1
kind: ServiceLevelObjective
metadata:
  annotations:
    pyrra.dev/dashboard: https://grafana.app-sre.devshift.net/d/283e7002d85c08126681241df2fdb22b/mst-production-slos?orgId=1&refresh=10s&var-datasource=telemeter-prod-01-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m
    pyrra.dev/message: API /query_range handler is burning too much error budget to
      guarantee availability SLOs.
    pyrra.dev/runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#APIMetricsQueryRangeAvailabilityErrorBudgetBurning
  creationTimestamp: null
  labels:
    instance: mst-production
    pyrra.dev/service: observatorium-api
  name: api-metrics-query-range-availability-slo
spec:
  alerting:
    name: APIMetricsQueryRangeAvailabilityErrorBudgetBurning
  description: API /query_range handler is burning too much error budget to guarantee
    availability SLOs.
  indicator:
    ratio:
      errors:
        metric: http_requests_total{job="observatorium-observatorium-mst-api", handler="query_range",
          group="metricsv1", code=~"^5..$"}
      grouping: null
      total:
        metric: http_requests_total{job="observatorium-observatorium-mst-api", handler="query_range",
          group="metricsv1"}
  target: "99"
  window: 28d
status: {}
--------------------------------------------------------------------------------
/resources/observability/prometheusrules/pyrra/mst-production-api-metrics-read-100M-latency-slo.yaml:
--------------------------------------------------------------------------------
apiVersion: v1alpha1
kind: ServiceLevelObjective
metadata:
  annotations:
    pyrra.dev/dashboard: https://grafana.app-sre.devshift.net/d/283e7002d85c08126681241df2fdb22b/mst-production-slos?orgId=1&refresh=10s&var-datasource=telemeter-prod-01-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m
    pyrra.dev/message: API /query endpoint is burning too much error budget for 100M
      samples, to guarantee latency SLOs.
    pyrra.dev/runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#APIMetricsReadLatency100MErrorBudgetBurning
  creationTimestamp: null
  labels:
    instance: mst-production
    pyrra.dev/service: observatorium-api
  name: api-metrics-read-100M-latency-slo
spec:
  alerting:
    name: APIMetricsReadLatency100MErrorBudgetBurning
  description: API /query endpoint is burning too much error budget for 100M samples,
    to guarantee latency SLOs.
  indicator:
    latency:
      grouping: null
      success:
        metric: up_custom_query_duration_seconds_bucket{query="query-path-sli-100M-samples",
          namespace="observatorium-mst-production", http_code=~"^2..$", le="120"}
      total:
        metric: up_custom_query_duration_seconds_count{query="query-path-sli-100M-samples",
          namespace="observatorium-mst-production", http_code=~"^2..$"}
  target: "90"
  window: 28d
status: {}
--------------------------------------------------------------------------------
/resources/observability/prometheusrules/pyrra/mst-production-api-metrics-read-1M-latency-slo.yaml:
--------------------------------------------------------------------------------
apiVersion: v1alpha1
kind: ServiceLevelObjective
metadata:
  annotations:
    pyrra.dev/dashboard: https://grafana.app-sre.devshift.net/d/283e7002d85c08126681241df2fdb22b/mst-production-slos?orgId=1&refresh=10s&var-datasource=telemeter-prod-01-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m
    pyrra.dev/message: API /query endpoint is burning too much error budget for 1M
      samples, to guarantee latency SLOs.
    pyrra.dev/runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#APIMetricsReadLatency1MErrorBudgetBurning
  creationTimestamp: null
  labels:
    instance: mst-production
    pyrra.dev/service: observatorium-api
  name: api-metrics-read-1M-latency-slo
spec:
  alerting:
    name: APIMetricsReadLatency1MErrorBudgetBurning
  description: API /query endpoint is burning too much error budget for 1M samples,
    to guarantee latency SLOs.
  indicator:
    latency:
      grouping: null
      success:
        metric: up_custom_query_duration_seconds_bucket{query="query-path-sli-1M-samples",
          namespace="observatorium-mst-production", http_code=~"^2..$", le="10"}
      total:
        metric: up_custom_query_duration_seconds_count{query="query-path-sli-1M-samples",
          namespace="observatorium-mst-production", http_code=~"^2..$"}
  target: "90"
  window: 28d
status: {}
--------------------------------------------------------------------------------
/resources/observability/prometheusrules/pyrra/mst-production-api-metrics-rule-query-availability-slo.yaml:
--------------------------------------------------------------------------------
apiVersion: v1alpha1
kind: ServiceLevelObjective
metadata:
  annotations:
    pyrra.dev/dashboard: https://grafana.app-sre.devshift.net/d/283e7002d85c08126681241df2fdb22b/mst-production-slos?orgId=1&refresh=10s&var-datasource=telemeter-prod-01-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m
    pyrra.dev/message: API /query handler endpoint for rules evaluation is burning
      too much error budget to guarantee availability SLOs.
    pyrra.dev/runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#APIMetricsRulerQueryAvailabilityErrorBudgetBurning
  creationTimestamp: null
  labels:
    instance: mst-production
    pyrra.dev/service: observatorium-api
  name: api-metrics-rule-query-availability-slo
spec:
  alerting:
    name: APIMetricsRulerQueryAvailabilityErrorBudgetBurning
  description: API /query handler endpoint for rules evaluation is burning too much
    error budget to guarantee availability SLOs.
  indicator:
    ratio:
      errors:
        metric: http_requests_total{job="observatorium-ruler-query", handler="query",
          code=~"^5..$"}
      grouping: null
      total:
        metric: http_requests_total{job="observatorium-ruler-query", handler="query"}
  target: "99"
  window: 28d
status: {}
--------------------------------------------------------------------------------
/resources/observability/prometheusrules/pyrra/mst-production-api-metrics-write-availability-slo.yaml:
--------------------------------------------------------------------------------
apiVersion: v1alpha1
kind: ServiceLevelObjective
metadata:
  annotations:
    pyrra.dev/dashboard: https://grafana.app-sre.devshift.net/d/283e7002d85c08126681241df2fdb22b/mst-production-slos?orgId=1&refresh=10s&var-datasource=telemeter-prod-01-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m
    pyrra.dev/message: API /receive handler is burning too much error budget to guarantee
      availability SLOs.
    pyrra.dev/runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#APIMetricsWriteAvailabilityErrorBudgetBurning
  creationTimestamp: null
  labels:
    instance: mst-production
    pyrra.dev/service: observatorium-api
  name: api-metrics-write-availability-slo
spec:
  alerting:
    name: APIMetricsWriteAvailabilityErrorBudgetBurning
  description: API /receive handler is burning too much error budget to guarantee
    availability SLOs.
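  # Worked example, not part of the generated manifest: with target "99" over
  # a 28d window, the error budget is 1% of /receive requests. A constant 5xx
  # ratio of 1% spends that budget in exactly 28 days, while a 14.4x burn rate
  # (14.4% of requests failing) spends it in under two days, which is the kind
  # of fast burn the generated multi-window alerts are meant to catch early.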
  indicator:
    ratio:
      errors:
        metric: http_requests_total{job="observatorium-observatorium-mst-api", handler="receive",
          group="metricsv1", code=~"^5..$"}
      grouping: null
      total:
        metric: http_requests_total{job="observatorium-observatorium-mst-api", handler="receive",
          group="metricsv1"}
  target: "99"
  window: 28d
status: {}
--------------------------------------------------------------------------------
/resources/observability/prometheusrules/pyrra/mst-production-api-metrics-write-latency-slo.yaml:
--------------------------------------------------------------------------------
apiVersion: v1alpha1
kind: ServiceLevelObjective
metadata:
  annotations:
    pyrra.dev/dashboard: https://grafana.app-sre.devshift.net/d/283e7002d85c08126681241df2fdb22b/mst-production-slos?orgId=1&refresh=10s&var-datasource=telemeter-prod-01-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m
    pyrra.dev/message: API /receive handler is burning too much error budget to guarantee
      latency SLOs.
    pyrra.dev/runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#APIMetricsWriteLatencyErrorBudgetBurning
  creationTimestamp: null
  labels:
    instance: mst-production
    pyrra.dev/service: observatorium-api
  name: api-metrics-write-latency-slo
spec:
  alerting:
    name: APIMetricsWriteLatencyErrorBudgetBurning
  description: API /receive handler is burning too much error budget to guarantee
    latency SLOs.
  indicator:
    latency:
      grouping: null
      success:
        metric: http_request_duration_seconds_bucket{job="observatorium-observatorium-mst-api",
          handler="receive", group="metricsv1", code=~"^2..$", le="5"}
      total:
        metric: http_request_duration_seconds_count{job="observatorium-observatorium-mst-api",
          handler="receive", group="metricsv1", code=~"^2..$"}
  target: "90"
  window: 28d
status: {}
--------------------------------------------------------------------------------
/resources/observability/prometheusrules/pyrra/mst-production-api-rules-raw-read-availability-slo.yaml:
--------------------------------------------------------------------------------
apiVersion: v1alpha1
kind: ServiceLevelObjective
metadata:
  annotations:
    pyrra.dev/dashboard: https://grafana.app-sre.devshift.net/d/283e7002d85c08126681241df2fdb22b/mst-production-slos?orgId=1&refresh=10s&var-datasource=telemeter-prod-01-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m
    pyrra.dev/message: API /rules/raw endpoint for reads is burning too much error
      budget to guarantee availability SLOs.
    pyrra.dev/runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#APIRulesRawReadAvailabilityErrorBudgetBurning
  creationTimestamp: null
  labels:
    instance: mst-production
    pyrra.dev/service: observatorium-api
  name: api-rules-raw-read-availability-slo
spec:
  alerting:
    name: APIRulesRawReadAvailabilityErrorBudgetBurning
  description: API /rules/raw endpoint for reads is burning too much error budget
    to guarantee availability SLOs.
  indicator:
    ratio:
      errors:
        metric: http_requests_total{job="observatorium-observatorium-mst-api", handler="rules-raw",
          method="GET", group="metricsv1", code=~"^5..$"}
      grouping: null
      total:
        metric: http_requests_total{job="observatorium-observatorium-mst-api", handler="rules-raw",
          method="GET", group="metricsv1"}
  target: "99"
  window: 28d
status: {}
--------------------------------------------------------------------------------
/resources/observability/prometheusrules/pyrra/mst-production-api-rules-raw-write-availability-slo.yaml:
--------------------------------------------------------------------------------
apiVersion: v1alpha1
kind: ServiceLevelObjective
metadata:
  annotations:
    pyrra.dev/dashboard: https://grafana.app-sre.devshift.net/d/283e7002d85c08126681241df2fdb22b/mst-production-slos?orgId=1&refresh=10s&var-datasource=telemeter-prod-01-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m
    pyrra.dev/message: API /rules/raw endpoint for writes is burning too much error
      budget to guarantee availability SLOs.
    pyrra.dev/runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#APIRulesRawWriteAvailabilityErrorBudgetBurning
  creationTimestamp: null
  labels:
    instance: mst-production
    pyrra.dev/service: observatorium-api
  name: api-rules-raw-write-availability-slo
spec:
  alerting:
    name: APIRulesRawWriteAvailabilityErrorBudgetBurning
  description: API /rules/raw endpoint for writes is burning too much error budget
    to guarantee availability SLOs.
  indicator:
    ratio:
      errors:
        metric: http_requests_total{job="observatorium-observatorium-mst-api", handler="rules-raw",
          method="PUT", group="metricsv1", code=~"^5..$"}
      grouping: null
      total:
        metric: http_requests_total{job="observatorium-observatorium-mst-api", handler="rules-raw",
          method="PUT", group="metricsv1"}
  target: "99"
  window: 28d
status: {}
--------------------------------------------------------------------------------
/resources/observability/prometheusrules/pyrra/mst-production-api-rules-read-availability-slo.yaml:
--------------------------------------------------------------------------------
apiVersion: v1alpha1
kind: ServiceLevelObjective
metadata:
  annotations:
    pyrra.dev/dashboard: https://grafana.app-sre.devshift.net/d/283e7002d85c08126681241df2fdb22b/mst-production-slos?orgId=1&refresh=10s&var-datasource=telemeter-prod-01-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m
    pyrra.dev/message: API /rules endpoint is burning too much error budget to guarantee
      availability SLOs.
    pyrra.dev/runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#APIRulesReadAvailabilityErrorBudgetBurning
  creationTimestamp: null
  labels:
    instance: mst-production
    pyrra.dev/service: observatorium-api
  name: api-rules-read-availability-slo
spec:
  alerting:
    name: APIRulesReadAvailabilityErrorBudgetBurning
  description: API /rules endpoint is burning too much error budget to guarantee availability
    SLOs.
  indicator:
    ratio:
      errors:
        metric: http_requests_total{job="observatorium-observatorium-mst-api", handler="rules",
          method="GET", group="metricsv1", code=~"^5..$"}
      grouping: null
      total:
        metric: http_requests_total{job="observatorium-observatorium-mst-api", handler="rules",
          method="GET", group="metricsv1"}
  target: "99"
  window: 28d
status: {}
--------------------------------------------------------------------------------
/resources/observability/prometheusrules/pyrra/mst-production-api-rules-sync-availability-slo.yaml:
--------------------------------------------------------------------------------
apiVersion: v1alpha1
kind: ServiceLevelObjective
metadata:
  annotations:
    pyrra.dev/dashboard: https://grafana.app-sre.devshift.net/d/283e7002d85c08126681241df2fdb22b/mst-production-slos?orgId=1&refresh=10s&var-datasource=telemeter-prod-01-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m
    pyrra.dev/message: Thanos Ruler /reload endpoint is burning too much error budget
      to guarantee availability SLOs.
    pyrra.dev/runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#APIRulesSyncAvailabilityErrorBudgetBurning
  creationTimestamp: null
  labels:
    instance: mst-production
    pyrra.dev/service: observatorium-api
  name: api-rules-sync-availability-slo
spec:
  alerting:
    name: APIRulesSyncAvailabilityErrorBudgetBurning
  description: Thanos Ruler /reload endpoint is burning too much error budget to guarantee
    availability SLOs.
  indicator:
    ratio:
      errors:
        metric: client_api_requests_total{client="reload", container="thanos-rule-syncer",
          namespace="observatorium-mst-production", code=~"^5..$"}
      grouping: null
      total:
        metric: client_api_requests_total{client="reload", container="thanos-rule-syncer",
          namespace="observatorium-mst-production"}
  target: "99"
  window: 28d
status: {}
--------------------------------------------------------------------------------
/resources/observability/prometheusrules/pyrra/mst-stage-api-alerting-availability-slo.yaml:
--------------------------------------------------------------------------------
apiVersion: v1alpha1
kind: ServiceLevelObjective
metadata:
  annotations:
    pyrra.dev/dashboard: https://grafana.app-sre.devshift.net/d/92520ea4d6976f30d1618164e186ef9b/mst-stage-slos?orgId=1&refresh=10s&var-datasource=app-sre-stage-01-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m
    pyrra.dev/message: API Thanos Rule is failing to send alerts to Alertmanager and
      is burning too much error budget to guarantee availability SLOs.
    pyrra.dev/runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#APIAlertmanagerAvailabilityErrorBudgetBurning
  creationTimestamp: null
  labels:
    instance: mst-stage
    pyrra.dev/service: observatorium-api
  name: api-alerting-availability-slo
spec:
  alerting:
    name: APIAlertmanagerAvailabilityErrorBudgetBurning
  description: API Thanos Rule is failing to send alerts to Alertmanager and is burning
    too much error budget to guarantee availability SLOs.
  indicator:
    ratio:
      errors:
        metric: thanos_alert_sender_alerts_dropped_total{container="thanos-rule",
          namespace="observatorium-mst-stage", code=~"^5..$"}
      grouping: null
      total:
        metric: thanos_alert_sender_alerts_dropped_total{container="thanos-rule",
          namespace="observatorium-mst-stage"}
  target: "99"
  window: 28d
status: {}
--------------------------------------------------------------------------------
/resources/observability/prometheusrules/pyrra/mst-stage-api-logs-prom-tail-availability-slo.yaml:
--------------------------------------------------------------------------------
apiVersion: v1alpha1
kind: ServiceLevelObjective
metadata:
  annotations:
    pyrra.dev/dashboard: https://grafana.app-sre.devshift.net/d/92520ea4d6976f30d1618164e186ef9b/mst-stage-slos?orgId=1&refresh=10s&var-datasource=app-sre-stage-01-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m
    pyrra.dev/message: API logs /prom_tail is burning too much error budget to guarantee
      availability SLOs.
    pyrra.dev/runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#APILogsPromTailAvailabilityErrorBudgetBurning
  creationTimestamp: null
  labels:
    instance: mst-stage
    service: observatorium-api
  name: api-logs-prom-tail-availability-slo
spec:
  alerting:
    name: APILogsPromTailAvailabilityErrorBudgetBurning
  description: API logs /prom_tail is burning too much error budget to guarantee availability
    SLOs.
  indicator:
    ratio:
      errors:
        metric: http_requests_total{job="observatorium-observatorium-mst-api", handler="prom_tail",
          group="logsv1", code=~"^5..$"}
      grouping: null
      total:
        metric: http_requests_total{job="observatorium-observatorium-mst-api", handler="prom_tail",
          group="logsv1"}
  target: "95"
  window: 28d
status: {}
--------------------------------------------------------------------------------
/resources/observability/prometheusrules/pyrra/mst-stage-api-logs-query-availability-slo.yaml:
--------------------------------------------------------------------------------
apiVersion: v1alpha1
kind: ServiceLevelObjective
metadata:
  annotations:
    pyrra.dev/dashboard: https://grafana.app-sre.devshift.net/d/92520ea4d6976f30d1618164e186ef9b/mst-stage-slos?orgId=1&refresh=10s&var-datasource=app-sre-stage-01-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m
    pyrra.dev/message: API logs /query, /labels, or /label_values handler is burning
      too much error budget to guarantee availability SLOs.
    pyrra.dev/runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#APILogsQueryAvailabilityErrorBudgetBurning
  creationTimestamp: null
  labels:
    instance: mst-stage
    service: observatorium-api
  name: api-logs-query-availability-slo
spec:
  alerting:
    name: APILogsQueryAvailabilityErrorBudgetBurning
  description: API logs /query, /labels, or /label_values handler is burning too much
    error budget to guarantee availability SLOs.
  indicator:
    ratio:
      errors:
        metric: http_requests_total{job="observatorium-observatorium-mst-api", handler=~"query|label|labels|label_values",
          group="logsv1", code=~"^5..$"}
      grouping: null
      total:
        metric: http_requests_total{job="observatorium-observatorium-mst-api", handler=~"query|label|labels|label_values",
          group="logsv1"}
  target: "95"
  window: 28d
status: {}
--------------------------------------------------------------------------------
/resources/observability/prometheusrules/pyrra/mst-stage-api-logs-query-range-availability-slo.yaml:
--------------------------------------------------------------------------------
apiVersion: v1alpha1
kind: ServiceLevelObjective
metadata:
  annotations:
    pyrra.dev/dashboard: https://grafana.app-sre.devshift.net/d/92520ea4d6976f30d1618164e186ef9b/mst-stage-slos?orgId=1&refresh=10s&var-datasource=app-sre-stage-01-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m
    pyrra.dev/message: API logs /query_range handler is burning too much error budget
      to guarantee availability SLOs.
    pyrra.dev/runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#APILogsQueryRangeAvailabilityErrorBudgetBurning
  creationTimestamp: null
  labels:
    instance: mst-stage
    service: observatorium-api
  name: api-logs-query-range-availability-slo
spec:
  alerting:
    name: APILogsQueryRangeAvailabilityErrorBudgetBurning
  description: API logs /query_range handler is burning too much error budget to guarantee
    availability SLOs.
  indicator:
    ratio:
      errors:
        metric: http_requests_total{job="observatorium-observatorium-mst-api", handler="query_range",
          group="logsv1", code=~"^5..$"}
      grouping: null
      total:
        metric: http_requests_total{job="observatorium-observatorium-mst-api", handler="query_range",
          group="logsv1"}
  target: "95"
  window: 28d
status: {}
--------------------------------------------------------------------------------
/resources/observability/prometheusrules/pyrra/mst-stage-api-logs-tail-availability-slo.yaml:
--------------------------------------------------------------------------------
apiVersion: v1alpha1
kind: ServiceLevelObjective
metadata:
  annotations:
    pyrra.dev/dashboard: https://grafana.app-sre.devshift.net/d/92520ea4d6976f30d1618164e186ef9b/mst-stage-slos?orgId=1&refresh=10s&var-datasource=app-sre-stage-01-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m
    pyrra.dev/message: API logs /tail is burning too much error budget to guarantee
      availability SLOs.
    pyrra.dev/runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#APILogsTailAvailabilityErrorBudgetBurning
  creationTimestamp: null
  labels:
    instance: mst-stage
    service: observatorium-api
  name: api-logs-tail-availability-slo
spec:
  alerting:
    name: APILogsTailAvailabilityErrorBudgetBurning
  description: API logs /tail is burning too much error budget to guarantee availability
    SLOs.
  indicator:
    ratio:
      errors:
        metric: http_requests_total{job="observatorium-observatorium-mst-api", handler="tail",
          group="logsv1", code=~"^5..$"}
      grouping: null
      total:
        metric: http_requests_total{job="observatorium-observatorium-mst-api", handler="tail",
          group="logsv1"}
  target: "95"
  window: 28d
status: {}
--------------------------------------------------------------------------------
/resources/observability/prometheusrules/pyrra/mst-stage-api-logs-write-availability-slo.yaml:
--------------------------------------------------------------------------------
apiVersion: v1alpha1
kind: ServiceLevelObjective
metadata:
  annotations:
    pyrra.dev/dashboard: https://grafana.app-sre.devshift.net/d/92520ea4d6976f30d1618164e186ef9b/mst-stage-slos?orgId=1&refresh=10s&var-datasource=app-sre-stage-01-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m
    pyrra.dev/message: API logs /push handler is burning too much error budget to
      guarantee availability SLOs.
    pyrra.dev/runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#APILogsPushAvailabilityErrorBudgetBurning
  creationTimestamp: null
  labels:
    instance: mst-stage
    service: observatorium-api
  name: api-logs-write-availability-slo
spec:
  alerting:
    name: APILogsPushAvailabilityErrorBudgetBurning
  description: API logs /push handler is burning too much error budget to guarantee
    availability SLOs.
  indicator:
    ratio:
      errors:
        metric: http_requests_total{job="observatorium-observatorium-mst-api", handler="push",
          group="logsv1", code=~"^5..$"}
      grouping: null
      total:
        metric: http_requests_total{job="observatorium-observatorium-mst-api", handler="push",
          group="logsv1"}
  target: "95"
  window: 28d
status: {}
--------------------------------------------------------------------------------
/resources/observability/prometheusrules/pyrra/mst-stage-api-logs-write-latency-slo.yaml:
--------------------------------------------------------------------------------
apiVersion: v1alpha1
kind: ServiceLevelObjective
metadata:
  annotations:
    pyrra.dev/dashboard: https://grafana.app-sre.devshift.net/d/92520ea4d6976f30d1618164e186ef9b/mst-stage-slos?orgId=1&refresh=10s&var-datasource=app-sre-stage-01-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m
    pyrra.dev/message: API /push handler is burning too much error budget to guarantee
      latency SLOs.
    pyrra.dev/runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#APILogsPushLatencyErrorBudgetBurning
  creationTimestamp: null
  labels:
    instance: mst-stage
    service: observatorium-api
  name: api-logs-write-latency-slo
spec:
  alerting:
    name: APILogsPushLatencyErrorBudgetBurning
  description: API /push handler is burning too much error budget to guarantee latency
    SLOs.
  indicator:
    latency:
      grouping: null
      success:
        metric: http_request_duration_seconds_bucket{job="observatorium-observatorium-mst-api",
          handler="push", group="logsv1", code=~"^2..$", le="5"}
      total:
        metric: http_request_duration_seconds_count{job="observatorium-observatorium-mst-api",
          handler="push", group="logsv1", code=~"^2..$"}
  target: "90"
  window: 28d
status: {}
--------------------------------------------------------------------------------
/resources/observability/prometheusrules/pyrra/mst-stage-api-metrics-query-availability-slo.yaml:
--------------------------------------------------------------------------------
apiVersion: v1alpha1
kind: ServiceLevelObjective
metadata:
  annotations:
    pyrra.dev/dashboard: https://grafana.app-sre.devshift.net/d/92520ea4d6976f30d1618164e186ef9b/mst-stage-slos?orgId=1&refresh=10s&var-datasource=app-sre-stage-01-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m
    pyrra.dev/message: API /query handler is burning too much error budget to guarantee
      availability SLOs.
    pyrra.dev/runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#APIMetricsQueryAvailabilityErrorBudgetBurning
  creationTimestamp: null
  labels:
    instance: mst-stage
    pyrra.dev/service: observatorium-api
  name: api-metrics-query-availability-slo
spec:
  alerting:
    name: APIMetricsQueryAvailabilityErrorBudgetBurning
  description: API /query handler is burning too much error budget to guarantee availability
    SLOs.
  indicator:
    ratio:
      errors:
        metric: http_requests_total{job="observatorium-observatorium-mst-api", handler="query",
          group="metricsv1", code=~"^5..$"}
      grouping: null
      total:
        metric: http_requests_total{job="observatorium-observatorium-mst-api", handler="query",
          group="metricsv1"}
  target: "99"
  window: 28d
status: {}
--------------------------------------------------------------------------------
/resources/observability/prometheusrules/pyrra/mst-stage-api-metrics-query-range-availability-slo.yaml:
--------------------------------------------------------------------------------
apiVersion: v1alpha1
kind: ServiceLevelObjective
metadata:
  annotations:
    pyrra.dev/dashboard: https://grafana.app-sre.devshift.net/d/92520ea4d6976f30d1618164e186ef9b/mst-stage-slos?orgId=1&refresh=10s&var-datasource=app-sre-stage-01-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m
    pyrra.dev/message: API /query_range handler is burning too much error budget to
      guarantee availability SLOs.
    pyrra.dev/runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#APIMetricsQueryRangeAvailabilityErrorBudgetBurning
  creationTimestamp: null
  labels:
    instance: mst-stage
    pyrra.dev/service: observatorium-api
  name: api-metrics-query-range-availability-slo
spec:
  alerting:
    name: APIMetricsQueryRangeAvailabilityErrorBudgetBurning
  description: API /query_range handler is burning too much error budget to guarantee
    availability SLOs.
  indicator:
    ratio:
      errors:
        metric: http_requests_total{job="observatorium-observatorium-mst-api", handler="query_range",
          group="metricsv1", code=~"^5..$"}
      grouping: null
      total:
        metric: http_requests_total{job="observatorium-observatorium-mst-api", handler="query_range",
          group="metricsv1"}
  target: "99"
  window: 28d
status: {}
--------------------------------------------------------------------------------
/resources/observability/prometheusrules/pyrra/mst-stage-api-metrics-read-100M-latency-slo.yaml:
--------------------------------------------------------------------------------
apiVersion: v1alpha1
kind: ServiceLevelObjective
metadata:
  annotations:
    pyrra.dev/dashboard: https://grafana.app-sre.devshift.net/d/92520ea4d6976f30d1618164e186ef9b/mst-stage-slos?orgId=1&refresh=10s&var-datasource=app-sre-stage-01-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m
    pyrra.dev/message: API /query endpoint is burning too much error budget for 100M
      samples, to guarantee latency SLOs.
    pyrra.dev/runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#APIMetricsReadLatency100MErrorBudgetBurning
  creationTimestamp: null
  labels:
    instance: mst-stage
    pyrra.dev/service: observatorium-api
  name: api-metrics-read-100M-latency-slo
spec:
  alerting:
    name: APIMetricsReadLatency100MErrorBudgetBurning
  description: API /query endpoint is burning too much error budget for 100M samples,
    to guarantee latency SLOs.
  indicator:
    latency:
      grouping: null
      success:
        metric: up_custom_query_duration_seconds_bucket{query="query-path-sli-100M-samples",
          namespace="observatorium-mst-stage", http_code=~"^2..$", le="120"}
      total:
        metric: up_custom_query_duration_seconds_count{query="query-path-sli-100M-samples",
          namespace="observatorium-mst-stage", http_code=~"^2..$"}
  target: "90"
  window: 28d
status: {}
--------------------------------------------------------------------------------
/resources/observability/prometheusrules/pyrra/mst-stage-api-metrics-read-10M-latency-slo.yaml:
--------------------------------------------------------------------------------
apiVersion: v1alpha1
kind: ServiceLevelObjective
metadata:
  annotations:
    pyrra.dev/dashboard: https://grafana.app-sre.devshift.net/d/92520ea4d6976f30d1618164e186ef9b/mst-stage-slos?orgId=1&refresh=10s&var-datasource=app-sre-stage-01-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m
    pyrra.dev/message: API /query endpoint is burning too much error budget for 10M
      samples, to guarantee latency SLOs.
    pyrra.dev/runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#APIMetricsReadLatency10MErrorBudgetBurning
  creationTimestamp: null
  labels:
    instance: mst-stage
    pyrra.dev/service: observatorium-api
  name: api-metrics-read-10M-latency-slo
spec:
  alerting:
    name: APIMetricsReadLatency10MErrorBudgetBurning
  description: API /query endpoint is burning too much error budget for 10M samples,
    to guarantee latency SLOs.
  indicator:
    latency:
      grouping: null
      success:
        metric: up_custom_query_duration_seconds_bucket{query="query-path-sli-10M-samples",
          namespace="observatorium-mst-stage", http_code=~"^2..$", le="30"}
      total:
        metric: up_custom_query_duration_seconds_count{query="query-path-sli-10M-samples",
          namespace="observatorium-mst-stage", http_code=~"^2..$"}
  target: "90"
  window: 28d
status: {}
--------------------------------------------------------------------------------
/resources/observability/prometheusrules/pyrra/mst-stage-api-metrics-read-1M-latency-slo.yaml:
--------------------------------------------------------------------------------
apiVersion: v1alpha1
kind: ServiceLevelObjective
metadata:
  annotations:
    pyrra.dev/dashboard: https://grafana.app-sre.devshift.net/d/92520ea4d6976f30d1618164e186ef9b/mst-stage-slos?orgId=1&refresh=10s&var-datasource=app-sre-stage-01-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m
    pyrra.dev/message: API /query endpoint is burning too much error budget for 1M
      samples, to guarantee latency SLOs.
    pyrra.dev/runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#APIMetricsReadLatency1MErrorBudgetBurning
  creationTimestamp: null
  labels:
    instance: mst-stage
    pyrra.dev/service: observatorium-api
  name: api-metrics-read-1M-latency-slo
spec:
  alerting:
    name: APIMetricsReadLatency1MErrorBudgetBurning
  description: API /query endpoint is burning too much error budget for 1M samples,
    to guarantee latency SLOs.
  indicator:
    latency:
      grouping: null
      success:
        metric: up_custom_query_duration_seconds_bucket{query="query-path-sli-1M-samples",
          namespace="observatorium-mst-stage", http_code=~"^2..$", le="10"}
      total:
        metric: up_custom_query_duration_seconds_count{query="query-path-sli-1M-samples",
          namespace="observatorium-mst-stage", http_code=~"^2..$"}
  target: "90"
  window: 28d
status: {}
--------------------------------------------------------------------------------
/resources/observability/prometheusrules/pyrra/mst-stage-api-metrics-rule-query-availability-slo.yaml:
--------------------------------------------------------------------------------
apiVersion: v1alpha1
kind: ServiceLevelObjective
metadata:
  annotations:
    pyrra.dev/dashboard: https://grafana.app-sre.devshift.net/d/92520ea4d6976f30d1618164e186ef9b/mst-stage-slos?orgId=1&refresh=10s&var-datasource=app-sre-stage-01-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m
    pyrra.dev/message: API /query handler endpoint for rules evaluation is burning
      too much error budget to guarantee availability SLOs.
    pyrra.dev/runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#APIMetricsRulerQueryAvailabilityErrorBudgetBurning
  creationTimestamp: null
  labels:
    instance: mst-stage
    pyrra.dev/service: observatorium-api
  name: api-metrics-rule-query-availability-slo
spec:
  alerting:
    name: APIMetricsRulerQueryAvailabilityErrorBudgetBurning
  description: API /query handler endpoint for rules evaluation is burning too much
    error budget to guarantee availability SLOs.
  indicator:
    ratio:
      errors:
        metric: http_requests_total{job="observatorium-ruler-query", handler="query",
          code=~"^5..$"}
      grouping: null
      total:
        metric: http_requests_total{job="observatorium-ruler-query", handler="query"}
  target: "99"
  window: 28d
status: {}
--------------------------------------------------------------------------------
/resources/observability/prometheusrules/pyrra/mst-stage-api-metrics-write-availability-slo.yaml:
--------------------------------------------------------------------------------
apiVersion: v1alpha1
kind: ServiceLevelObjective
metadata:
  annotations:
    pyrra.dev/dashboard: https://grafana.app-sre.devshift.net/d/92520ea4d6976f30d1618164e186ef9b/mst-stage-slos?orgId=1&refresh=10s&var-datasource=app-sre-stage-01-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m
    pyrra.dev/message: API /receive handler is burning too much error budget to guarantee
      availability SLOs.
    pyrra.dev/runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#APIMetricsWriteAvailabilityErrorBudgetBurning
  creationTimestamp: null
  labels:
    instance: mst-stage
    pyrra.dev/service: observatorium-api
  name: api-metrics-write-availability-slo
spec:
  alerting:
    name: APIMetricsWriteAvailabilityErrorBudgetBurning
  description: API /receive handler is burning too much error budget to guarantee
    availability SLOs.
  indicator:
    ratio:
      errors:
        metric: http_requests_total{job="observatorium-observatorium-mst-api", handler="receive",
          group="metricsv1", code=~"^5..$"}
      grouping: null
      total:
        metric: http_requests_total{job="observatorium-observatorium-mst-api", handler="receive",
          group="metricsv1"}
  target: "99"
  window: 28d
status: {}
--------------------------------------------------------------------------------
/resources/observability/prometheusrules/pyrra/mst-stage-api-metrics-write-latency-slo.yaml:
--------------------------------------------------------------------------------
apiVersion: v1alpha1
kind: ServiceLevelObjective
metadata:
  annotations:
    pyrra.dev/dashboard: https://grafana.app-sre.devshift.net/d/92520ea4d6976f30d1618164e186ef9b/mst-stage-slos?orgId=1&refresh=10s&var-datasource=app-sre-stage-01-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m
    pyrra.dev/message: API /receive handler is burning too much error budget to guarantee
      latency SLOs.
    pyrra.dev/runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#APIMetricsWriteLatencyErrorBudgetBurning
  creationTimestamp: null
  labels:
    instance: mst-stage
    pyrra.dev/service: observatorium-api
  name: api-metrics-write-latency-slo
spec:
  alerting:
    name: APIMetricsWriteLatencyErrorBudgetBurning
  description: API /receive handler is burning too much error budget to guarantee
    latency SLOs.
  indicator:
    latency:
      grouping: null
      success:
        metric: http_request_duration_seconds_bucket{job="observatorium-observatorium-mst-api",
          handler="receive", group="metricsv1", code=~"^2..$", le="5"}
      total:
        metric: http_request_duration_seconds_count{job="observatorium-observatorium-mst-api",
          handler="receive", group="metricsv1", code=~"^2..$"}
  target: "90"
  window: 28d
status: {}
--------------------------------------------------------------------------------
/resources/observability/prometheusrules/pyrra/mst-stage-api-rules-raw-read-availability-slo.yaml:
--------------------------------------------------------------------------------
apiVersion: v1alpha1
kind: ServiceLevelObjective
metadata:
  annotations:
    pyrra.dev/dashboard: https://grafana.app-sre.devshift.net/d/92520ea4d6976f30d1618164e186ef9b/mst-stage-slos?orgId=1&refresh=10s&var-datasource=app-sre-stage-01-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m
    pyrra.dev/message: API /rules/raw endpoint for reads is burning too much error
      budget to guarantee availability SLOs.
    pyrra.dev/runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#APIRulesRawReadAvailabilityErrorBudgetBurning
  creationTimestamp: null
  labels:
    instance: mst-stage
    pyrra.dev/service: observatorium-api
  name: api-rules-raw-read-availability-slo
spec:
  alerting:
    name: APIRulesRawReadAvailabilityErrorBudgetBurning
  description: API /rules/raw endpoint for reads is burning too much error budget
    to guarantee availability SLOs.
  indicator:
    ratio:
      errors:
        metric: http_requests_total{job="observatorium-observatorium-mst-api", handler="rules-raw",
          method="GET", group="metricsv1", code=~"^5..$"}
      grouping: null
      total:
        metric: http_requests_total{job="observatorium-observatorium-mst-api", handler="rules-raw",
          method="GET", group="metricsv1"}
  target: "99"
  window: 28d
status: {}
--------------------------------------------------------------------------------
/resources/observability/prometheusrules/pyrra/mst-stage-api-rules-raw-write-availability-slo.yaml:
--------------------------------------------------------------------------------
apiVersion: v1alpha1
kind: ServiceLevelObjective
metadata:
  annotations:
    pyrra.dev/dashboard: https://grafana.app-sre.devshift.net/d/92520ea4d6976f30d1618164e186ef9b/mst-stage-slos?orgId=1&refresh=10s&var-datasource=app-sre-stage-01-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m
    pyrra.dev/message: API /rules/raw endpoint for writes is burning too much error
      budget to guarantee availability SLOs.
    pyrra.dev/runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#APIRulesRawWriteAvailabilityErrorBudgetBurning
  creationTimestamp: null
  labels:
    instance: mst-stage
    pyrra.dev/service: observatorium-api
  name: api-rules-raw-write-availability-slo
spec:
  alerting:
    name: APIRulesRawWriteAvailabilityErrorBudgetBurning
  description: API /rules/raw endpoint for writes is burning too much error budget
    to guarantee availability SLOs.
  indicator:
    ratio:
      errors:
        metric: http_requests_total{job="observatorium-observatorium-mst-api", handler="rules-raw",
          method="PUT", group="metricsv1", code=~"^5..$"}
      grouping: null
      total:
        metric: http_requests_total{job="observatorium-observatorium-mst-api", handler="rules-raw",
          method="PUT", group="metricsv1"}
  target: "99"
  window: 28d
status: {}
--------------------------------------------------------------------------------
/resources/observability/prometheusrules/pyrra/mst-stage-api-rules-read-availability-slo.yaml:
--------------------------------------------------------------------------------
apiVersion: v1alpha1
kind: ServiceLevelObjective
metadata:
  annotations:
    pyrra.dev/dashboard: https://grafana.app-sre.devshift.net/d/92520ea4d6976f30d1618164e186ef9b/mst-stage-slos?orgId=1&refresh=10s&var-datasource=app-sre-stage-01-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m
    pyrra.dev/message: API /rules endpoint is burning too much error budget to guarantee
      availability SLOs.
    pyrra.dev/runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#APIRulesReadAvailabilityErrorBudgetBurning
  creationTimestamp: null
  labels:
    instance: mst-stage
    pyrra.dev/service: observatorium-api
  name: api-rules-read-availability-slo
spec:
  alerting:
    name: APIRulesReadAvailabilityErrorBudgetBurning
  description: API /rules endpoint is burning too much error budget to guarantee availability
    SLOs.
  indicator:
    ratio:
      errors:
        metric: http_requests_total{job="observatorium-observatorium-mst-api", handler="rules",
          method="GET", group="metricsv1", code=~"^5..$"}
      grouping: null
      total:
        metric: http_requests_total{job="observatorium-observatorium-mst-api", handler="rules",
          method="GET", group="metricsv1"}
  target: "99"
  window: 28d
status: {}
--------------------------------------------------------------------------------
/resources/observability/prometheusrules/pyrra/mst-stage-api-rules-sync-availability-slo.yaml:
--------------------------------------------------------------------------------
apiVersion: v1alpha1
kind: ServiceLevelObjective
metadata:
  annotations:
    pyrra.dev/dashboard: https://grafana.app-sre.devshift.net/d/92520ea4d6976f30d1618164e186ef9b/mst-stage-slos?orgId=1&refresh=10s&var-datasource=app-sre-stage-01-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m
    pyrra.dev/message: Thanos Ruler /reload endpoint is burning too much error budget
      to guarantee availability SLOs.
    pyrra.dev/runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#APIRulesSyncAvailabilityErrorBudgetBurning
  creationTimestamp: null
  labels:
    instance: mst-stage
    pyrra.dev/service: observatorium-api
  name: api-rules-sync-availability-slo
spec:
  alerting:
    name: APIRulesSyncAvailabilityErrorBudgetBurning
  description: Thanos Ruler /reload endpoint is burning too much error budget to guarantee
    availability SLOs.
  indicator:
    ratio:
      errors:
        metric: client_api_requests_total{client="reload", container="thanos-rule-syncer",
          namespace="observatorium-mst-stage", code=~"^5..$"}
      grouping: null
      total:
        metric: client_api_requests_total{client="reload", container="thanos-rule-syncer",
          namespace="observatorium-mst-stage"}
  target: "99"
  window: 28d
status: {}
--------------------------------------------------------------------------------
/resources/observability/prometheusrules/pyrra/rhelemeter-production-rhobs-rhelemeter-server-metrics-receive-availability-slo.yaml:
--------------------------------------------------------------------------------
apiVersion: v1alpha1
kind: ServiceLevelObjective
metadata:
  annotations:
    pyrra.dev/dashboard: https://grafana.app-sre.devshift.net/d//rhelemeter-production-slos?orgId=1&refresh=10s&var-datasource=&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m
    pyrra.dev/message: Rhelemeter Server /receive is burning too much error budget
      to guarantee availability SLOs.
    pyrra.dev/runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#RhelemeterServerMetricsReceiveWriteAvailabilityErrorBudgetBurning
  creationTimestamp: null
  labels:
    pyrra.dev/service: telemeter
    route: rhelemeter-server-receive
  name: rhobs-rhelemeter-server-metrics-receive-availability-slo
spec:
  alerting:
    name: RhelemeterServerMetricsReceiveWriteAvailabilityErrorBudgetBurning
  description: Rhelemeter Server /receive is burning too much error budget to guarantee
    availability SLOs.
  indicator:
    ratio:
      errors:
        metric: haproxy_server_http_responses_total{route="rhelemeter-server-metrics-v1-receive",
          code=~"5.."}
      grouping: null
      total:
        metric: haproxy_server_http_responses_total{route="rhelemeter-server-metrics-v1-receive"}
  target: "99"
  window: 28d
status: {}
--------------------------------------------------------------------------------
/resources/observability/prometheusrules/pyrra/rhelemeter-production-rhobs-rhelemeter-server-metrics-receive-latency-slo.yaml:
--------------------------------------------------------------------------------
apiVersion: v1alpha1
kind: ServiceLevelObjective
metadata:
  annotations:
    pyrra.dev/dashboard: https://grafana.app-sre.devshift.net/d//rhelemeter-production-slos?orgId=1&refresh=10s&var-datasource=&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m
    pyrra.dev/message: Rhelemeter Server /receive is burning too much error budget
      to guarantee latency SLOs.
    pyrra.dev/runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#RhelemeterServerMetricsReceiveWriteLatencyErrorBudgetBurning
  creationTimestamp: null
  labels:
    pyrra.dev/service: telemeter
    route: rhelemeter-server-receive
  name: rhobs-rhelemeter-server-metrics-receive-latency-slo
spec:
  alerting:
    name: RhelemeterServerMetricsReceiveWriteLatencyErrorBudgetBurning
  description: Rhelemeter Server /receive is burning too much error budget to guarantee
    latency SLOs.
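  # Observation, not part of the generated manifest: the rhelemeter
  # availability SLO above counts 5xx responses at the HAProxy edge
  # (haproxy_server_http_responses_total), while the latency SLO below uses
  # server-side histograms (http_request_duration_seconds_*) from the
  # rhelemeter-server job, so the two indicators sample different points in
  # the request path.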
  indicator:
    latency:
      grouping: null
      success:
        metric: http_request_duration_seconds_bucket{job="rhelemeter-server", handler="receive",
          code=~"^2..$", le="5"}
      total:
        metric: http_request_duration_seconds_count{job="rhelemeter-server", handler="receive",
          code=~"^2..$"}
  target: "90"
  window: 28d
status: {}
--------------------------------------------------------------------------------
/resources/observability/prometheusrules/pyrra/rhelemeter-stage-rhobs-rhelemeter-server-metrics-receive-availability-slo.yaml:
--------------------------------------------------------------------------------
apiVersion: v1alpha1
kind: ServiceLevelObjective
metadata:
  annotations:
    pyrra.dev/dashboard: https://grafana.app-sre.devshift.net/d//rhelemeter-stage-slos?orgId=1&refresh=10s&var-datasource=&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m
    pyrra.dev/message: Rhelemeter Server /receive is burning too much error budget
      to guarantee availability SLOs.
    pyrra.dev/runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#RhelemeterServerMetricsReceiveWriteAvailabilityErrorBudgetBurning
  creationTimestamp: null
  labels:
    pyrra.dev/service: telemeter
    route: rhelemeter-server-receive
  name: rhobs-rhelemeter-server-metrics-receive-availability-slo
spec:
  alerting:
    name: RhelemeterServerMetricsReceiveWriteAvailabilityErrorBudgetBurning
  description: Rhelemeter Server /receive is burning too much error budget to guarantee
    availability SLOs.
  indicator:
    ratio:
      errors:
        metric: haproxy_server_http_responses_total{route="rhelemeter-server-metrics-v1-receive",
          code=~"5.."}
      grouping: null
      total:
        metric: haproxy_server_http_responses_total{route="rhelemeter-server-metrics-v1-receive"}
  target: "99"
  window: 28d
status: {}
--------------------------------------------------------------------------------
/resources/observability/prometheusrules/pyrra/rhelemeter-stage-rhobs-rhelemeter-server-metrics-receive-latency-slo.yaml:
--------------------------------------------------------------------------------
apiVersion: v1alpha1
kind: ServiceLevelObjective
metadata:
  annotations:
    pyrra.dev/dashboard: https://grafana.app-sre.devshift.net/d//rhelemeter-stage-slos?orgId=1&refresh=10s&var-datasource=&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m
    pyrra.dev/message: Rhelemeter Server /receive is burning too much error budget
      to guarantee latency SLOs.
    pyrra.dev/runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#RhelemeterServerMetricsReceiveWriteLatencyErrorBudgetBurning
  creationTimestamp: null
  labels:
    pyrra.dev/service: telemeter
    route: rhelemeter-server-receive
  name: rhobs-rhelemeter-server-metrics-receive-latency-slo
spec:
  alerting:
    name: RhelemeterServerMetricsReceiveWriteLatencyErrorBudgetBurning
  description: Rhelemeter Server /receive is burning too much error budget to guarantee
    latency SLOs.
19 | indicator: 20 | latency: 21 | grouping: null 22 | success: 23 | metric: http_request_duration_seconds_bucket{job="rhelemeter-server", handler="receive", 24 | code=~"^2..$", le="5"} 25 | total: 26 | metric: http_request_duration_seconds_count{job="rhelemeter-server", handler="receive", 27 | code=~"^2..$"} 28 | target: "90" 29 | window: 28d 30 | status: {} 31 | -------------------------------------------------------------------------------- /resources/observability/prometheusrules/pyrra/rhobsp02ue1-production-api-metrics-query-availability-slo.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1alpha1 2 | kind: ServiceLevelObjective 3 | metadata: 4 | annotations: 5 | pyrra.dev/dashboard: https://grafana.app-sre.devshift.net/d/7f4df1c2d5518d5c3f2876ca9bb874a8/rhobsp02ue1-production-slos?orgId=1&refresh=10s&var-datasource=rhobsp02ue1-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m 6 | pyrra.dev/message: API /query handler is burning too much error budget to guarantee 7 | availability SLOs. 8 | pyrra.dev/runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#APIMetricsQueryAvailabilityErrorBudgetBurning 9 | creationTimestamp: null 10 | labels: 11 | instance: rhobsp02ue1-production 12 | pyrra.dev/service: observatorium-api 13 | name: api-metrics-query-availability-slo 14 | spec: 15 | alerting: 16 | name: APIMetricsQueryAvailabilityErrorBudgetBurning 17 | description: API /query handler is burning too much error budget to guarantee availability 18 | SLOs. 19 | indicator: 20 | ratio: 21 | errors: 22 | metric: http_requests_total{job="observatorium-observatorium-mst-api", handler="query", 23 | group="metricsv1", code=~"^5..$"} 24 | grouping: null 25 | total: 26 | metric: http_requests_total{job="observatorium-observatorium-mst-api", handler="query", 27 | group="metricsv1"} 28 | target: "99" 29 | window: 28d 30 | status: {} 31 | -------------------------------------------------------------------------------- /resources/observability/prometheusrules/pyrra/rhobsp02ue1-production-api-metrics-query-range-availability-slo.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1alpha1 2 | kind: ServiceLevelObjective 3 | metadata: 4 | annotations: 5 | pyrra.dev/dashboard: https://grafana.app-sre.devshift.net/d/7f4df1c2d5518d5c3f2876ca9bb874a8/rhobsp02ue1-production-slos?orgId=1&refresh=10s&var-datasource=rhobsp02ue1-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m 6 | pyrra.dev/message: API /query_range handler is burning too much error budget to 7 | guarantee availability SLOs. 8 | pyrra.dev/runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#APIMetricsQueryRangeAvailabilityErrorBudgetBurning 9 | creationTimestamp: null 10 | labels: 11 | instance: rhobsp02ue1-production 12 | pyrra.dev/service: observatorium-api 13 | name: api-metrics-query-range-availability-slo 14 | spec: 15 | alerting: 16 | name: APIMetricsQueryRangeAvailabilityErrorBudgetBurning 17 | description: API /query_range handler is burning too much error budget to guarantee 18 | availability SLOs. 
19 | indicator: 20 | ratio: 21 | errors: 22 | metric: http_requests_total{job="observatorium-observatorium-mst-api", handler="query_range", 23 | group="metricsv1", code=~"^5..$"} 24 | grouping: null 25 | total: 26 | metric: http_requests_total{job="observatorium-observatorium-mst-api", handler="query_range", 27 | group="metricsv1"} 28 | target: "99" 29 | window: 28d 30 | status: {} 31 | -------------------------------------------------------------------------------- /resources/observability/prometheusrules/pyrra/rhobsp02ue1-production-api-metrics-rule-query-availability-slo.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1alpha1 2 | kind: ServiceLevelObjective 3 | metadata: 4 | annotations: 5 | pyrra.dev/dashboard: https://grafana.app-sre.devshift.net/d/7f4df1c2d5518d5c3f2876ca9bb874a8/rhobsp02ue1-production-slos?orgId=1&refresh=10s&var-datasource=rhobsp02ue1-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m 6 | pyrra.dev/message: API /query handler endpoint for rules evaluation is burning 7 | too much error budget to guarantee availability SLOs. 8 | pyrra.dev/runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#APIMetricsRulerQueryAvailabilityErrorBudgetBurning 9 | creationTimestamp: null 10 | labels: 11 | instance: rhobsp02ue1-production 12 | pyrra.dev/service: observatorium-api 13 | name: api-metrics-rule-query-availability-slo 14 | spec: 15 | alerting: 16 | name: APIMetricsRulerQueryAvailabilityErrorBudgetBurning 17 | description: API /query handler endpoint for rules evaluation is burning too much 18 | error budget to guarantee availability SLOs. 19 | indicator: 20 | ratio: 21 | errors: 22 | metric: http_requests_total{job="observatorium-ruler-query", handler="query", 23 | code=~"^5..$"} 24 | grouping: null 25 | total: 26 | metric: http_requests_total{job="observatorium-ruler-query", handler="query"} 27 | target: "99" 28 | window: 28d 29 | status: {} 30 | -------------------------------------------------------------------------------- /resources/observability/prometheusrules/pyrra/rhobsp02ue1-production-api-metrics-write-availability-slo.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1alpha1 2 | kind: ServiceLevelObjective 3 | metadata: 4 | annotations: 5 | pyrra.dev/dashboard: https://grafana.app-sre.devshift.net/d/7f4df1c2d5518d5c3f2876ca9bb874a8/rhobsp02ue1-production-slos?orgId=1&refresh=10s&var-datasource=rhobsp02ue1-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m 6 | pyrra.dev/message: API /receive handler is burning too much error budget to guarantee 7 | availability SLOs. 8 | pyrra.dev/runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#APIMetricsWriteAvailabilityErrorBudgetBurning 9 | creationTimestamp: null 10 | labels: 11 | instance: rhobsp02ue1-production 12 | pyrra.dev/service: observatorium-api 13 | name: api-metrics-write-availability-slo 14 | spec: 15 | alerting: 16 | name: APIMetricsWriteAvailabilityErrorBudgetBurning 17 | description: API /receive handler is burning too much error budget to guarantee 18 | availability SLOs. 
19 | indicator: 20 | ratio: 21 | errors: 22 | metric: http_requests_total{job="observatorium-observatorium-mst-api", handler="receive", 23 | group="metricsv1", code=~"^5..$"} 24 | grouping: null 25 | total: 26 | metric: http_requests_total{job="observatorium-observatorium-mst-api", handler="receive", 27 | group="metricsv1"} 28 | target: "99" 29 | window: 28d 30 | status: {} 31 | -------------------------------------------------------------------------------- /resources/observability/prometheusrules/pyrra/rhobsp02ue1-production-api-metrics-write-latency-slo.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1alpha1 2 | kind: ServiceLevelObjective 3 | metadata: 4 | annotations: 5 | pyrra.dev/dashboard: https://grafana.app-sre.devshift.net/d/7f4df1c2d5518d5c3f2876ca9bb874a8/rhobsp02ue1-production-slos?orgId=1&refresh=10s&var-datasource=rhobsp02ue1-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m 6 | pyrra.dev/message: API /receive handler is burning too much error budget to guarantee 7 | latency SLOs. 8 | pyrra.dev/runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#APIMetricsWriteLatencyErrorBudgetBurning 9 | creationTimestamp: null 10 | labels: 11 | instance: rhobsp02ue1-production 12 | pyrra.dev/service: observatorium-api 13 | name: api-metrics-write-latency-slo 14 | spec: 15 | alerting: 16 | name: APIMetricsWriteLatencyErrorBudgetBurning 17 | description: API /receive handler is burning too much error budget to guarantee 18 | latency SLOs. 19 | indicator: 20 | latency: 21 | grouping: null 22 | success: 23 | metric: http_request_duration_seconds_bucket{job="observatorium-observatorium-mst-api", 24 | handler="receive", group="metricsv1", code=~"^2..$", le="5"} 25 | total: 26 | metric: http_request_duration_seconds_count{job="observatorium-observatorium-mst-api", 27 | handler="receive", group="metricsv1", code=~"^2..$"} 28 | target: "90" 29 | window: 28d 30 | status: {} 31 | -------------------------------------------------------------------------------- /resources/observability/prometheusrules/pyrra/rhobsp02ue1-production-api-rules-raw-read-availability-slo.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1alpha1 2 | kind: ServiceLevelObjective 3 | metadata: 4 | annotations: 5 | pyrra.dev/dashboard: https://grafana.app-sre.devshift.net/d/7f4df1c2d5518d5c3f2876ca9bb874a8/rhobsp02ue1-production-slos?orgId=1&refresh=10s&var-datasource=rhobsp02ue1-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m 6 | pyrra.dev/message: API /rules/raw endpoint for reads is burning too much error 7 | budget to guarantee availability SLOs. 8 | pyrra.dev/runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#APIRulesRawReadAvailabilityErrorBudgetBurning 9 | creationTimestamp: null 10 | labels: 11 | instance: rhobsp02ue1-production 12 | pyrra.dev/service: observatorium-api 13 | name: api-rules-raw-read-availability-slo 14 | spec: 15 | alerting: 16 | name: APIRulesRawReadAvailabilityErrorBudgetBurning 17 | description: API /rules/raw endpoint for reads is burning too much error budget 18 | to guarantee availability SLOs. 
19 | indicator: 20 | ratio: 21 | errors: 22 | metric: http_requests_total{job="observatorium-observatorium-mst-api", handler="rules-raw", 23 | method="GET", group="metricsv1", code=~"^5..$"} 24 | grouping: null 25 | total: 26 | metric: http_requests_total{job="observatorium-observatorium-mst-api", handler="rules-raw", 27 | method="GET", group="metricsv1"} 28 | target: "99" 29 | window: 28d 30 | status: {} 31 | -------------------------------------------------------------------------------- /resources/observability/prometheusrules/pyrra/rhobsp02ue1-production-api-rules-read-availability-slo.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1alpha1 2 | kind: ServiceLevelObjective 3 | metadata: 4 | annotations: 5 | pyrra.dev/dashboard: https://grafana.app-sre.devshift.net/d/7f4df1c2d5518d5c3f2876ca9bb874a8/rhobsp02ue1-production-slos?orgId=1&refresh=10s&var-datasource=rhobsp02ue1-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m 6 | pyrra.dev/message: API /rules endpoint is burning too much error budget to guarantee 7 | availability SLOs. 8 | pyrra.dev/runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#APIRulesReadAvailabilityErrorBudgetBurning 9 | creationTimestamp: null 10 | labels: 11 | instance: rhobsp02ue1-production 12 | pyrra.dev/service: observatorium-api 13 | name: api-rules-read-availability-slo 14 | spec: 15 | alerting: 16 | name: APIRulesReadAvailabilityErrorBudgetBurning 17 | description: API /rules endpoint is burning too much error budget to guarantee availability 18 | SLOs. 19 | indicator: 20 | ratio: 21 | errors: 22 | metric: http_requests_total{job="observatorium-observatorium-mst-api", handler="rules", 23 | method="GET", group="metricsv1", code=~"^5..$"} 24 | grouping: null 25 | total: 26 | metric: http_requests_total{job="observatorium-observatorium-mst-api", handler="rules", 27 | method="GET", group="metricsv1"} 28 | target: "99" 29 | window: 28d 30 | status: {} 31 | -------------------------------------------------------------------------------- /resources/observability/prometheusrules/pyrra/rhobsp02ue1-production-api-rules-sync-availability-slo.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1alpha1 2 | kind: ServiceLevelObjective 3 | metadata: 4 | annotations: 5 | pyrra.dev/dashboard: https://grafana.app-sre.devshift.net/d/7f4df1c2d5518d5c3f2876ca9bb874a8/rhobsp02ue1-production-slos?orgId=1&refresh=10s&var-datasource=rhobsp02ue1-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m 6 | pyrra.dev/message: Thanos Ruler /reload endpoint is burning too much error budget 7 | to guarantee availability SLOs. 8 | pyrra.dev/runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#APIRulesSyncAvailabilityErrorBudgetBurning 9 | creationTimestamp: null 10 | labels: 11 | instance: rhobsp02ue1-production 12 | pyrra.dev/service: observatorium-api 13 | name: api-rules-sync-availability-slo 14 | spec: 15 | alerting: 16 | name: APIRulesSyncAvailabilityErrorBudgetBurning 17 | description: Thanos Ruler /reload endpoint is burning too much error budget to guarantee 18 | availability SLOs. 
19 | indicator: 20 | ratio: 21 | errors: 22 | metric: client_api_requests_total{client="reload", container="thanos-rule-syncer", 23 | namespace="observatorium-mst-production", code=~"^5..$"} 24 | grouping: null 25 | total: 26 | metric: client_api_requests_total{client="reload", container="thanos-rule-syncer", 27 | namespace="observatorium-mst-production"} 28 | target: "99" 29 | window: 28d 30 | status: {} 31 | -------------------------------------------------------------------------------- /resources/observability/prometheusrules/pyrra/telemeter-production-api-metrics-query-availability-slo.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1alpha1 2 | kind: ServiceLevelObjective 3 | metadata: 4 | annotations: 5 | pyrra.dev/dashboard: https://grafana.app-sre.devshift.net/d/f9fa7677fb4a2669f123f9a0f2234b47/telemeter-production-slos?orgId=1&refresh=10s&var-datasource=telemeter-prod-01-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m 6 | pyrra.dev/message: API /query handler is burning too much error budget to guarantee 7 | availability SLOs. 8 | pyrra.dev/runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#APIMetricsQueryAvailabilityErrorBudgetBurning 9 | creationTimestamp: null 10 | labels: 11 | instance: telemeter-production 12 | pyrra.dev/service: observatorium-api 13 | name: api-metrics-query-availability-slo 14 | spec: 15 | alerting: 16 | name: APIMetricsQueryAvailabilityErrorBudgetBurning 17 | description: API /query handler is burning too much error budget to guarantee availability 18 | SLOs. 19 | indicator: 20 | ratio: 21 | errors: 22 | metric: http_requests_total{job="observatorium-observatorium-api", handler="query", 23 | group="metricsv1", code=~"^5..$"} 24 | grouping: null 25 | total: 26 | metric: http_requests_total{job="observatorium-observatorium-api", handler="query", 27 | group="metricsv1"} 28 | target: "99" 29 | window: 28d 30 | status: {} 31 | -------------------------------------------------------------------------------- /resources/observability/prometheusrules/pyrra/telemeter-production-api-metrics-query-range-availability-slo.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1alpha1 2 | kind: ServiceLevelObjective 3 | metadata: 4 | annotations: 5 | pyrra.dev/dashboard: https://grafana.app-sre.devshift.net/d/f9fa7677fb4a2669f123f9a0f2234b47/telemeter-production-slos?orgId=1&refresh=10s&var-datasource=telemeter-prod-01-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m 6 | pyrra.dev/message: API /query_range handler is burning too much error budget to 7 | guarantee availability SLOs. 8 | pyrra.dev/runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#APIMetricsQueryRangeAvailabilityErrorBudgetBurning 9 | creationTimestamp: null 10 | labels: 11 | instance: telemeter-production 12 | pyrra.dev/service: observatorium-api 13 | name: api-metrics-query-range-availability-slo 14 | spec: 15 | alerting: 16 | name: APIMetricsQueryRangeAvailabilityErrorBudgetBurning 17 | description: API /query_range handler is burning too much error budget to guarantee 18 | availability SLOs. 
19 | indicator: 20 | ratio: 21 | errors: 22 | metric: http_requests_total{job="observatorium-observatorium-api", handler="query_range", 23 | group="metricsv1", code=~"^5..$"} 24 | grouping: null 25 | total: 26 | metric: http_requests_total{job="observatorium-observatorium-api", handler="query_range", 27 | group="metricsv1"} 28 | target: "99" 29 | window: 28d 30 | status: {} 31 | -------------------------------------------------------------------------------- /resources/observability/prometheusrules/pyrra/telemeter-production-api-metrics-rule-query-availability-slo.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1alpha1 2 | kind: ServiceLevelObjective 3 | metadata: 4 | annotations: 5 | pyrra.dev/dashboard: https://grafana.app-sre.devshift.net/d/f9fa7677fb4a2669f123f9a0f2234b47/telemeter-production-slos?orgId=1&refresh=10s&var-datasource=telemeter-prod-01-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m 6 | pyrra.dev/message: API /query handler endpoint for rules evaluation is burning 7 | too much error budget to guarantee availability SLOs. 8 | pyrra.dev/runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#APIMetricsRulerQueryAvailabilityErrorBudgetBurning 9 | creationTimestamp: null 10 | labels: 11 | instance: telemeter-production 12 | pyrra.dev/service: observatorium-api 13 | name: api-metrics-rule-query-availability-slo 14 | spec: 15 | alerting: 16 | name: APIMetricsRulerQueryAvailabilityErrorBudgetBurning 17 | description: API /query handler endpoint for rules evaluation is burning too much 18 | error budget to guarantee availability SLOs. 19 | indicator: 20 | ratio: 21 | errors: 22 | metric: http_requests_total{job="observatorium-ruler-query", handler="query", 23 | code=~"^5..$"} 24 | grouping: null 25 | total: 26 | metric: http_requests_total{job="observatorium-ruler-query", handler="query"} 27 | target: "99" 28 | window: 28d 29 | status: {} 30 | -------------------------------------------------------------------------------- /resources/observability/prometheusrules/pyrra/telemeter-production-api-metrics-write-availability-slo.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1alpha1 2 | kind: ServiceLevelObjective 3 | metadata: 4 | annotations: 5 | pyrra.dev/dashboard: https://grafana.app-sre.devshift.net/d/f9fa7677fb4a2669f123f9a0f2234b47/telemeter-production-slos?orgId=1&refresh=10s&var-datasource=telemeter-prod-01-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m 6 | pyrra.dev/message: API /receive handler is burning too much error budget to guarantee 7 | availability SLOs. 8 | pyrra.dev/runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#APIMetricsWriteAvailabilityErrorBudgetBurning 9 | creationTimestamp: null 10 | labels: 11 | instance: telemeter-production 12 | pyrra.dev/service: observatorium-api 13 | name: api-metrics-write-availability-slo 14 | spec: 15 | alerting: 16 | name: APIMetricsWriteAvailabilityErrorBudgetBurning 17 | description: API /receive handler is burning too much error budget to guarantee 18 | availability SLOs. 
19 | indicator: 20 | ratio: 21 | errors: 22 | metric: http_requests_total{job="observatorium-observatorium-api", handler="receive", 23 | group="metricsv1", code=~"^5..$"} 24 | grouping: null 25 | total: 26 | metric: http_requests_total{job="observatorium-observatorium-api", handler="receive", 27 | group="metricsv1"} 28 | target: "99" 29 | window: 28d 30 | status: {} 31 | -------------------------------------------------------------------------------- /resources/observability/prometheusrules/pyrra/telemeter-production-api-metrics-write-latency-slo.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1alpha1 2 | kind: ServiceLevelObjective 3 | metadata: 4 | annotations: 5 | pyrra.dev/dashboard: https://grafana.app-sre.devshift.net/d/f9fa7677fb4a2669f123f9a0f2234b47/telemeter-production-slos?orgId=1&refresh=10s&var-datasource=telemeter-prod-01-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m 6 | pyrra.dev/message: API /receive handler is burning too much error budget to guarantee 7 | latency SLOs. 8 | pyrra.dev/runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#APIMetricsWriteLatencyErrorBudgetBurning 9 | creationTimestamp: null 10 | labels: 11 | instance: telemeter-production 12 | pyrra.dev/service: observatorium-api 13 | name: api-metrics-write-latency-slo 14 | spec: 15 | alerting: 16 | name: APIMetricsWriteLatencyErrorBudgetBurning 17 | description: API /receive handler is burning too much error budget to guarantee 18 | latency SLOs. 19 | indicator: 20 | latency: 21 | grouping: null 22 | success: 23 | metric: http_request_duration_seconds_bucket{job="observatorium-observatorium-api", 24 | handler="receive", group="metricsv1", code=~"^2..$", le="5"} 25 | total: 26 | metric: http_request_duration_seconds_count{job="observatorium-observatorium-api", 27 | handler="receive", group="metricsv1", code=~"^2..$"} 28 | target: "90" 29 | window: 28d 30 | status: {} 31 | -------------------------------------------------------------------------------- /resources/observability/prometheusrules/pyrra/telemeter-production-api-rules-raw-read-availability-slo.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1alpha1 2 | kind: ServiceLevelObjective 3 | metadata: 4 | annotations: 5 | pyrra.dev/dashboard: https://grafana.app-sre.devshift.net/d/f9fa7677fb4a2669f123f9a0f2234b47/telemeter-production-slos?orgId=1&refresh=10s&var-datasource=telemeter-prod-01-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m 6 | pyrra.dev/message: API /rules/raw endpoint for reads is burning too much error 7 | budget to guarantee availability SLOs. 8 | pyrra.dev/runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#APIRulesRawReadAvailabilityErrorBudgetBurning 9 | creationTimestamp: null 10 | labels: 11 | instance: telemeter-production 12 | pyrra.dev/service: observatorium-api 13 | name: api-rules-raw-read-availability-slo 14 | spec: 15 | alerting: 16 | name: APIRulesRawReadAvailabilityErrorBudgetBurning 17 | description: API /rules/raw endpoint for reads is burning too much error budget 18 | to guarantee availability SLOs. 
19 | indicator: 20 | ratio: 21 | errors: 22 | metric: http_requests_total{job="observatorium-observatorium-api", handler="rules-raw", 23 | method="GET", group="metricsv1", code=~"^5..$"} 24 | grouping: null 25 | total: 26 | metric: http_requests_total{job="observatorium-observatorium-api", handler="rules-raw", 27 | method="GET", group="metricsv1"} 28 | target: "99" 29 | window: 28d 30 | status: {} 31 | -------------------------------------------------------------------------------- /resources/observability/prometheusrules/pyrra/telemeter-production-api-rules-raw-write-availability-slo.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1alpha1 2 | kind: ServiceLevelObjective 3 | metadata: 4 | annotations: 5 | pyrra.dev/dashboard: https://grafana.app-sre.devshift.net/d/f9fa7677fb4a2669f123f9a0f2234b47/telemeter-production-slos?orgId=1&refresh=10s&var-datasource=telemeter-prod-01-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m 6 | pyrra.dev/message: API /rules/raw endpoint for writes is burning too much error 7 | budget to guarantee availability SLOs. 8 | pyrra.dev/runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#APIRulesRawWriteAvailabilityErrorBudgetBurning 9 | creationTimestamp: null 10 | labels: 11 | instance: telemeter-production 12 | pyrra.dev/service: observatorium-api 13 | name: api-rules-raw-write-availability-slo 14 | spec: 15 | alerting: 16 | name: APIRulesRawWriteAvailabilityErrorBudgetBurning 17 | description: API /rules/raw endpoint for writes is burning too much error budget 18 | to guarantee availability SLOs. 19 | indicator: 20 | ratio: 21 | errors: 22 | metric: http_requests_total{job="observatorium-observatorium-api", handler="rules-raw", 23 | method="PUT", group="metricsv1", code=~"^5..$"} 24 | grouping: null 25 | total: 26 | metric: http_requests_total{job="observatorium-observatorium-api", handler="rules-raw", 27 | method="PUT", group="metricsv1"} 28 | target: "99" 29 | window: 28d 30 | status: {} 31 | -------------------------------------------------------------------------------- /resources/observability/prometheusrules/pyrra/telemeter-production-api-rules-read-availability-slo.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1alpha1 2 | kind: ServiceLevelObjective 3 | metadata: 4 | annotations: 5 | pyrra.dev/dashboard: https://grafana.app-sre.devshift.net/d/f9fa7677fb4a2669f123f9a0f2234b47/telemeter-production-slos?orgId=1&refresh=10s&var-datasource=telemeter-prod-01-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m 6 | pyrra.dev/message: API /rules endpoint is burning too much error budget to guarantee 7 | availability SLOs. 8 | pyrra.dev/runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#APIRulesReadAvailabilityErrorBudgetBurning 9 | creationTimestamp: null 10 | labels: 11 | instance: telemeter-production 12 | pyrra.dev/service: observatorium-api 13 | name: api-rules-read-availability-slo 14 | spec: 15 | alerting: 16 | name: APIRulesReadAvailabilityErrorBudgetBurning 17 | description: API /rules endpoint is burning too much error budget to guarantee availability 18 | SLOs. 
19 | indicator: 20 | ratio: 21 | errors: 22 | metric: http_requests_total{job="observatorium-observatorium-api", handler="rules", 23 | method="GET", group="metricsv1", code=~"^5..$"} 24 | grouping: null 25 | total: 26 | metric: http_requests_total{job="observatorium-observatorium-api", handler="rules", 27 | method="GET", group="metricsv1"} 28 | target: "99" 29 | window: 28d 30 | status: {} 31 | -------------------------------------------------------------------------------- /resources/observability/prometheusrules/pyrra/telemeter-production-api-rules-sync-availability-slo.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1alpha1 2 | kind: ServiceLevelObjective 3 | metadata: 4 | annotations: 5 | pyrra.dev/dashboard: https://grafana.app-sre.devshift.net/d/f9fa7677fb4a2669f123f9a0f2234b47/telemeter-production-slos?orgId=1&refresh=10s&var-datasource=telemeter-prod-01-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m 6 | pyrra.dev/message: Thanos Ruler /reload endpoint is burning too much error budget 7 | to guarantee availability SLOs. 8 | pyrra.dev/runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#APIRulesSyncAvailabilityErrorBudgetBurning 9 | creationTimestamp: null 10 | labels: 11 | instance: telemeter-production 12 | pyrra.dev/service: observatorium-api 13 | name: api-rules-sync-availability-slo 14 | spec: 15 | alerting: 16 | name: APIRulesSyncAvailabilityErrorBudgetBurning 17 | description: Thanos Ruler /reload endpoint is burning too much error budget to guarantee 18 | availability SLOs. 19 | indicator: 20 | ratio: 21 | errors: 22 | metric: client_api_requests_total{client="reload", container="thanos-rule-syncer", 23 | namespace="observatorium-metrics-production", code=~"^5..$"} 24 | grouping: null 25 | total: 26 | metric: client_api_requests_total{client="reload", container="thanos-rule-syncer", 27 | namespace="observatorium-metrics-production"} 28 | target: "99" 29 | window: 28d 30 | status: {} 31 | -------------------------------------------------------------------------------- /resources/observability/prometheusrules/pyrra/telemeter-production-rhobs-telemeter-server-metrics-receive-availability-slo.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1alpha1 2 | kind: ServiceLevelObjective 3 | metadata: 4 | annotations: 5 | pyrra.dev/dashboard: https://grafana.app-sre.devshift.net/d/f9fa7677fb4a2669f123f9a0f2234b47/telemeter-production-slos?orgId=1&refresh=10s&var-datasource=telemeter-prod-01-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m 6 | pyrra.dev/message: Telemeter Server /receive is burning too much error budget 7 | to guarantee availability SLOs. 8 | pyrra.dev/runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#TelemeterServerMetricsReceiveWriteAvailabilityErrorBudgetBurning 9 | creationTimestamp: null 10 | labels: 11 | pyrra.dev/service: telemeter 12 | route: telemeter-server-receive 13 | name: rhobs-telemeter-server-metrics-receive-availability-slo 14 | spec: 15 | alerting: 16 | name: TelemeterServerMetricsReceiveWriteAvailabilityErrorBudgetBurning 17 | description: Telemeter Server /receive is burning too much error budget to guarantee 18 | availability SLOs. 
19 | indicator: 20 | ratio: 21 | errors: 22 | metric: haproxy_server_http_responses_total{route="telemeter-server-metrics-v1-receive", 23 | code=~"5.."} 24 | grouping: null 25 | total: 26 | metric: haproxy_server_http_responses_total{route="telemeter-server-metrics-v1-receive"} 27 | target: "99" 28 | window: 28d 29 | status: {} 30 | -------------------------------------------------------------------------------- /resources/observability/prometheusrules/pyrra/telemeter-production-rhobs-telemeter-server-metrics-receive-latency-slo.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1alpha1 2 | kind: ServiceLevelObjective 3 | metadata: 4 | annotations: 5 | pyrra.dev/dashboard: https://grafana.app-sre.devshift.net/d/f9fa7677fb4a2669f123f9a0f2234b47/telemeter-production-slos?orgId=1&refresh=10s&var-datasource=telemeter-prod-01-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m 6 | pyrra.dev/message: Telemeter Server /receive is burning too much error budget 7 | to guarantee latency SLOs. 8 | pyrra.dev/runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#TelemeterServerMetricsReceiveWriteLatencyErrorBudgetBurning 9 | creationTimestamp: null 10 | labels: 11 | pyrra.dev/service: telemeter 12 | route: telemeter-server-receive 13 | name: rhobs-telemeter-server-metrics-receive-latency-slo 14 | spec: 15 | alerting: 16 | name: TelemeterServerMetricsReceiveWriteLatencyErrorBudgetBurning 17 | description: Telemeter Server /receive is burning too much error budget to guarantee 18 | latency SLOs. 19 | indicator: 20 | latency: 21 | grouping: null 22 | success: 23 | metric: http_request_duration_seconds_bucket{job="telemeter-server", handler="receive", 24 | code=~"^2..$", le="5"} 25 | total: 26 | metric: http_request_duration_seconds_count{job="telemeter-server", handler="receive", 27 | code=~"^2..$"} 28 | target: "90" 29 | window: 28d 30 | status: {} 31 | -------------------------------------------------------------------------------- /resources/observability/prometheusrules/pyrra/telemeter-production-rhobs-telemeter-server-metrics-upload-availability-slo.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1alpha1 2 | kind: ServiceLevelObjective 3 | metadata: 4 | annotations: 5 | pyrra.dev/dashboard: https://grafana.app-sre.devshift.net/d/f9fa7677fb4a2669f123f9a0f2234b47/telemeter-production-slos?orgId=1&refresh=10s&var-datasource=telemeter-prod-01-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m 6 | pyrra.dev/message: Telemeter Server /upload is burning too much error budget to 7 | guarantee availability SLOs. 8 | pyrra.dev/runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#TelemeterServerMetricsUploadWriteAvailabilityErrorBudgetBurning 9 | creationTimestamp: null 10 | labels: 11 | pyrra.dev/service: telemeter 12 | route: telemeter-server-upload 13 | name: rhobs-telemeter-server-metrics-upload-availability-slo 14 | spec: 15 | alerting: 16 | name: TelemeterServerMetricsUploadWriteAvailabilityErrorBudgetBurning 17 | description: Telemeter Server /upload is burning too much error budget to guarantee 18 | availability SLOs. 
19 | indicator: 20 | ratio: 21 | errors: 22 | metric: haproxy_server_http_responses_total{route="telemeter-server-upload", 23 | code=~"5.."} 24 | grouping: null 25 | total: 26 | metric: haproxy_server_http_responses_total{route="telemeter-server-upload"} 27 | target: "99" 28 | window: 28d 29 | status: {} 30 | -------------------------------------------------------------------------------- /resources/observability/prometheusrules/pyrra/telemeter-production-rhobs-telemeter-server-metrics-upload-latency-slo.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1alpha1 2 | kind: ServiceLevelObjective 3 | metadata: 4 | annotations: 5 | pyrra.dev/dashboard: https://grafana.app-sre.devshift.net/d/f9fa7677fb4a2669f123f9a0f2234b47/telemeter-production-slos?orgId=1&refresh=10s&var-datasource=telemeter-prod-01-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m 6 | pyrra.dev/message: Telemeter Server /upload is burning too much error budget to 7 | guarantee latency SLOs. 8 | pyrra.dev/runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#TelemeterServerMetricsUploadWriteLatencyErrorBudgetBurning 9 | creationTimestamp: null 10 | labels: 11 | pyrra.dev/service: telemeter 12 | route: telemeter-server-upload 13 | name: rhobs-telemeter-server-metrics-upload-latency-slo 14 | spec: 15 | alerting: 16 | name: TelemeterServerMetricsUploadWriteLatencyErrorBudgetBurning 17 | description: Telemeter Server /upload is burning too much error budget to guarantee 18 | latency SLOs. 19 | indicator: 20 | latency: 21 | grouping: null 22 | success: 23 | metric: http_request_duration_seconds_bucket{job="telemeter-server", handler="upload", 24 | code=~"^2..$", le="5"} 25 | total: 26 | metric: http_request_duration_seconds_count{job="telemeter-server", handler="upload", 27 | code=~"^2..$"} 28 | target: "90" 29 | window: 28d 30 | status: {} 31 | -------------------------------------------------------------------------------- /resources/observability/prometheusrules/pyrra/telemeter-staging-api-metrics-query-availability-slo.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1alpha1 2 | kind: ServiceLevelObjective 3 | metadata: 4 | annotations: 5 | pyrra.dev/dashboard: https://grafana.app-sre.devshift.net/d/080e53f245a15445bdf777ae0e66945d/telemeter-staging-slos?orgId=1&refresh=10s&var-datasource=app-sre-stage-01-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m 6 | pyrra.dev/message: API /query handler is burning too much error budget to guarantee 7 | availability SLOs. 8 | pyrra.dev/runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#APIMetricsQueryAvailabilityErrorBudgetBurning 9 | creationTimestamp: null 10 | labels: 11 | instance: telemeter-staging 12 | pyrra.dev/service: observatorium-api 13 | name: api-metrics-query-availability-slo 14 | spec: 15 | alerting: 16 | name: APIMetricsQueryAvailabilityErrorBudgetBurning 17 | description: API /query handler is burning too much error budget to guarantee availability 18 | SLOs. 
19 | indicator: 20 | ratio: 21 | errors: 22 | metric: http_requests_total{job="observatorium-observatorium-api", handler="query", 23 | group="metricsv1", code=~"^5..$"} 24 | grouping: null 25 | total: 26 | metric: http_requests_total{job="observatorium-observatorium-api", handler="query", 27 | group="metricsv1"} 28 | target: "99" 29 | window: 28d 30 | status: {} 31 | -------------------------------------------------------------------------------- /resources/observability/prometheusrules/pyrra/telemeter-staging-api-metrics-query-range-availability-slo.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1alpha1 2 | kind: ServiceLevelObjective 3 | metadata: 4 | annotations: 5 | pyrra.dev/dashboard: https://grafana.app-sre.devshift.net/d/080e53f245a15445bdf777ae0e66945d/telemeter-staging-slos?orgId=1&refresh=10s&var-datasource=app-sre-stage-01-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m 6 | pyrra.dev/message: API /query_range handler is burning too much error budget to 7 | guarantee availability SLOs. 8 | pyrra.dev/runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#APIMetricsQueryRangeAvailabilityErrorBudgetBurning 9 | creationTimestamp: null 10 | labels: 11 | instance: telemeter-staging 12 | pyrra.dev/service: observatorium-api 13 | name: api-metrics-query-range-availability-slo 14 | spec: 15 | alerting: 16 | name: APIMetricsQueryRangeAvailabilityErrorBudgetBurning 17 | description: API /query_range handler is burning too much error budget to guarantee 18 | availability SLOs. 19 | indicator: 20 | ratio: 21 | errors: 22 | metric: http_requests_total{job="observatorium-observatorium-api", handler="query_range", 23 | group="metricsv1", code=~"^5..$"} 24 | grouping: null 25 | total: 26 | metric: http_requests_total{job="observatorium-observatorium-api", handler="query_range", 27 | group="metricsv1"} 28 | target: "99" 29 | window: 28d 30 | status: {} 31 | -------------------------------------------------------------------------------- /resources/observability/prometheusrules/pyrra/telemeter-staging-api-metrics-read-100M-latency-slo.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1alpha1 2 | kind: ServiceLevelObjective 3 | metadata: 4 | annotations: 5 | pyrra.dev/dashboard: https://grafana.app-sre.devshift.net/d/080e53f245a15445bdf777ae0e66945d/telemeter-staging-slos?orgId=1&refresh=10s&var-datasource=app-sre-stage-01-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m 6 | pyrra.dev/message: API /query endpoint is burning too much error budget for 100M 7 | samples, to guarantee latency SLOs. 8 | pyrra.dev/runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#APIMetricsReadLatency100MErrorBudgetBurning 9 | creationTimestamp: null 10 | labels: 11 | instance: telemeter-staging 12 | pyrra.dev/service: observatorium-api 13 | name: api-metrics-read-100M-latency-slo 14 | spec: 15 | alerting: 16 | name: APIMetricsReadLatency100MErrorBudgetBurning 17 | description: API /query endpoint is burning too much error budget for 100M samples, 18 | to guarantee latency SLOs. 
19 | indicator: 20 | latency: 21 | grouping: null 22 | success: 23 | metric: up_custom_query_duration_seconds_bucket{query="query-path-sli-100M-samples", 24 | namespace="observatorium-stage", http_code=~"^2..$", le="120"} 25 | total: 26 | metric: up_custom_query_duration_seconds_count{query="query-path-sli-100M-samples", 27 | namespace="observatorium-stage", http_code=~"^2..$"} 28 | target: "90" 29 | window: 28d 30 | status: {} 31 | -------------------------------------------------------------------------------- /resources/observability/prometheusrules/pyrra/telemeter-staging-api-metrics-read-10M-latency-slo.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1alpha1 2 | kind: ServiceLevelObjective 3 | metadata: 4 | annotations: 5 | pyrra.dev/dashboard: https://grafana.app-sre.devshift.net/d/080e53f245a15445bdf777ae0e66945d/telemeter-staging-slos?orgId=1&refresh=10s&var-datasource=app-sre-stage-01-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m 6 | pyrra.dev/message: API /query endpoint is burning too much error budget for 10M 7 | samples, to guarantee latency SLOs. 8 | pyrra.dev/runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#APIMetricsReadLatency10MErrorBudgetBurning 9 | creationTimestamp: null 10 | labels: 11 | instance: telemeter-staging 12 | pyrra.dev/service: observatorium-api 13 | name: api-metrics-read-10M-latency-slo 14 | spec: 15 | alerting: 16 | name: APIMetricsReadLatency10MErrorBudgetBurning 17 | description: API /query endpoint is burning too much error budget for 10M samples, 18 | to guarantee latency SLOs. 19 | indicator: 20 | latency: 21 | grouping: null 22 | success: 23 | metric: up_custom_query_duration_seconds_bucket{query="query-path-sli-10M-samples", 24 | namespace="observatorium-stage", http_code=~"^2..$", le="30"} 25 | total: 26 | metric: up_custom_query_duration_seconds_count{query="query-path-sli-10M-samples", 27 | namespace="observatorium-stage", http_code=~"^2..$"} 28 | target: "90" 29 | window: 28d 30 | status: {} 31 | -------------------------------------------------------------------------------- /resources/observability/prometheusrules/pyrra/telemeter-staging-api-metrics-read-1M-latency-slo.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1alpha1 2 | kind: ServiceLevelObjective 3 | metadata: 4 | annotations: 5 | pyrra.dev/dashboard: https://grafana.app-sre.devshift.net/d/080e53f245a15445bdf777ae0e66945d/telemeter-staging-slos?orgId=1&refresh=10s&var-datasource=app-sre-stage-01-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m 6 | pyrra.dev/message: API /query endpoint is burning too much error budget for 1M 7 | samples, to guarantee latency SLOs. 8 | pyrra.dev/runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#APIMetricsReadLatency1MErrorBudgetBurning 9 | creationTimestamp: null 10 | labels: 11 | instance: telemeter-staging 12 | pyrra.dev/service: observatorium-api 13 | name: api-metrics-read-1M-latency-slo 14 | spec: 15 | alerting: 16 | name: APIMetricsReadLatency1MErrorBudgetBurning 17 | description: API /query endpoint is burning too much error budget for 1M samples, 18 | to guarantee latency SLOs. 
19 | indicator: 20 | latency: 21 | grouping: null 22 | success: 23 | metric: up_custom_query_duration_seconds_bucket{query="query-path-sli-1M-samples", 24 | namespace="observatorium-stage", http_code=~"^2..$", le="10"} 25 | total: 26 | metric: up_custom_query_duration_seconds_count{query="query-path-sli-1M-samples", 27 | namespace="observatorium-stage", http_code=~"^2..$"} 28 | target: "90" 29 | window: 28d 30 | status: {} 31 | -------------------------------------------------------------------------------- /resources/observability/prometheusrules/pyrra/telemeter-staging-api-metrics-rule-query-availability-slo.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1alpha1 2 | kind: ServiceLevelObjective 3 | metadata: 4 | annotations: 5 | pyrra.dev/dashboard: https://grafana.app-sre.devshift.net/d/080e53f245a15445bdf777ae0e66945d/telemeter-staging-slos?orgId=1&refresh=10s&var-datasource=app-sre-stage-01-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m 6 | pyrra.dev/message: API /query handler endpoint for rules evaluation is burning 7 | too much error budget to guarantee availability SLOs. 8 | pyrra.dev/runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#APIMetricsRulerQueryAvailabilityErrorBudgetBurning 9 | creationTimestamp: null 10 | labels: 11 | instance: telemeter-staging 12 | pyrra.dev/service: observatorium-api 13 | name: api-metrics-rule-query-availability-slo 14 | spec: 15 | alerting: 16 | name: APIMetricsRulerQueryAvailabilityErrorBudgetBurning 17 | description: API /query handler endpoint for rules evaluation is burning too much 18 | error budget to guarantee availability SLOs. 19 | indicator: 20 | ratio: 21 | errors: 22 | metric: http_requests_total{job="observatorium-ruler-query", handler="query", 23 | code=~"^5..$"} 24 | grouping: null 25 | total: 26 | metric: http_requests_total{job="observatorium-ruler-query", handler="query"} 27 | target: "99" 28 | window: 28d 29 | status: {} 30 | -------------------------------------------------------------------------------- /resources/observability/prometheusrules/pyrra/telemeter-staging-api-metrics-write-availability-slo.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1alpha1 2 | kind: ServiceLevelObjective 3 | metadata: 4 | annotations: 5 | pyrra.dev/dashboard: https://grafana.app-sre.devshift.net/d/080e53f245a15445bdf777ae0e66945d/telemeter-staging-slos?orgId=1&refresh=10s&var-datasource=app-sre-stage-01-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m 6 | pyrra.dev/message: API /receive handler is burning too much error budget to guarantee 7 | availability SLOs. 8 | pyrra.dev/runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#APIMetricsWriteAvailabilityErrorBudgetBurning 9 | creationTimestamp: null 10 | labels: 11 | instance: telemeter-staging 12 | pyrra.dev/service: observatorium-api 13 | name: api-metrics-write-availability-slo 14 | spec: 15 | alerting: 16 | name: APIMetricsWriteAvailabilityErrorBudgetBurning 17 | description: API /receive handler is burning too much error budget to guarantee 18 | availability SLOs. 
19 | indicator: 20 | ratio: 21 | errors: 22 | metric: http_requests_total{job="observatorium-observatorium-api", handler="receive", 23 | group="metricsv1", code=~"^5..$"} 24 | grouping: null 25 | total: 26 | metric: http_requests_total{job="observatorium-observatorium-api", handler="receive", 27 | group="metricsv1"} 28 | target: "99" 29 | window: 28d 30 | status: {} 31 | -------------------------------------------------------------------------------- /resources/observability/prometheusrules/pyrra/telemeter-staging-api-metrics-write-latency-slo.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1alpha1 2 | kind: ServiceLevelObjective 3 | metadata: 4 | annotations: 5 | pyrra.dev/dashboard: https://grafana.app-sre.devshift.net/d/080e53f245a15445bdf777ae0e66945d/telemeter-staging-slos?orgId=1&refresh=10s&var-datasource=app-sre-stage-01-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m 6 | pyrra.dev/message: API /receive handler is burning too much error budget to guarantee 7 | latency SLOs. 8 | pyrra.dev/runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#APIMetricsWriteLatencyErrorBudgetBurning 9 | creationTimestamp: null 10 | labels: 11 | instance: telemeter-staging 12 | pyrra.dev/service: observatorium-api 13 | name: api-metrics-write-latency-slo 14 | spec: 15 | alerting: 16 | name: APIMetricsWriteLatencyErrorBudgetBurning 17 | description: API /receive handler is burning too much error budget to guarantee 18 | latency SLOs. 19 | indicator: 20 | latency: 21 | grouping: null 22 | success: 23 | metric: http_request_duration_seconds_bucket{job="observatorium-observatorium-api", 24 | handler="receive", group="metricsv1", code=~"^2..$", le="5"} 25 | total: 26 | metric: http_request_duration_seconds_count{job="observatorium-observatorium-api", 27 | handler="receive", group="metricsv1", code=~"^2..$"} 28 | target: "90" 29 | window: 28d 30 | status: {} 31 | -------------------------------------------------------------------------------- /resources/observability/prometheusrules/pyrra/telemeter-staging-api-rules-raw-read-availability-slo.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1alpha1 2 | kind: ServiceLevelObjective 3 | metadata: 4 | annotations: 5 | pyrra.dev/dashboard: https://grafana.app-sre.devshift.net/d/080e53f245a15445bdf777ae0e66945d/telemeter-staging-slos?orgId=1&refresh=10s&var-datasource=app-sre-stage-01-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m 6 | pyrra.dev/message: API /rules/raw endpoint for reads is burning too much error 7 | budget to guarantee availability SLOs. 8 | pyrra.dev/runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#APIRulesRawReadAvailabilityErrorBudgetBurning 9 | creationTimestamp: null 10 | labels: 11 | instance: telemeter-staging 12 | pyrra.dev/service: observatorium-api 13 | name: api-rules-raw-read-availability-slo 14 | spec: 15 | alerting: 16 | name: APIRulesRawReadAvailabilityErrorBudgetBurning 17 | description: API /rules/raw endpoint for reads is burning too much error budget 18 | to guarantee availability SLOs. 
19 | indicator: 20 | ratio: 21 | errors: 22 | metric: http_requests_total{job="observatorium-observatorium-api", handler="rules-raw", 23 | method="GET", group="metricsv1", code=~"^5..$"} 24 | grouping: null 25 | total: 26 | metric: http_requests_total{job="observatorium-observatorium-api", handler="rules-raw", 27 | method="GET", group="metricsv1"} 28 | target: "99" 29 | window: 28d 30 | status: {} 31 | -------------------------------------------------------------------------------- /resources/observability/prometheusrules/pyrra/telemeter-staging-api-rules-raw-write-availability-slo.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1alpha1 2 | kind: ServiceLevelObjective 3 | metadata: 4 | annotations: 5 | pyrra.dev/dashboard: https://grafana.app-sre.devshift.net/d/080e53f245a15445bdf777ae0e66945d/telemeter-staging-slos?orgId=1&refresh=10s&var-datasource=app-sre-stage-01-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m 6 | pyrra.dev/message: API /rules/raw endpoint for writes is burning too much error 7 | budget to guarantee availability SLOs. 8 | pyrra.dev/runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#APIRulesRawWriteAvailabilityErrorBudgetBurning 9 | creationTimestamp: null 10 | labels: 11 | instance: telemeter-staging 12 | pyrra.dev/service: observatorium-api 13 | name: api-rules-raw-write-availability-slo 14 | spec: 15 | alerting: 16 | name: APIRulesRawWriteAvailabilityErrorBudgetBurning 17 | description: API /rules/raw endpoint for writes is burning too much error budget 18 | to guarantee availability SLOs. 19 | indicator: 20 | ratio: 21 | errors: 22 | metric: http_requests_total{job="observatorium-observatorium-api", handler="rules-raw", 23 | method="PUT", group="metricsv1", code=~"^5..$"} 24 | grouping: null 25 | total: 26 | metric: http_requests_total{job="observatorium-observatorium-api", handler="rules-raw", 27 | method="PUT", group="metricsv1"} 28 | target: "99" 29 | window: 28d 30 | status: {} 31 | -------------------------------------------------------------------------------- /resources/observability/prometheusrules/pyrra/telemeter-staging-api-rules-read-availability-slo.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1alpha1 2 | kind: ServiceLevelObjective 3 | metadata: 4 | annotations: 5 | pyrra.dev/dashboard: https://grafana.app-sre.devshift.net/d/080e53f245a15445bdf777ae0e66945d/telemeter-staging-slos?orgId=1&refresh=10s&var-datasource=app-sre-stage-01-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m 6 | pyrra.dev/message: API /rules endpoint is burning too much error budget to guarantee 7 | availability SLOs. 8 | pyrra.dev/runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#APIRulesReadAvailabilityErrorBudgetBurning 9 | creationTimestamp: null 10 | labels: 11 | instance: telemeter-staging 12 | pyrra.dev/service: observatorium-api 13 | name: api-rules-read-availability-slo 14 | spec: 15 | alerting: 16 | name: APIRulesReadAvailabilityErrorBudgetBurning 17 | description: API /rules endpoint is burning too much error budget to guarantee availability 18 | SLOs. 
19 | indicator: 20 | ratio: 21 | errors: 22 | metric: http_requests_total{job="observatorium-observatorium-api", handler="rules", 23 | method="GET", group="metricsv1", code=~"^5..$"} 24 | grouping: null 25 | total: 26 | metric: http_requests_total{job="observatorium-observatorium-api", handler="rules", 27 | method="GET", group="metricsv1"} 28 | target: "99" 29 | window: 28d 30 | status: {} 31 | -------------------------------------------------------------------------------- /resources/observability/prometheusrules/pyrra/telemeter-staging-api-rules-sync-availability-slo.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1alpha1 2 | kind: ServiceLevelObjective 3 | metadata: 4 | annotations: 5 | pyrra.dev/dashboard: https://grafana.app-sre.devshift.net/d/080e53f245a15445bdf777ae0e66945d/telemeter-staging-slos?orgId=1&refresh=10s&var-datasource=app-sre-stage-01-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m 6 | pyrra.dev/message: Thanos Ruler /reload endpoint is burning too much error budget 7 | to guarantee availability SLOs. 8 | pyrra.dev/runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#APIRulesSyncAvailabilityErrorBudgetBurning 9 | creationTimestamp: null 10 | labels: 11 | instance: telemeter-staging 12 | pyrra.dev/service: observatorium-api 13 | name: api-rules-sync-availability-slo 14 | spec: 15 | alerting: 16 | name: APIRulesSyncAvailabilityErrorBudgetBurning 17 | description: Thanos Ruler /reload endpoint is burning too much error budget to guarantee 18 | availability SLOs. 19 | indicator: 20 | ratio: 21 | errors: 22 | metric: client_api_requests_total{client="reload", container="thanos-rule-syncer", 23 | namespace="observatorium-metrics-stage", code=~"^5..$"} 24 | grouping: null 25 | total: 26 | metric: client_api_requests_total{client="reload", container="thanos-rule-syncer", 27 | namespace="observatorium-metrics-stage"} 28 | target: "99" 29 | window: 28d 30 | status: {} 31 | -------------------------------------------------------------------------------- /resources/observability/prometheusrules/pyrra/telemeter-staging-rhobs-telemeter-server-metrics-receive-availability-slo.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1alpha1 2 | kind: ServiceLevelObjective 3 | metadata: 4 | annotations: 5 | pyrra.dev/dashboard: https://grafana.app-sre.devshift.net/d/080e53f245a15445bdf777ae0e66945d/telemeter-staging-slos?orgId=1&refresh=10s&var-datasource=app-sre-stage-01-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m 6 | pyrra.dev/message: Telemeter Server /receive is burning too much error budget 7 | to guarantee availability SLOs. 8 | pyrra.dev/runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#TelemeterServerMetricsReceiveWriteAvailabilityErrorBudgetBurning 9 | creationTimestamp: null 10 | labels: 11 | pyrra.dev/service: telemeter 12 | route: telemeter-server-receive 13 | name: rhobs-telemeter-server-metrics-receive-availability-slo 14 | spec: 15 | alerting: 16 | name: TelemeterServerMetricsReceiveWriteAvailabilityErrorBudgetBurning 17 | description: Telemeter Server /receive is burning too much error budget to guarantee 18 | availability SLOs. 
19 | indicator: 20 | ratio: 21 | errors: 22 | metric: haproxy_server_http_responses_total{route="telemeter-server-metrics-v1-receive", 23 | code=~"5.."} 24 | grouping: null 25 | total: 26 | metric: haproxy_server_http_responses_total{route="telemeter-server-metrics-v1-receive"} 27 | target: "99" 28 | window: 28d 29 | status: {} 30 | -------------------------------------------------------------------------------- /resources/observability/prometheusrules/pyrra/telemeter-staging-rhobs-telemeter-server-metrics-receive-latency-slo.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1alpha1 2 | kind: ServiceLevelObjective 3 | metadata: 4 | annotations: 5 | pyrra.dev/dashboard: https://grafana.app-sre.devshift.net/d/080e53f245a15445bdf777ae0e66945d/telemeter-staging-slos?orgId=1&refresh=10s&var-datasource=app-sre-stage-01-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m 6 | pyrra.dev/message: Telemeter Server /receive is burning too much error budget 7 | to guarantee latency SLOs. 8 | pyrra.dev/runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#TelemeterServerMetricsReceiveWriteLatencyErrorBudgetBurning 9 | creationTimestamp: null 10 | labels: 11 | pyrra.dev/service: telemeter 12 | route: telemeter-server-receive 13 | name: rhobs-telemeter-server-metrics-receive-latency-slo 14 | spec: 15 | alerting: 16 | name: TelemeterServerMetricsReceiveWriteLatencyErrorBudgetBurning 17 | description: Telemeter Server /receive is burning too much error budget to guarantee 18 | latency SLOs. 19 | indicator: 20 | latency: 21 | grouping: null 22 | success: 23 | metric: http_request_duration_seconds_bucket{job="telemeter-server", handler="receive", 24 | code=~"^2..$", le="5"} 25 | total: 26 | metric: http_request_duration_seconds_count{job="telemeter-server", handler="receive", 27 | code=~"^2..$"} 28 | target: "90" 29 | window: 28d 30 | status: {} 31 | -------------------------------------------------------------------------------- /resources/observability/prometheusrules/pyrra/telemeter-staging-rhobs-telemeter-server-metrics-upload-availability-slo.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1alpha1 2 | kind: ServiceLevelObjective 3 | metadata: 4 | annotations: 5 | pyrra.dev/dashboard: https://grafana.app-sre.devshift.net/d/080e53f245a15445bdf777ae0e66945d/telemeter-staging-slos?orgId=1&refresh=10s&var-datasource=app-sre-stage-01-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m 6 | pyrra.dev/message: Telemeter Server /upload is burning too much error budget to 7 | guarantee availability SLOs. 8 | pyrra.dev/runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#TelemeterServerMetricsUploadWriteAvailabilityErrorBudgetBurning 9 | creationTimestamp: null 10 | labels: 11 | pyrra.dev/service: telemeter 12 | route: telemeter-server-upload 13 | name: rhobs-telemeter-server-metrics-upload-availability-slo 14 | spec: 15 | alerting: 16 | name: TelemeterServerMetricsUploadWriteAvailabilityErrorBudgetBurning 17 | description: Telemeter Server /upload is burning too much error budget to guarantee 18 | availability SLOs. 
19 | indicator: 20 | ratio: 21 | errors: 22 | metric: haproxy_server_http_responses_total{route="telemeter-server-upload", 23 | code=~"5.."} 24 | grouping: null 25 | total: 26 | metric: haproxy_server_http_responses_total{route="telemeter-server-upload"} 27 | target: "99" 28 | window: 28d 29 | status: {} 30 | -------------------------------------------------------------------------------- /resources/observability/prometheusrules/pyrra/telemeter-staging-rhobs-telemeter-server-metrics-upload-latency-slo.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1alpha1 2 | kind: ServiceLevelObjective 3 | metadata: 4 | annotations: 5 | pyrra.dev/dashboard: https://grafana.app-sre.devshift.net/d/080e53f245a15445bdf777ae0e66945d/telemeter-staging-slos?orgId=1&refresh=10s&var-datasource=app-sre-stage-01-prometheus&var-namespace={{$labels.namespace}}&var-job=All&var-pod=All&var-interval=5m 6 | pyrra.dev/message: Telemeter Server /upload is burning too much error budget to 7 | guarantee latency SLOs. 8 | pyrra.dev/runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#TelemeterServerMetricsUploadWriteLatencyErrorBudgetBurning 9 | creationTimestamp: null 10 | labels: 11 | pyrra.dev/service: telemeter 12 | route: telemeter-server-upload 13 | name: rhobs-telemeter-server-metrics-upload-latency-slo 14 | spec: 15 | alerting: 16 | name: TelemeterServerMetricsUploadWriteLatencyErrorBudgetBurning 17 | description: Telemeter Server /upload is burning too much error budget to guarantee 18 | latency SLOs. 19 | indicator: 20 | latency: 21 | grouping: null 22 | success: 23 | metric: http_request_duration_seconds_bucket{job="telemeter-server", handler="upload", 24 | code=~"^2..$", le="5"} 25 | total: 26 | metric: http_request_duration_seconds_count{job="telemeter-server", handler="upload", 27 | code=~"^2..$"} 28 | target: "90" 29 | window: 28d 30 | status: {} 31 | -------------------------------------------------------------------------------- /resources/operations/bucket-inspect/README.md: -------------------------------------------------------------------------------- 1 | # What 2 | 3 | This template deploys [Thanos Bucket Inspect](https://thanos.io/tip/components/tools.md/#bucket-inspect) 4 | as a Kubernetes Job or CronJob. 5 | 6 | # SOP 7 | 8 | Create a Kubernetes Secret that contains the credentials for the target object storage provider, or use the 9 | template provided in this directory for S3-compatible object storage providers.
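If you use the provided template, a minimal sketch looks like this (the parameter names match `s3-secret-template.yaml` in this directory; the angle-bracketed values are placeholders, not real credentials):

```bash
oc process -f s3-secret-template.yaml \
  -p ACCESS_KEY_ID=<access-key> \
  -p SECRET_ACCESS_KEY=<secret-key> \
  -p S3_BUCKET_NAME=<bucket> \
  | oc apply -f -
```

To create the Secret manually: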
10 | 11 | ```yaml 12 | apiVersion: v1 13 | kind: Secret 14 | metadata: 15 | name: thanos-bucket-inspect-config 16 | type: Opaque 17 | stringData: 18 | from-config.yaml: | 19 | # see https://thanos.io/tip/thanos/storage.md/ 20 | ``` 21 | 22 | Process the template and run the Job: 23 | 24 | ```bash 25 | oc process -f job-template.yaml | oc apply -f - 26 | ``` 27 | 28 | Alternatively, you can run it as a CronJob: 29 | ```bash 30 | oc process -f cron-job-template.yaml | oc apply -f - 31 | ``` 32 | -------------------------------------------------------------------------------- /resources/operations/bucket-inspect/s3-secret-template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: template.openshift.io/v1 2 | kind: Template 3 | metadata: 4 | name: thanos-bucket-inspect-secret 5 | labels: 6 | app.kubernetes.io/name: thanos-bucket-inspect 7 | app.kubernetes.io/part-of: observatorium 8 | description: | 9 | This template creates a Secret that supports Thanos Object Storage inspection for S3. 10 | parameters: 11 | - name: NAMESPACE 12 | description: The namespace where the Secret will be created. 13 | value: 'observatorium-operations' 14 | - name: OBJ_STORE_CONFIG_SECRET_NAME 15 | value: 'thanos-bucket-inspect-config' 16 | - name: ACCESS_KEY_ID 17 | - name: SECRET_ACCESS_KEY 18 | - name: S3_BUCKET_NAME 19 | - name: S3_BUCKET_ENDPOINT 20 | value: s3.us-east-1.amazonaws.com 21 | - name: S3_BUCKET_REGION 22 | value: us-east-1 23 | - name: K8S_SECRET_KEY 24 | value: config.yaml 25 | objects: 26 | - apiVersion: v1 27 | kind: Secret 28 | metadata: 29 | name: ${OBJ_STORE_CONFIG_SECRET_NAME} 30 | namespace: ${NAMESPACE} 31 | labels: 32 | app.kubernetes.io/name: thanos-bucket-inspect 33 | app.kubernetes.io/part-of: observatorium 34 | type: Opaque 35 | stringData: 36 | ${K8S_SECRET_KEY}: | 37 | type: S3 38 | config: 39 | bucket: ${S3_BUCKET_NAME} 40 | region: ${S3_BUCKET_REGION} 41 | access_key: ${ACCESS_KEY_ID} 42 | secret_key: ${SECRET_ACCESS_KEY} 43 | endpoint: ${S3_BUCKET_ENDPOINT} 44 | 45 | 46 | -------------------------------------------------------------------------------- /resources/operations/bucket-replicate/monitoring-template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: template.openshift.io/v1 2 | kind: Template 3 | metadata: 4 | name: rhobs-thanos-bucket-replicate-pod-monitor 5 | labels: 6 | app.kubernetes.io/name: thanos-bucket-replicate 7 | app.kubernetes.io/part-of: observatorium 8 | parameters: 9 | - name: NAMESPACE 10 | description: The namespace where the running Job will reside. 11 | value: 'observatorium-operations' 12 | - name: NAME 13 | description: The name of the Job.
14 | value: 'thanos-bucket-replicate' 15 | objects: 16 | - apiVersion: monitoring.coreos.com/v1 17 | kind: PodMonitor 18 | metadata: 19 | name: observatorium-operations-thanos-bucket-replicate 20 | labels: 21 | prometheus: app-sre 22 | spec: 23 | namespaceSelector: 24 | matchNames: 25 | - ${NAMESPACE} 26 | selector: 27 | matchLabels: 28 | job-name: ${NAME} 29 | podMetricsEndpoints: 30 | - port: metrics 31 | interval: 30s 32 | path: /metrics 33 | 34 | -------------------------------------------------------------------------------- /resources/operations/bucket-replicate/s3-secret-template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: template.openshift.io/v1 2 | kind: Template 3 | metadata: 4 | name: thanos-bucket-replicate-secret 5 | labels: 6 | app.kubernetes.io/name: thanos-bucket-replicate-secret 7 | app.kubernetes.io/part-of: observatorium 8 | description: | 9 | This template creates a Secret that supports Thanos Object Storage for S3. 10 | parameters: 11 | - name: NAMESPACE 12 | description: The namespace where the Secret will be created. 13 | value: 'observatorium-operations' 14 | - name: OBJ_STORE_CONFIG_SECRET_NAME 15 | value: 'thanos-bucket-config' 16 | - name: K8S_SECRET_KEY 17 | value: config.yaml 18 | - name: ACCESS_KEY_ID 19 | - name: SECRET_ACCESS_KEY 20 | - name: S3_BUCKET_NAME 21 | - name: S3_BUCKET_ENDPOINT 22 | value: s3.us-east-1.amazonaws.com 23 | - name: S3_BUCKET_REGION 24 | value: us-east-1 25 | objects: 26 | - apiVersion: v1 27 | kind: Secret 28 | metadata: 29 | name: ${OBJ_STORE_CONFIG_SECRET_NAME} 30 | namespace: ${NAMESPACE} 31 | labels: 32 | app.kubernetes.io/name: thanos-bucket-replicate-secret 33 | app.kubernetes.io/part-of: observatorium 34 | type: Opaque 35 | stringData: 36 | ${K8S_SECRET_KEY}: | 37 | type: S3 38 | config: 39 | bucket: ${S3_BUCKET_NAME} 40 | region: ${S3_BUCKET_REGION} 41 | access_key: ${ACCESS_KEY_ID} 42 | secret_key: ${SECRET_ACCESS_KEY} 43 | endpoint: ${S3_BUCKET_ENDPOINT} 44 | 45 | 46 | -------------------------------------------------------------------------------- /resources/operations/rclone-bucket-replicate/README.md: -------------------------------------------------------------------------------- 1 | # What 2 | 3 | This template deploys the [Rclone copy command](https://rclone.org/commands/rclone_copy/) 4 | as a Kubernetes Job. 5 | 6 | # SOP 7 | TBC 8 | 9 | -------------------------------------------------------------------------------- /resources/operations/rclone-bucket-replicate/job-template.env: -------------------------------------------------------------------------------- 1 | OBJ_STORE_CONFIG_SECRET_NAME=rclone-rhobs-testing-secret 2 | NAMESPACE=rclone-test 3 | SRC_ENDPOINT=SRC 4 | SRC_BUCKET=telemeter-thanos-testing 5 | DST_ENDPOINT=DST 6 | DST_BUCKET=replicate -------------------------------------------------------------------------------- /resources/operations/rclone-bucket-replicate/monitoring-template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: template.openshift.io/v1 2 | kind: Template 3 | metadata: 4 | name: rhobs-rclone-bucket-replicate-pod-monitor 5 | labels: 6 | app.kubernetes.io/name: rclone-bucket-replicate 7 | app.kubernetes.io/part-of: observatorium 8 | parameters: 9 | - name: NAMESPACE 10 | description: The namespace where the running Job will reside. 11 | value: 'observatorium-operations' 12 | - name: NAME 13 | description: The name of the Job.
14 | value: 'rclone-bucket-replicate' 15 | objects: 16 | - apiVersion: monitoring.coreos.com/v1 17 | kind: PodMonitor 18 | metadata: 19 | name: observatorium-operations-rclone-bucket-replicate 20 | labels: 21 | prometheus: app-sre 22 | spec: 23 | namespaceSelector: 24 | matchNames: 25 | - ${NAMESPACE} 26 | selector: 27 | matchLabels: 28 | job-name: ${NAME} 29 | podMetricsEndpoints: 30 | - port: metrics 31 | interval: 30s 32 | path: /metrics 33 | 34 | -------------------------------------------------------------------------------- /resources/operations/rclone-bucket-replicate/s3-secret-template.env: -------------------------------------------------------------------------------- 1 | OBJ_STORE_CONFIG_SECRET_NAME= 2 | NAMESPACE=rclone-test 3 | SOURCE_ACCESS_KEY_ID= 4 | SOURCE_SECRET_ACCESS_KEY=+tm 5 | SOURCE_S3_BUCKET_PROVIDER=AWS 6 | SOURCE_S3_BUCKET_NAME= 7 | SOURCE_S3_BUCKET_ENDPOINT= 8 | SOURCE_S3_BUCKET_REGION= 9 | 10 | TARGET_ACCESS_KEY_ID= 11 | TARGET_SECRET_ACCESS_KEY= 12 | TARGET_S3_BUCKET_PROVIDER= 13 | TARGET_S3_BUCKET_NAME= 14 | TARGET_S3_BUCKET_ENDPOINT= 15 | TARGET_S3_BUCKET_REGION= -------------------------------------------------------------------------------- /resources/services/alertmanager/production/service-monitor-template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: template.openshift.io/v1 2 | kind: Template 3 | metadata: 4 | creationTimestamp: null 5 | name: alertmanager-service-monitor-rhobs-production 6 | objects: 7 | - apiVersion: monitoring.coreos.com/v1 8 | kind: ServiceMonitor 9 | metadata: 10 | creationTimestamp: null 11 | labels: 12 | app.kubernetes.io/component: alertmanager 13 | app.kubernetes.io/instance: observatorium 14 | app.kubernetes.io/name: alertmanager 15 | app.kubernetes.io/part-of: observatorium 16 | prometheus: app-sre 17 | name: alertmanager 18 | namespace: openshift-customer-monitoring 19 | spec: 20 | endpoints: 21 | - port: http 22 | relabelings: 23 | - action: replace 24 | separator: / 25 | sourceLabels: 26 | - namespace 27 | - pod 28 | targetLabel: instance 29 | namespaceSelector: 30 | matchNames: 31 | - rhobs-production 32 | selector: 33 | matchLabels: 34 | app.kubernetes.io/component: alertmanager 35 | app.kubernetes.io/instance: observatorium 36 | app.kubernetes.io/name: alertmanager 37 | app.kubernetes.io/part-of: observatorium 38 | -------------------------------------------------------------------------------- /resources/services/alertmanager/staging/service-monitor-template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: template.openshift.io/v1 2 | kind: Template 3 | metadata: 4 | creationTimestamp: null 5 | name: alertmanager-service-monitor-rhobs-stage 6 | objects: 7 | - apiVersion: monitoring.coreos.com/v1 8 | kind: ServiceMonitor 9 | metadata: 10 | creationTimestamp: null 11 | labels: 12 | app.kubernetes.io/component: alertmanager 13 | app.kubernetes.io/instance: observatorium 14 | app.kubernetes.io/name: alertmanager 15 | app.kubernetes.io/part-of: observatorium 16 | prometheus: app-sre 17 | name: alertmanager 18 | namespace: openshift-customer-monitoring 19 | spec: 20 | endpoints: 21 | - port: http 22 | relabelings: 23 | - action: replace 24 | separator: / 25 | sourceLabels: 26 | - namespace 27 | - pod 28 | targetLabel: instance 29 | namespaceSelector: 30 | matchNames: 31 | - rhobs-stage 32 | selector: 33 | matchLabels: 34 | app.kubernetes.io/component: alertmanager 35 | app.kubernetes.io/instance: observatorium 36 | 
app.kubernetes.io/name: alertmanager 37 | app.kubernetes.io/part-of: observatorium 38 | -------------------------------------------------------------------------------- /resources/services/memcached/staging/service-monitor-memcached-template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: template.openshift.io/v1 2 | kind: Template 3 | metadata: 4 | creationTimestamp: null 5 | name: memcached-service-monitor 6 | objects: 7 | - apiVersion: monitoring.coreos.com/v1 8 | kind: ServiceMonitor 9 | metadata: 10 | creationTimestamp: null 11 | labels: 12 | app.kubernetes.io/component: api-memcached 13 | app.kubernetes.io/instance: rhobs 14 | app.kubernetes.io/name: memcached 15 | app.kubernetes.io/part-of: observatorium 16 | app.kubernetes.io/version: 1.5-316 17 | prometheus: app-sre 18 | name: api-memcached 19 | namespace: openshift-customer-monitoring 20 | spec: 21 | endpoints: 22 | - honorLabels: true 23 | interval: 30s 24 | path: /metrics 25 | port: metrics 26 | namespaceSelector: 27 | matchNames: 28 | - rhobs-stage 29 | selector: 30 | matchLabels: 31 | app.kubernetes.io/component: api-memcached 32 | app.kubernetes.io/instance: rhobs 33 | app.kubernetes.io/name: memcached 34 | app.kubernetes.io/part-of: observatorium 35 | app.kubernetes.io/version: 1.5-316 36 | -------------------------------------------------------------------------------- /resources/services/objstore/local/thanos-default-secret.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Secret 3 | metadata: 4 | creationTimestamp: null 5 | labels: 6 | app.kubernetes.io/name: observatorium-mst-thanos-objectstorage 7 | name: observatorium-mst-thanos-objectstorage 8 | namespace: rhobs-local 9 | stringData: 10 | thanos.yaml: |- 11 | type: S3 12 | config: 13 | bucket: thanos 14 | region: us-east-1 15 | access_key: minio 16 | secret_key: minio123 17 | endpoint: minio.observatorium-minio.svc:9000 18 | insecure: true 19 | type: Opaque 20 | -------------------------------------------------------------------------------- /resources/services/objstore/local/thanos-telemeter-secret.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Secret 3 | metadata: 4 | creationTimestamp: null 5 | labels: 6 | app.kubernetes.io/name: thanos-objectstorage 7 | name: thanos-objectstorage 8 | namespace: rhobs-local 9 | stringData: 10 | thanos.yaml: |- 11 | type: S3 12 | config: 13 | bucket: thanos 14 | region: us-east-1 15 | access_key: minio 16 | secret_key: minio123 17 | endpoint: minio.observatorium-minio.svc:9000 18 | insecure: true 19 | type: Opaque 20 | -------------------------------------------------------------------------------- /resources/services/objstore/production/thanos-default-secret.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: template.openshift.io/v1 2 | kind: Template 3 | metadata: 4 | creationTimestamp: null 5 | name: thanos-default-secret 6 | objects: 7 | - apiVersion: v1 8 | kind: Secret 9 | metadata: 10 | creationTimestamp: null 11 | labels: 12 | app.kubernetes.io/name: observatorium-mst-thanos-objectstorage 13 | name: observatorium-mst-thanos-objectstorage 14 | namespace: rhobs-production 15 | stringData: 16 | thanos.yaml: |- 17 | type: S3 18 | config: 19 | bucket: ${S3_BUCKET_NAME} 20 | region: ${S3_BUCKET_REGION} 21 | access_key: ${ACCESS_KEY_ID} 22 | secret_key: ${SECRET_ACCESS_KEY} 23 | endpoint: 
${S3_BUCKET_ENDPOINT} 24 | type: Opaque 25 | parameters: 26 | - name: S3_BUCKET_NAME 27 | - name: S3_BUCKET_REGION 28 | - name: S3_BUCKET_ENDPOINT 29 | - name: ACCESS_KEY_ID 30 | - name: SECRET_ACCESS_KEY 31 | -------------------------------------------------------------------------------- /resources/services/objstore/production/thanos-telemeter-secret.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: template.openshift.io/v1 2 | kind: Template 3 | metadata: 4 | creationTimestamp: null 5 | name: thanos-telemeter-secret 6 | objects: 7 | - apiVersion: v1 8 | kind: Secret 9 | metadata: 10 | creationTimestamp: null 11 | labels: 12 | app.kubernetes.io/name: thanos-objectstorage 13 | name: thanos-objectstorage 14 | namespace: rhobs-production 15 | stringData: 16 | thanos.yaml: |- 17 | type: S3 18 | config: 19 | bucket: ${S3_BUCKET_NAME} 20 | region: ${S3_BUCKET_REGION} 21 | access_key: ${ACCESS_KEY_ID} 22 | secret_key: ${SECRET_ACCESS_KEY} 23 | endpoint: ${S3_BUCKET_ENDPOINT} 24 | type: Opaque 25 | parameters: 26 | - name: S3_BUCKET_NAME 27 | - name: S3_BUCKET_REGION 28 | - name: S3_BUCKET_ENDPOINT 29 | - name: ACCESS_KEY_ID 30 | - name: SECRET_ACCESS_KEY 31 | -------------------------------------------------------------------------------- /resources/services/objstore/staging/thanos-default-secret.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: template.openshift.io/v1 2 | kind: Template 3 | metadata: 4 | creationTimestamp: null 5 | name: thanos-default-secret 6 | objects: 7 | - apiVersion: v1 8 | kind: Secret 9 | metadata: 10 | creationTimestamp: null 11 | labels: 12 | app.kubernetes.io/name: observatorium-mst-thanos-objectstorage 13 | name: observatorium-mst-thanos-objectstorage 14 | namespace: rhobs-stage 15 | stringData: 16 | thanos.yaml: |- 17 | type: S3 18 | config: 19 | bucket: ${S3_BUCKET_NAME} 20 | region: ${S3_BUCKET_REGION} 21 | access_key: ${ACCESS_KEY_ID} 22 | secret_key: ${SECRET_ACCESS_KEY} 23 | endpoint: ${S3_BUCKET_ENDPOINT} 24 | type: Opaque 25 | parameters: 26 | - name: S3_BUCKET_NAME 27 | - name: S3_BUCKET_REGION 28 | - name: S3_BUCKET_ENDPOINT 29 | - name: ACCESS_KEY_ID 30 | - name: SECRET_ACCESS_KEY 31 | -------------------------------------------------------------------------------- /resources/services/objstore/staging/thanos-telemeter-secret.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: template.openshift.io/v1 2 | kind: Template 3 | metadata: 4 | creationTimestamp: null 5 | name: thanos-telemeter-secret 6 | objects: 7 | - apiVersion: v1 8 | kind: Secret 9 | metadata: 10 | creationTimestamp: null 11 | labels: 12 | app.kubernetes.io/name: thanos-objectstorage 13 | name: thanos-objectstorage 14 | namespace: rhobs-stage 15 | stringData: 16 | thanos.yaml: |- 17 | type: S3 18 | config: 19 | bucket: ${S3_BUCKET_NAME} 20 | region: ${S3_BUCKET_REGION} 21 | access_key: ${ACCESS_KEY_ID} 22 | secret_key: ${SECRET_ACCESS_KEY} 23 | endpoint: ${S3_BUCKET_ENDPOINT} 24 | type: Opaque 25 | parameters: 26 | - name: S3_BUCKET_NAME 27 | - name: S3_BUCKET_REGION 28 | - name: S3_BUCKET_ENDPOINT 29 | - name: ACCESS_KEY_ID 30 | - name: SECRET_ACCESS_KEY 31 | -------------------------------------------------------------------------------- /resources/services/objstore/thanos-object-store-secret.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: template.openshift.io/v1 2 | kind: 
Template 3 | metadata: 4 | creationTimestamp: null 5 | name: thanos-object-store-secret 6 | objects: 7 | - apiVersion: v1 8 | kind: Secret 9 | metadata: 10 | creationTimestamp: null 11 | labels: 12 | app.kubernetes.io/name: ${SECRET_NAME} 13 | name: ${SECRET_NAME} 14 | namespace: ${NAMESPACE} 15 | stringData: 16 | thanos.yaml: |- 17 | type: S3 18 | config: 19 | bucket: ${S3_BUCKET_NAME} 20 | region: ${S3_BUCKET_REGION} 21 | access_key: ${ACCESS_KEY_ID} 22 | secret_key: ${SECRET_ACCESS_KEY} 23 | endpoint: ${S3_BUCKET_ENDPOINT} 24 | type: Opaque 25 | parameters: 26 | - name: SECRET_NAME 27 | - name: NAMESPACE 28 | - name: S3_BUCKET_NAME 29 | - name: S3_BUCKET_REGION 30 | - name: S3_BUCKET_ENDPOINT 31 | - name: ACCESS_KEY_ID 32 | - name: SECRET_ACCESS_KEY 33 | -------------------------------------------------------------------------------- /resources/services/observatorium-api/production/service-monitor-observatorium-api-template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: template.openshift.io/v1 2 | kind: Template 3 | metadata: 4 | creationTimestamp: null 5 | name: observatorium-api-service-monitor 6 | objects: 7 | - apiVersion: monitoring.coreos.com/v1 8 | kind: ServiceMonitor 9 | metadata: 10 | creationTimestamp: null 11 | labels: 12 | app.kubernetes.io/component: api 13 | app.kubernetes.io/instance: rhobs 14 | app.kubernetes.io/name: observatorium-api 15 | app.kubernetes.io/part-of: rhobs 16 | app.kubernetes.io/version: 9aada65247a07782465beb500323a0e18d7e3d05 17 | prometheus: app-sre 18 | name: rhobs-gateway 19 | namespace: openshift-customer-monitoring 20 | spec: 21 | endpoints: 22 | - interval: 30s 23 | path: /metrics 24 | port: internal 25 | - interval: 30s 26 | path: /metrics 27 | port: opa-ams-metrics 28 | - interval: 30s 29 | path: /metrics 30 | port: metrics 31 | namespaceSelector: 32 | matchNames: 33 | - rhobs-production 34 | selector: 35 | matchLabels: 36 | app.kubernetes.io/component: api 37 | app.kubernetes.io/instance: rhobs 38 | app.kubernetes.io/name: observatorium-api 39 | app.kubernetes.io/part-of: rhobs 40 | -------------------------------------------------------------------------------- /resources/services/observatorium-api/staging/service-monitor-observatorium-api-template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: template.openshift.io/v1 2 | kind: Template 3 | metadata: 4 | creationTimestamp: null 5 | name: observatorium-api-service-monitor 6 | objects: 7 | - apiVersion: monitoring.coreos.com/v1 8 | kind: ServiceMonitor 9 | metadata: 10 | creationTimestamp: null 11 | labels: 12 | app.kubernetes.io/component: api 13 | app.kubernetes.io/instance: rhobs 14 | app.kubernetes.io/name: observatorium-api 15 | app.kubernetes.io/part-of: rhobs 16 | app.kubernetes.io/version: 9aada65247a07782465beb500323a0e18d7e3d05 17 | prometheus: app-sre 18 | name: rhobs-gateway 19 | namespace: openshift-customer-monitoring 20 | spec: 21 | endpoints: 22 | - interval: 30s 23 | path: /metrics 24 | port: internal 25 | - interval: 30s 26 | path: /metrics 27 | port: opa-ams-metrics 28 | - interval: 30s 29 | path: /metrics 30 | port: metrics 31 | namespaceSelector: 32 | matchNames: 33 | - rhobs-stage 34 | selector: 35 | matchLabels: 36 | app.kubernetes.io/component: api 37 | app.kubernetes.io/instance: rhobs 38 | app.kubernetes.io/name: observatorium-api 39 | app.kubernetes.io/part-of: rhobs 40 | -------------------------------------------------------------------------------- 
/resources/services/observatorium-tenants-template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: template.openshift.io/v1 2 | kind: Template 3 | metadata: 4 | annotations: 5 | qontract.recycle: "true" 6 | name: ${SECRET_NAME} 7 | objects: 8 | - apiVersion: v1 9 | kind: Secret 10 | metadata: 11 | name: ${SECRET_NAME} 12 | annotations: 13 | qontract.recycle: "true" 14 | stringData: 15 | client-id: ${CLIENT_ID} 16 | client-secret: ${CLIENT_SECRET} 17 | issuer-url: https://sso.redhat.com/auth/realms/redhat-external 18 | tenants.yaml: | 19 | ${TENANTS} 20 | type: Opaque 21 | parameters: 22 | - name: SECRET_NAME 23 | - name: CLIENT_ID 24 | - name: CLIENT_SECRET 25 | - name: TENANTS 26 | -------------------------------------------------------------------------------- /resources/services/servicemonitors/local/servicemonitors.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: ServiceMonitor 3 | metadata: 4 | creationTimestamp: null 5 | labels: 6 | app.kubernetes.io/component: monitoring 7 | app.kubernetes.io/created-by: thanos-operator 8 | app.kubernetes.io/instance: controller-manager-metrics 9 | app.kubernetes.io/managed-by: rhobs 10 | app.kubernetes.io/name: servicemonitor 11 | app.kubernetes.io/part-of: thanos-operator 12 | prometheus: app-sre 13 | name: thanos-operator-controller-manager-metrics 14 | namespace: openshift-customer-monitoring 15 | spec: 16 | endpoints: 17 | - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token 18 | path: /metrics 19 | port: https 20 | scheme: https 21 | tlsConfig: 22 | ca: {} 23 | cert: {} 24 | insecureSkipVerify: true 25 | namespaceSelector: 26 | matchNames: 27 | - rhobs-local 28 | selector: 29 | matchLabels: 30 | control-plane: controller-manager 31 | -------------------------------------------------------------------------------- /services/observatorium-tenants-template.jsonnet: -------------------------------------------------------------------------------- 1 | { 2 | apiVersion: 'v1', 3 | kind: 'Template', 4 | metadata: { 5 | name: '${SECRET_NAME}', 6 | annotations: { 7 | 'qontract.recycle': 'true', 8 | }, 9 | }, 10 | objects: [ 11 | { 12 | apiVersion: 'v1', 13 | kind: 'Secret', 14 | metadata+: { 15 | name: '${SECRET_NAME}', 16 | annotations: { 17 | 'qontract.recycle': 'true', 18 | }, 19 | }, 20 | type: 'Opaque', 21 | stringData: { 22 | 'client-id': '${CLIENT_ID}', 23 | 'client-secret': '${CLIENT_SECRET}', 24 | 'issuer-url': 'https://sso.redhat.com/auth/realms/redhat-external', 25 | 'tenants.yaml': '${TENANTS}', 26 | }, 27 | }, 28 | ], 29 | parameters: [ 30 | { name: 'SECRET_NAME' }, 31 | { name: 'CLIENT_ID' }, 32 | { name: 'CLIENT_SECRET' }, 33 | { name: 'TENANTS' }, 34 | ], 35 | } 36 | -------------------------------------------------------------------------------- /services/observatorium-traces-subscriptions-template.jsonnet: -------------------------------------------------------------------------------- 1 | local subscriptions = import 'observatorium-traces-subscriptions.libsonnet'; 2 | { 3 | apiVersion: 'template.openshift.io/v1', 4 | kind: 'Template', 5 | metadata: { 6 | name: 'observatorium-traces-subscriptions', 7 | }, 8 | objects: [ 9 | subscriptions.otelcol, 10 | subscriptions.jaeger, 11 | subscriptions.elasticsearch, 12 | ], 13 | parameters: [ 14 | { name: 'OPENTELEMETRY_OPERATOR_VERSION', value: '0.44.1-1' }, 15 | { name: 'OPENTELEMETRY_OPERATOR_NAMESPACE', value: 'openshift-operators' 
}, 16 | { name: 'OPENTELEMETRY_OPERATOR_SOURCE', value: 'redhat-operators' }, 17 | { name: 'JAEGER_OPERATOR_VERSION', value: '1.30.2' }, 18 | { name: 'JAEGER_OPERATOR_NAMESPACE', value: 'openshift-operators' }, 19 | { name: 'JAEGER_OPERATOR_SOURCE', value: 'redhat-operators' }, 20 | { name: 'ELASTICSEARCH_OPERATOR_VERSION', value: '5.4.1-24' }, 21 | { name: 'ELASTICSEARCH_OPERATOR_NAMESPACE', value: 'openshift-operators' }, 22 | { name: 'ELASTICSEARCH_OPERATOR_SOURCE', value: 'redhat-operators' }, 23 | ], 24 | } 25 | -------------------------------------------------------------------------------- /services/observatorium-traces-template.jsonnet: -------------------------------------------------------------------------------- 1 | local obs = import 'observatorium.libsonnet'; 2 | { 3 | apiVersion: 'template.openshift.io/v1', 4 | kind: 'Template', 5 | metadata: { 6 | name: 'observatorium-traces', 7 | }, 8 | objects: [ 9 | obs.elasticsearch, 10 | ] + [ 11 | obs.tracing.manifests[name] { 12 | metadata+: { 13 | }, 14 | } 15 | for name in std.objectFields(obs.tracing.manifests) 16 | ], 17 | parameters: [ 18 | { name: 'NAMESPACE', value: 'observatorium-traces' }, 19 | { name: 'OPENTELEMETRY_COLLECTOR_IMAGE', value: 'ghcr.io/open-telemetry/opentelemetry-collector-releases/opentelemetry-collector-contrib' }, 20 | { name: 'OPENTELEMETRY_COLLECTOR_IMAGE_TAG', value: '0.46.0' }, 21 | { name: 'ELASTICSEARCH_MEMORY', value: '4Gi' }, 22 | { name: 'ELASTICSEARCH_REQUEST_CPU', value: '200m' }, 23 | { name: 'ELASTICSEARCH_NAME', value: 'shared-es' }, 24 | { name: 'ELASTICSEARCH_NODE_COUNT', value: '1' }, 25 | { name: 'ELASTICSEARCH_REDUNDANCY_POLICY', value: 'ZeroRedundancy' }, 26 | ], 27 | } 28 | -------------------------------------------------------------------------------- /services/prometheus/remote_write_proxy.conf: -------------------------------------------------------------------------------- 1 | daemon off; 2 | worker_processes 1; 3 | error_log /dev/stderr; 4 | pid /tmp/nginx.pid; 5 | 6 | events { 7 | worker_connections 1024; 8 | } 9 | 10 | http { 11 | log_format main '$remote_addr - $remote_user [$time_local] "$request" ' 12 | '$status $body_bytes_sent "$http_referer" ' 13 | '"$http_user_agent" "$http_x_forwarded_for"'; 14 | 15 | server { 16 | listen *:%(listen_port)d; 17 | server_name _; 18 | access_log /dev/stdout main; 19 | error_log /dev/stderr; 20 | 21 | location / { 22 | proxy_set_header THANOS-TENANT %(thanos_tenant)s; 23 | proxy_pass %(forward_host)s; 24 | } 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /services_go/observatorium/assets/store-auto-shard-relabel-configMap.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Kubernetes replicas are named with the following convention "<statefulset-name>-<ordinal>". 4 | # This parameter expansion removes all characters until the last hyphen, capturing only the ordinal. 5 | export ORDINAL_INDEX=${HOSTNAME##*-} 6 | # This parameter expansion removes all characters after the last hyphen, capturing only the statefulset name.
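# For example, with a hypothetical pod name HOSTNAME=observatorium-thanos-store-0, the two
# expansions yield ORDINAL_INDEX=0 and STATEFULSET_NAME=observatorium-thanos-store.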
7 | export STATEFULSET_NAME="${HOSTNAME%-*}" 8 | export THANOS_STORE_REPLICAS=$(oc get statefulset ${STATEFULSET_NAME} -n ${NAMESPACE} -o=jsonpath='{.status.replicas}') 9 | 10 | # Logging parameters 11 | echo "generating store hashmod config with ORDINAL_INDEX=${ORDINAL_INDEX} STATEFULSET_NAME=${STATEFULSET_NAME} HOSTNAME=${HOSTNAME} NAMESPACE=${NAMESPACE} THANOS_STORE_REPLICAS=${THANOS_STORE_REPLICAS}" 12 | 13 | cat <<EOF >/tmp/config/hashmod-config.yaml 14 | - action: hashmod 15 | source_labels: 16 | - __block_id 17 | target_label: shard 18 | modulus: ${THANOS_STORE_REPLICAS} 19 | - action: keep 20 | source_labels: 21 | - shard 22 | regex: ${ORDINAL_INDEX} 23 | EOF 24 | -------------------------------------------------------------------------------- /services_go/observatorium/observatorium.go: -------------------------------------------------------------------------------- 1 | package observatorium 2 | 3 | import ( 4 | "github.com/bwplotka/mimic" 5 | ) 6 | 7 | type Observatorium struct { 8 | Cluster string 9 | Instance string // Instance is the name of the observatorium instance 10 | // MetricsInstances is the list of metrics instances in the observatorium instance 11 | // These are the different deployment units to which our tenants are mapped (e.g. default, rhel, telemeter) 12 | MetricsInstances ObservatoriumMetrics 13 | API ObservatoriumAPI 14 | } 15 | 16 | // Manifests generates the manifests for the instance of observatorium. 17 | func (o *Observatorium) Manifests(generator *mimic.Generator) { 18 | o.MetricsInstances.Manifests(generator.With(o.Cluster, o.Instance)) 19 | o.API.Manifests(generator.With(o.Cluster, o.Instance)) 20 | } 21 | -------------------------------------------------------------------------------- /services_go/services.go: -------------------------------------------------------------------------------- 1 | package services 2 | 3 | import ( 4 | "github.com/bwplotka/mimic" 5 | "github.com/rhobs/configuration/services_go/instances/rhobs" 6 | ) 7 | 8 | // Generate generates the manifests for all observatorium instances.
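// A hypothetical caller (for illustration only; the actual entrypoint wiring
// lives in this repo's mimic.go) would look roughly like:
//
//	gen := mimic.New()
//	defer gen.Generate()
//	services.Generate(gen)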
9 | func Generate(gen *mimic.Generator) { 10 | rhobsConfigs := rhobs.ClusterConfigs() 11 | for _, obsCfg := range rhobsConfigs { 12 | obsCfg.Manifests(gen) 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /synchronize.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | set -x 5 | set -o pipefail 6 | 7 | tmpdir=$(mktemp -d -t app-interface-XXXXXXXXXX) 8 | echo $tmpdir 9 | 10 | git clone git@gitlab.cee.redhat.com:service/app-interface.git $tmpdir 11 | 12 | cp -r resources/* $tmpdir/resources/ 13 | 14 | cd $tmpdir 15 | 16 | echo -n "Enter a new branch name and press [ENTER] (will be prefixed with synchronize_): " 17 | read branchname 18 | 19 | echo -n "Enter an environment name and press [ENTER] (e.g. stage, production): " 20 | read environment 21 | 22 | branch="synchronize_${branchname}_${environment}" 23 | 24 | git checkout -b $branch 25 | 26 | git add resources/*-${environment}.* 27 | 28 | git commit 29 | 30 | echo -n "Enter your fork URL to push to and press [ENTER] (e.g.: git@gitlab.cee.redhat.com:USERNAME/app-interface.git): " 31 | read fork 32 | 33 | git push $fork $branch 34 | 35 | rm -rf $tmpdir 36 | -------------------------------------------------------------------------------- /tests/ci/env/dex.test.ci.env: -------------------------------------------------------------------------------- 1 | DEX_CPU_REQUEST=30m 2 | DEX_CPU_LIMITS=50m 3 | DEX_MEMORY_REQUEST=100Mi 4 | DEX_MEMORY_LIMITS=100Mi 5 | DEX_STORAGE=0.25Gi 6 | -------------------------------------------------------------------------------- /tests/ci/env/logging.test.ci.env: -------------------------------------------------------------------------------- 1 | NAMESPACE=observatorium-tools 2 | ACCESS_KEY_ID=minio 3 | SECRET_ACCESS_KEY=minio123 4 | S3_BUCKET_NAME=loki 5 | S3_BUCKET_ENDPOINT=http://minio.minio.svc.cluster.local:9000 6 | S3_BUCKET_REGION="" 7 | LOKI_SIZE=1x.extra-small 8 | LOKI_STORAGE_CLASS=kubevirt-hostpath-provisioner 9 | -------------------------------------------------------------------------------- /tests/ci/env/minio.test.ci.env: -------------------------------------------------------------------------------- 1 | MINIO_CPU_REQUEST=50m 2 | MINIO_CPU_LIMITS=50m 3 | MINIO_MEMORY_REQUEST=200Mi 4 | MINIO_MEMORY_LIMITS=200Mi 5 | MINIO_STORAGE=0.25Gi 6 | -------------------------------------------------------------------------------- /tests/ci/env/observatorium-metric-federation-rule.test.ci.env: -------------------------------------------------------------------------------- 1 | STORAGE_CLASS=kubevirt-hostpath-provisioner 2 | THANOS_RULER_CPU_LIMIT=50m 3 | THANOS_RULER_CPU_REQUEST=25m 4 | THANOS_RULER_MEMORY_LIMIT=200Mi 5 | THANOS_RULER_MEMORY_REQUEST=200Mi 6 | THANOS_S3_SECRET=thanos-test-s3 7 | THANOS_QUERIER_NAMESPACE=observatorium-metrics 8 | JAEGER_AGENT_IMAGE=jaegertracing/jaeger-agent 9 | SERVICE_ACCOUNT_NAME=observatorium 10 | THANOS_RULER_PVC_REQUEST=0.25Gi 11 | THANOS_RULER_REPLICAS=1 12 | -------------------------------------------------------------------------------- /tests/ci/env/observatorium-parca.test.ci.env: -------------------------------------------------------------------------------- 1 | IMAGE=ghcr.io/parca-dev/parca 2 | PARCA_CPU_REQUEST=30m 3 | PARCA_MEMORY_REQUEST=500Mi 4 | PARCA_CPU_LIMITS=50m 5 | PARCA_MEMORY_LIMITS=500Mi 6 | ACCESS_KEY_ID=minio 7 | SECRET_ACCESS_KEY=minio123 8 | S3_BUCKET_NAME=parca 9 | S3_BUCKET_ENDPOINT=minio.minio.svc.cluster.local:9000 10 |
S3_BUCKET_REGION=eu-central-1 11 | SD_NAMESPACE_LIST='["observatorium-metrics"]' 12 | -------------------------------------------------------------------------------- /tests/ci/env/observatorium.test.ci.env: -------------------------------------------------------------------------------- 1 | SERVICE_ACCOUNT_NAME=default 2 | JAEGER_AGENT_IMAGE=jaegertracing/jaeger-agent 3 | RULES_OBJSTORE_S3_SECRET=rules-objstore-s3 4 | MANAGED_TENANTS=rhobs 5 | OBSERVATORIUM_URL=http://observatorium-observatorium-api.observatorium.svc.cluster.local:8080 6 | 7 | GUBERNATOR_REPLICAS=1 8 | GUBERNATOR_CPU_LIMIT=50m 9 | GUBERNATOR_CPU_REQUEST=25m 10 | GUBERNATOR_MEMORY_LIMIT=100Mi 11 | GUBERNATOR_MEMORY_REQUEST=50Mi 12 | OBSERVATORIUM_API_REPLICAS=1 13 | OBSERVATORIUM_API_CPU_LIMIT=50m 14 | OBSERVATORIUM_API_CPU_REQUEST=25m 15 | OBSERVATORIUM_API_MEMORY_LIMIT=100Mi 16 | OBSERVATORIUM_API_MEMORY_REQUEST=50Mi 17 | UP_REPLICAS=1 18 | UP_CPU_REQUEST=25m 19 | UP_CPU_LIMIT=50m 20 | UP_MEMORY_REQUEST=50Mi 21 | UP_MEMORY_LIMIT=100Mi 22 | MEMCACHED_CPU_LIMIT=50m 23 | MEMCACHED_CPU_REQUEST=25m 24 | MEMCACHED_EXPORTER_CPU_LIMIT=50m 25 | MEMCACHED_EXPORTER_CPU_REQUEST=25m 26 | MEMCACHED_EXPORTER_MEMORY_LIMIT=100Mi 27 | MEMCACHED_MEMORY_LIMIT=100Mi 28 | MEMCACHED_MEMORY_REQUEST=50Mi 29 | OAUTH_PROXY_CPU_LIMITS=50m 30 | OAUTH_PROXY_CPU_REQUEST=25m 31 | OAUTH_PROXY_MEMORY_LIMITS=100Mi 32 | OAUTH_PROXY_MEMORY_REQUEST=50Mi 33 | OPA_AMS_CPU_LIMIT=50m 34 | OPA_AMS_CPU_REQUEST=25m 35 | OPA_AMS_MEMORY_LIMIT=100Mi 36 | OPA_AMS_MEMORY_REQUEST=50Mi 37 | -------------------------------------------------------------------------------- /tests/ci/env/rhelemeter.test.ci.env: -------------------------------------------------------------------------------- 1 | RHELEMETER_SERVER_CPU_LIMIT=50m 2 | RHELEMETER_SERVER_CPU_REQUEST=25m 3 | RHELEMETER_SERVER_MEMORY_LIMIT=100Mi 4 | RHELEMETER_SERVER_MEMORY_REQUEST=50Mi 5 | RHELEMETER_FORWARD_URL=http://observatorium-observatorium-api.observatorium.svc.cluster.local:8080/api/metrics/v1/test/api/v1/receive 6 | RHELEMETER_OIDC_ISSUER=http://dex.dex.svc.cluster.local:5556/dex 7 | RHELEMETER_CLIENT_ID=test 8 | RHELEMETER_TENANT_ID=test 9 | RHELEMETER_CLIENT_SECRET=ZXhhbXBsZS1hcHAtc2VjcmV0 10 | RHELEMETER_CLIENT_INFO_PSK=ZXhhbXBsZS1hcHAtc2VjcmV0 11 | -------------------------------------------------------------------------------- /tests/ci/env/telemeter.ci.env: -------------------------------------------------------------------------------- 1 | SERVICE_ACCOUNT_NAME=default 2 | TELEMETER_FORWARD_URL=http://observatorium-observatorium-api.observatorium.svc.cluster.local:8080/api/metrics/v1/telemeter/api/v1/receive 3 | 4 | 5 | REPLICAS=1 6 | MEMCACHED_CPU_LIMIT=30m 7 | MEMCACHED_CPU_REQUEST=25m 8 | MEMCACHED_EXPORTER_CPU_LIMIT=50m 9 | MEMCACHED_EXPORTER_CPU_REQUEST=25m 10 | MEMCACHED_EXPORTER_MEMORY_LIMIT=100Mi 11 | MEMCACHED_MEMORY_LIMIT=100Mi 12 | MEMCACHED_MEMORY_REQUEST=50Mi 13 | OAUTH_PROXY_CPU_REQUEST=25m 14 | OAUTH_PROXY_MEMORY_REQUEST=50Mi 15 | OAUTH_PROXY_CPU_LIMITS=50m 16 | OAUTH_PROXY_MEMORY_LIMITS=100Mi 17 | TELEMETER_SERVER_CPU_LIMIT=100m 18 | TELEMETER_SERVER_CPU_REQUEST=25m 19 | TELEMETER_SERVER_MEMORY_LIMIT=100Mi 20 | TELEMETER_SERVER_MEMORY_REQUEST=50Mi 21 | -------------------------------------------------------------------------------- /tests/ci/manifests/rbac.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | data: 3 | rbac.yaml: |- 4 | "roleBindings": 5 | - "name": "test" 6 | "roles": 7 | - "read-write" 8 |
"subjects": 9 | - "kind": "user" 10 | "name": "admin@example.com" 11 | "roles": 12 | - "name": "read-write" 13 | "permissions": 14 | - "read" 15 | - "write" 16 | "resources": 17 | - "logs" 18 | - "metrics" 19 | "tenants": 20 | - "test" 21 | kind: ConfigMap 22 | metadata: 23 | labels: 24 | app.kubernetes.io/component: api 25 | app.kubernetes.io/instance: observatorium 26 | app.kubernetes.io/name: observatorium-api 27 | app.kubernetes.io/part-of: observatorium 28 | name: observatorium-observatorium-api 29 | namespace: observatorium 30 | -------------------------------------------------------------------------------- /tests/ci/manifests/test-tenant.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Secret 3 | metadata: 4 | labels: 5 | app.kubernetes.io/component: api 6 | app.kubernetes.io/instance: observatorium 7 | app.kubernetes.io/name: observatorium-api 8 | app.kubernetes.io/part-of: observatorium 9 | name: observatorium-observatorium-api 10 | namespace: observatorium 11 | stringData: 12 | client-id: test 13 | client-secret: ZXhhbXBsZS1hcHAtc2VjcmV0 14 | issuer-url: http://dex.dex.svc.cluster.local:5556/dex 15 | tenants.yaml: |- 16 | "tenants": 17 | - "id": "1610b0c3-c509-4592-a256-a1871353dbfa" 18 | "name": "test" 19 | "oidc": 20 | "clientID": "test" 21 | "clientSecret": "ZXhhbXBsZS1hcHAtc2VjcmV0" 22 | "issuerURL": "http://dex.dex.svc.cluster.local:5556/dex" 23 | "usernameClaim": "email" 24 | "rateLimits": 25 | - "endpoint": "/api/metrics/v1/.+/api/v1/receive" 26 | "limit": 1000 27 | "window": "1s" 28 | - "endpoint": "/api/logs/v1/.*" 29 | "limit": 1000 30 | "window": "1s" 31 | -------------------------------------------------------------------------------- /tests/ci/rhobsci.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rhobs/configuration/6d2b136e48c291c1a415394fed40219188620665/tests/ci/rhobsci.png -------------------------------------------------------------------------------- /tests/deploy/env/logging.test.env: -------------------------------------------------------------------------------- 1 | NAMESPACE=observatorium-tools 2 | ACCESS_KEY_ID=minio 3 | SECRET_ACCESS_KEY=minio123 4 | S3_BUCKET_NAME=loki 5 | S3_BUCKET_ENDPOINT=http://minio.minio.svc.cluster.local:9000 6 | S3_BUCKET_REGION="" 7 | LOKI_SIZE=1x.extra-small 8 | LOKI_STORAGE_CLASS=gp2-csi 9 | -------------------------------------------------------------------------------- /tests/deploy/env/observatorium-jaeger.test.env: -------------------------------------------------------------------------------- 1 | SERVICE_ACCOUNT_NAME=observatorium 2 | STORAGE_CLASS=gp2-csi 3 | 4 | JAEGER_CPU_REQUEST=100m 5 | JAEGER_MEMORY_REQUEST=100Mi 6 | JAEGER_CPU_LIMITS=200m 7 | JAEGER_MEMORY_LIMITS=200Mi 8 | OAUTH_PROXY_CPU_REQUEST=100m 9 | OAUTH_PROXY_MEMORY_REQUEST=100Mi 10 | OAUTH_PROXY_CPU_LIMITS=200m 11 | OAUTH_PROXY_MEMORY_LIMITS=200Mi 12 | -------------------------------------------------------------------------------- /tests/deploy/env/observatorium-metric-federation-rule.test.env: -------------------------------------------------------------------------------- 1 | STORAGE_CLASS=gp2-csi 2 | THANOS_QUERIER_NAMESPACE=observatorium-metrics 3 | JAEGER_AGENT_IMAGE=jaegertracing/jaeger-agent 4 | THANOS_S3_SECRET=thanos-test-s3 5 | SERVICE_ACCOUNT_NAME=observatorium 6 | 7 | THANOS_RULER_CPU_LIMIT=200m 8 | THANOS_RULER_CPU_REQUEST=100m 9 | THANOS_RULER_MEMORY_LIMIT=200Mi 10 | THANOS_RULER_MEMORY_REQUEST=100Mi 
11 | -------------------------------------------------------------------------------- /tests/deploy/env/observatorium-parca.test.env: -------------------------------------------------------------------------------- 1 | IMAGE=ghcr.io/parca-dev/parca 2 | PARCA_CPU_REQUEST=100m 3 | PARCA_MEMORY_REQUEST=500Mi 4 | PARCA_CPU_LIMITS=200m 5 | PARCA_MEMORY_LIMITS=1Gi 6 | ACCESS_KEY_ID=minio 7 | SECRET_ACCESS_KEY=minio123 8 | S3_BUCKET_NAME=parca 9 | S3_BUCKET_ENDPOINT=minio.minio.svc.cluster.local:9000 10 | S3_BUCKET_REGION=eu-central-1 11 | SD_NAMESPACE_LIST='["observatorium-metrics"]' 12 | -------------------------------------------------------------------------------- /tests/deploy/env/observatorium.test.env: -------------------------------------------------------------------------------- 1 | SERVICE_ACCOUNT_NAME=default 2 | JAEGER_AGENT_IMAGE=jaegertracing/jaeger-agent 3 | RULES_OBJSTORE_S3_SECRET=rules-objstore-s3 4 | MANAGED_TENANTS=rhobs 5 | OBSERVATORIUM_URL=http://observatorium-observatorium-api.observatorium.svc.cluster.local:8080 6 | 7 | GUBERNATOR_REPLICAS=1 8 | OBSERVATORIUM_API_REPLICAS=1 9 | UP_REPLICAS=1 10 | 11 | OBSERVATORIUM_API_CPU_LIMIT=100m 12 | OBSERVATORIUM_API_MEMORY_LIMIT=100Mi 13 | OBSERVATORIUM_API_MEMORY_REQUEST=100Mi 14 | UP_CPU_LIMIT=100m 15 | UP_MEMORY_REQUEST=100Mi 16 | UP_MEMORY_LIMIT=100Mi 17 | -------------------------------------------------------------------------------- /tests/deploy/env/rhelemeter.test.env: -------------------------------------------------------------------------------- 1 | RHELEMETER_SERVER_CPU_LIMIT=200m 2 | RHELEMETER_SERVER_CPU_REQUEST=100m 3 | RHELEMETER_SERVER_MEMORY_LIMIT=200Mi 4 | RHELEMETER_SERVER_MEMORY_REQUEST=100Mi 5 | RHELEMETER_FORWARD_URL=http://observatorium-observatorium-api.observatorium.svc.cluster.local:8080/api/metrics/v1/test/api/v1/receive 6 | RHELEMETER_OIDC_ISSUER=http://dex.dex.svc.cluster.local:5556/dex 7 | RHELEMETER_CLIENT_ID=test 8 | RHELEMETER_TENANT_ID=test 9 | RHELEMETER_CLIENT_SECRET=ZXhhbXBsZS1hcHAtc2VjcmV0 10 | RHELEMETER_CLIENT_INFO_PSK=ZXhhbXBsZS1hcHAtc2VjcmV0 11 | -------------------------------------------------------------------------------- /tests/deploy/env/telemeter.test.env: -------------------------------------------------------------------------------- 1 | SERVICE_ACCOUNT_NAME=default 2 | TELEMETER_FORWARD_URL=http://observatorium-observatorium-api.observatorium.svc.cluster.local:8080/api/metrics/v1/telemeter/api/v1/receive 3 | 4 | REPLICAS=1 5 | 6 | MEMCACHED_CPU_LIMIT=200m 7 | MEMCACHED_CPU_REQUEST=100m 8 | MEMCACHED_EXPORTER_CPU_LIMIT=200m 9 | MEMCACHED_EXPORTER_CPU_REQUEST=100m 10 | MEMCACHED_EXPORTER_MEMORY_LIMIT=200Mi 11 | MEMCACHED_MEMORY_LIMIT=200Mi 12 | MEMCACHED_MEMORY_REQUEST=100Mi 13 | OAUTH_PROXY_CPU_REQUEST=100m 14 | OAUTH_PROXY_MEMORY_REQUEST=100Mi 15 | OAUTH_PROXY_CPU_LIMITS=200m 16 | OAUTH_PROXY_MEMORY_LIMITS=200Mi 17 | TELEMETER_SERVER_CPU_LIMIT=200m 18 | TELEMETER_SERVER_CPU_REQUEST=100m 19 | TELEMETER_SERVER_MEMORY_LIMIT=200Mi 20 | TELEMETER_SERVER_MEMORY_REQUEST=100Mi 21 | -------------------------------------------------------------------------------- /tests/deploy/manifests/clusterlogforwader.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: logging.openshift.io/v1 2 | kind: ClusterLogForwarder 3 | metadata: 4 | name: instance 5 | namespace: openshift-logging 6 | spec: 7 | inputs: 8 | - application: 9 | namespaces: 10 | - observatorium-metrics 11 | - telemeter 12 | - observatorium-logs 13 | - observatorium 14 | name: 
send-observatorium-app-logs 15 | outputs: 16 | - name: loki-app 17 | type: loki 18 | url: https://observatorium-lokistack-gateway-http.observatorium-tools.svc.cluster.local:8080/api/logs/v1/application 19 | pipelines: 20 | - inputRefs: 21 | - send-observatorium-app-logs 22 | name: observatorium-app-logs 23 | outputRefs: 24 | - loki-app 25 | -------------------------------------------------------------------------------- /tests/deploy/manifests/clusterlogging.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: logging.openshift.io/v1 2 | kind: ClusterLogging 3 | metadata: 4 | name: instance 5 | namespace: openshift-logging 6 | spec: 7 | collection: 8 | logs: 9 | fluentd: {} 10 | type: vector 11 | managementState: Managed 12 | -------------------------------------------------------------------------------- /tests/deploy/manifests/logging-operator.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Namespace 3 | metadata: 4 | name: openshift-logging 5 | annotations: 6 | openshift.io/node-selector: "" 7 | labels: 8 | openshift.io/cluster-monitoring: "true" 9 | --- 10 | apiVersion: operators.coreos.com/v1 11 | kind: OperatorGroup 12 | metadata: 13 | name: cluster-logging 14 | namespace: openshift-logging 15 | spec: 16 | targetNamespaces: 17 | - openshift-logging 18 | --- 19 | apiVersion: operators.coreos.com/v1alpha1 20 | kind: Subscription 21 | metadata: 22 | name: cluster-logging 23 | namespace: openshift-logging 24 | spec: 25 | channel: "stable-5.6" 26 | name: cluster-logging 27 | source: redhat-operators 28 | sourceNamespace: openshift-marketplace 29 | 30 | -------------------------------------------------------------------------------- /tests/deploy/manifests/loki-operator.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Namespace 3 | metadata: 4 | name: openshift-operators-redhat 5 | annotations: 6 | openshift.io/node-selector: "" 7 | labels: 8 | openshift.io/cluster-monitoring: "true" 9 | --- 10 | apiVersion: operators.coreos.com/v1 11 | kind: OperatorGroup 12 | metadata: 13 | name: operator-group 14 | namespace: openshift-operators-redhat 15 | spec: 16 | targetNamespaces: 17 | --- 18 | apiVersion: operators.coreos.com/v1alpha1 19 | kind: Subscription 20 | metadata: 21 | name: loki-operator 22 | namespace: openshift-operators-redhat 23 | spec: 24 | channel: stable-5.6 25 | installPlanApproval: Automatic 26 | name: loki-operator 27 | source: redhat-operators 28 | sourceNamespace: openshift-marketplace 29 | -------------------------------------------------------------------------------- /tests/deploy/manifests/observatorium-alertmanager-config-secret.yaml: -------------------------------------------------------------------------------- 1 | kind: Secret 2 | apiVersion: v1 3 | metadata: 4 | name: alertmanager-config 5 | data: 6 | alertmanager.yaml: >- 7 | Cmdsb2JhbDoKICByZXNvbHZlX3RpbWVvdXQ6IDVtCiAgc2xhY2tfYXBpX3VybDogaHR0cHM6Ly9ob29rcy5zbGFjay5jb20vc2VydmljZXMvVDAyN0YzR0FKL0JGWVBCNTQwWi8xUlU0U2hMZmd4ZEpvMUNCTVpXaXgzRHYKcmVjZWl2ZXJzOgotIG5hbWU6IGRlZmF1bHQKcm91dGU6CiAgZ3JvdXBfaW50ZXJ2YWw6IDVtCiAgZ3JvdXBfd2FpdDogMzBzCiAgcmVjZWl2ZXI6IGRlZmF1bHQKICByZXBlYXRfaW50ZXJ2YWw6IDEyaAogIHJvdXRlczoKICAtIG1hdGNoOgogICAgICB0ZW5hbnRfaWQ6IDBmYzJiMDBlLTIwMWItNGMxNy1iOWYyLTE5ZDkxYWRjNGZkMgp0ZW1wbGF0ZXM6CiAgLSAnKi50bXBsJwo= 8 | type: Opaque 9 | -------------------------------------------------------------------------------- 
/tests/deploy/manifests/observatorium-cluster-role-binding.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRoleBinding 3 | metadata: 4 | name: observatorium 5 | subjects: 6 | - kind: ServiceAccount 7 | name: observatorium 8 | namespace: observatorium-metrics 9 | roleRef: 10 | kind: ClusterRole 11 | name: observatorium 12 | apiGroup: rbac.authorization.k8s.io 13 | -------------------------------------------------------------------------------- /tests/deploy/manifests/observatorium-cluster-role.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRole 3 | metadata: 4 | name: observatorium 5 | rules: 6 | - apiGroups: 7 | - authentication.k8s.io 8 | resources: 9 | - tokenreviews 10 | verbs: 11 | - create 12 | - apiGroups: 13 | - authorization.k8s.io 14 | resources: 15 | - subjectaccessreviews 16 | verbs: 17 | - create 18 | - apiGroups: 19 | - "" 20 | resourceNames: 21 | - observatorium-metrics 22 | resources: 23 | - namespaces 24 | verbs: 25 | - get 26 | -------------------------------------------------------------------------------- /tests/deploy/manifests/observatorium-logs-secret.yaml: -------------------------------------------------------------------------------- 1 | kind: Secret 2 | apiVersion: v1 3 | metadata: 4 | name: rules-objstore-s3 5 | data: 6 | aws_access_key_id: bWluaW8= 7 | aws_region: ZXUtY2VudHJhbC0x 8 | aws_secret_access_key: bWluaW8xMjM= 9 | bucket: cnVsZXM= 10 | endpoint: bWluaW8ubWluaW8uc3ZjLmNsdXN0ZXIubG9jYWw6OTAwMA== 11 | type: Opaque 12 | --- 13 | kind: Secret 14 | apiVersion: v1 15 | metadata: 16 | name: observatorium-logs-testing-s3 17 | data: 18 | aws_access_key_id: bWluaW8= 19 | aws_region: ZXUtY2VudHJhbC0x 20 | aws_secret_access_key: bWluaW8xMjM= 21 | bucket: bG9raQ== 22 | endpoint: bWluaW8ubWluaW8uc3ZjLmNsdXN0ZXIubG9jYWw6OTAwMA== 23 | type: Opaque 24 | -------------------------------------------------------------------------------- /tests/deploy/manifests/observatorium-metrics-thanos-objectstorage-secret-template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: template.openshift.io/v1 2 | kind: Template 3 | metadata: 4 | name: minio-secret 5 | objects: 6 | - apiVersion: v1 7 | kind: Secret 8 | metadata: 9 | name: ${THANOS_CONFIG_SECRET} 10 | namespace: ${OBSERVATORIUM_METRICS_NAMESPACE} 11 | stringData: 12 | thanos.yaml: | 13 | type: s3 14 | config: 15 | bucket: thanos 16 | endpoint: minio.${MINIO_NAMESPACE}.svc.cluster.local:9000 17 | insecure: true 18 | access_key: minio 19 | secret_key: minio123 20 | type: Opaque 21 | - apiVersion: v1 22 | kind: Secret 23 | metadata: 24 | name: ${THANOS_S3_SECRET} 25 | namespace: ${OBSERVATORIUM_METRICS_NAMESPACE} 26 | data: 27 | aws_access_key_id: bWluaW8= 28 | aws_secret_access_key: bWluaW8xMjM= 29 | type: Opaque 30 | parameters: 31 | - name: MINIO_NAMESPACE 32 | value: minio 33 | - name: OBSERVATORIUM_METRICS_NAMESPACE 34 | value: observatorium-metrics 35 | - name: THANOS_CONFIG_SECRET 36 | value: thanos-objectstorage 37 | - name: THANOS_S3_SECRET 38 | value: thanos-test-s3 39 | -------------------------------------------------------------------------------- /tests/deploy/manifests/observatorium-parca-secret.yaml: -------------------------------------------------------------------------------- 1 | kind: Secret 2 | apiVersion: v1 3 | metadata: 4 | name: conprof-proxy 5 | data: 
6 | session_secret: NjU2MDlmZTFhNWQ0NDgwMDliZTE3YjYxYTVlNjg5OGU= 7 | type: Opaque 8 | -------------------------------------------------------------------------------- /tests/deploy/manifests/observatorium-rhobs-tenant-secret.yaml: -------------------------------------------------------------------------------- 1 | kind: Secret 2 | apiVersion: v1 3 | metadata: 4 | name: rhobs-tenant 5 | data: 6 | client_id: b2JzZXJ2YXRvcml1bS1yaG9icy10ZXN0aW5n 7 | client_secret: ZjA3OTIxOTctMmNjZS00NTZkLTlmYTItZTM4YTliMTI5NjVh 8 | type: Opaque 9 | -------------------------------------------------------------------------------- /tests/deploy/manifests/observatorium-rules-objstore-secret.yaml: -------------------------------------------------------------------------------- 1 | kind: Secret 2 | apiVersion: v1 3 | metadata: 4 | name: rules-objstore 5 | data: 6 | objstore.yaml: >- 7 | dHlwZTogUzMKY29uZmlnOgogIGJ1Y2tldDogInRlbGVtZXRlci1ydWxlcy1vYmpzdG9yZS10ZXN0aW5nIgogIGVuZHBvaW50OiAiczMudXMtZWFzdC0xLmFtYXpvbmF3cy5jb20iCiAgcmVnaW9uOiAidXMtZWFzdC0xIgogIHRyYWNlOgogICAgZW5hYmxlOiBmYWxzZQo= 8 | type: Opaque 9 | --- 10 | kind: Secret 11 | apiVersion: v1 12 | metadata: 13 | name: rules-objstore-s3 14 | data: 15 | aws_access_key_id: bWluaW8= 16 | aws_region: ZXUtY2VudHJhbC0x 17 | aws_secret_access_key: bWluaW8xMjM= 18 | bucket: cnVsZXM= 19 | endpoint: bWluaW8ubWluaW8uc3ZjLmNsdXN0ZXIubG9jYWw6OTAwMA== 20 | type: Opaque 21 | -------------------------------------------------------------------------------- /tests/deploy/manifests/observatorium-service-account.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ServiceAccount 3 | metadata: 4 | name: observatorium 5 | -------------------------------------------------------------------------------- /tests/deploy/manifests/observatorium-tools-network-policy.yaml: -------------------------------------------------------------------------------- 1 | kind: NetworkPolicy 2 | apiVersion: networking.k8s.io/v1 3 | metadata: 4 | name: allow-from-ingress-namespace 5 | namespace: observatorium-tools 6 | spec: 7 | podSelector: {} 8 | ingress: 9 | - from: 10 | - namespaceSelector: 11 | matchLabels: 12 | network.openshift.io/policy-group: ingress 13 | policyTypes: 14 | - Ingress 15 | --- 16 | kind: NetworkPolicy 17 | apiVersion: networking.k8s.io/v1 18 | metadata: 19 | name: allow-from-openshift-logging-namespace 20 | namespace: observatorium-tools 21 | spec: 22 | podSelector: {} 23 | ingress: 24 | - from: 25 | - namespaceSelector: 26 | matchLabels: 27 | kubernetes.io/metadata.name: openshift-logging 28 | policyTypes: 29 | - Ingress 30 | --- 31 | kind: NetworkPolicy 32 | apiVersion: networking.k8s.io/v1 33 | metadata: 34 | name: allow-from-same-namespace 35 | namespace: observatorium-tools 36 | spec: 37 | podSelector: {} 38 | ingress: 39 | - from: 40 | - podSelector: {} 41 | policyTypes: 42 | - Ingress 43 | 44 | -------------------------------------------------------------------------------- /tests/deploy/manifests/rhelemeter_certs/ca.crt: -------------------------------------------------------------------------------- 1 | -----BEGIN CERTIFICATE----- 2 | MIIBDzCBtgIJANecoFgWJpRZMAoGCCqGSM49BAMCMA8xDTALBgNVBAMMBHRlc3Qw 3 | IBcNMjMwNzE5MDc0ODM2WhgPMzAwOTAzMTIwNzQ4MzZaMA8xDTALBgNVBAMMBHRl 4 | c3QwWTATBgcqhkjOPQIBBggqhkjOPQMBBwNCAARYZDE4Kz0ys2KvRo7p3e6/P3Yo 5 | eSkDXJ1DpVWH5+XemuAriGE8pMwij7yTsbmUHHGlnMZNh0Uc+Uiplb5rbeaSMAoG 6 | CCqGSM49BAMCA0gAMEUCIBYKEb0GBppTsRXrVGJqfrzcgqQhpEXWwhg9LQPfiRce 7 | 
AiEAtpGaoRW5KYA30uNZNabK0U9rfrORYLZhN2ovhpm3+Ko= 8 | -----END CERTIFICATE----- 9 | -------------------------------------------------------------------------------- /tests/deploy/manifests/rhelemeter_certs/tls.crt: -------------------------------------------------------------------------------- 1 | -----BEGIN CERTIFICATE----- 2 | MIIBDzCBtgIJANecoFgWJpRZMAoGCCqGSM49BAMCMA8xDTALBgNVBAMMBHRlc3Qw 3 | IBcNMjMwNzE5MDc0ODM2WhgPMzAwOTAzMTIwNzQ4MzZaMA8xDTALBgNVBAMMBHRl 4 | c3QwWTATBgcqhkjOPQIBBggqhkjOPQMBBwNCAARYZDE4Kz0ys2KvRo7p3e6/P3Yo 5 | eSkDXJ1DpVWH5+XemuAriGE8pMwij7yTsbmUHHGlnMZNh0Uc+Uiplb5rbeaSMAoG 6 | CCqGSM49BAMCA0gAMEUCIBYKEb0GBppTsRXrVGJqfrzcgqQhpEXWwhg9LQPfiRce 7 | AiEAtpGaoRW5KYA30uNZNabK0U9rfrORYLZhN2ovhpm3+Ko= 8 | -----END CERTIFICATE----- 9 | -------------------------------------------------------------------------------- /tests/deploy/manifests/rhelemeter_certs/tls.key: -------------------------------------------------------------------------------- 1 | -----BEGIN EC PRIVATE KEY----- 2 | MHcCAQEEIO5yfP9d0RcEzTTeM732EWnGEqWYlvu+JaOEpRXYsHaloAoGCCqGSM49 3 | AwEHoUQDQgAEWGQxOCs9MrNir0aO6d3uvz92KHkpA1ydQ6VVh+fl3prgK4hhPKTM 4 | Io+8k7G5lBxxpZzGTYdFHPlIqZW+a23mkg== 5 | -----END EC PRIVATE KEY----- 6 | -------------------------------------------------------------------------------- /tests/deploy/manifests/telemeter-token-refersher-oidc-secret.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Secret 3 | metadata: 4 | labels: 5 | k8s-app: telemeter-server 6 | name: token-refresher-oidc 7 | type: Opaque 8 | stringData: 9 | audience: test 10 | clientID: test 11 | clientSecret: ZXhhbXBsZS1hcHAtc2VjcmV0 12 | issuerURL: http://dex.dex.svc.cluster.local:5556/dex 13 | -------------------------------------------------------------------------------- /tests/deploy/testdata/client-info.json: -------------------------------------------------------------------------------- 1 | { 2 | "secret": "super-secret", 3 | "config": { 4 | "secret_header": "x-secret", 5 | "common_name_header": "x-common-name", 6 | "issuer_header": "x-issuer" 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /tests/integration_tests/Dockerfile: -------------------------------------------------------------------------------- 1 | # This is a workaround since `up` images are now built with scratch, 2 | # meaning we cannot execute other commands. This copies the `up` binary 3 | # and runs the tests in a UBI-minimal-based container. 4 | FROM quay.io/observatorium/up:master-2022-07-13-7f0630b as source 5 | 6 | FROM quay.io/app-sre/ubi8-ubi-minimal 7 | 8 | COPY --from=source /usr/bin/up /usr/bin/up 9 | 10 | RUN microdnf update -y &&\ 11 | microdnf install -y curl jq 12 | 13 | COPY ./tests/integration_tests/runtest.sh /tests/runtest.sh 14 | 15 | WORKDIR /tests 16 | ENTRYPOINT ["/bin/sh", "runtest.sh"] 17 | -------------------------------------------------------------------------------- /tests/integration_tests/README.md: -------------------------------------------------------------------------------- 1 | ## RHOBS post-deploy job 2 | 3 | This directory includes the definition of a post-deploy job that is meant to run after each deployment. 4 | It consists of: 5 | 1) A post-deploy OpenShift job template (`post-deploy-job-template.yaml`) that is leveraged by AppSRE Interface.
The usage is defined in a SaaS file [here](https://gitlab.cee.redhat.com/service/app-interface/-/tree/master/data/services/rhobs/observatorium/cicd/saas-post-deploy-test.yaml). 6 | 2) A Dockerfile to run the test in a container. The tests are based on the `up` Docker image, with a few additions that make it possible to `curl` for a bearer token for the test. The Docker image is built and pushed with the help of the AppSRE Interface integration; see the relevant [Jenkins config file](). 7 | 3) The actual test is specified in the `runtest.sh` script. Currently, this is a bare-bones `up` run: the test writes a couple of metric samples and subsequently reads them back. 8 | 9 | To see the exact template usage, check the [SaaS file definition](https://gitlab.cee.redhat.com/service/app-interface/-/tree/master/data/services/rhobs/observatorium/cicd/saas-post-deploy-test.yaml) in AppSRE Interface. The tests are currently set up to run only in `observatorium-stage`. So far, no automatic deployment promotion has been enabled, as we'll first test and assess how the post-deploy job is functioning in the staging environment. -------------------------------------------------------------------------------- /tests/integration_tests/build_deploy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -exv 4 | 5 | # We need to find the absolute path since CI runs the script from the repo's root directory. 6 | ABSOLUTE_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" 7 | 8 | IMAGE="quay.io/app-sre/rhobs-e2e" 9 | IMAGE_TAG=$(git rev-parse --short=7 HEAD) 10 | 11 | docker build -t "${IMAGE}:${IMAGE_TAG}" -f "${ABSOLUTE_PATH}/Dockerfile" . 12 | 13 | if [[ -n "$QUAY_USER" && -n "$QUAY_TOKEN" ]]; then 14 | DOCKER_CONF="$PWD/.docker" 15 | mkdir -p "$DOCKER_CONF" 16 | docker --config="$DOCKER_CONF" login -u="$QUAY_USER" -p="$QUAY_TOKEN" quay.io 17 | docker --config="$DOCKER_CONF" push "${IMAGE}:${IMAGE_TAG}" 18 | fi -------------------------------------------------------------------------------- /tests/integration_tests/framework/.bingo/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Ignore everything 3 | * 4 | 5 | # But not these files: 6 | !.gitignore 7 | !*.mod 8 | !*.sum 9 | !README.md 10 | !Variables.mk 11 | !variables.env 12 | 13 | *tmp.mod 14 | -------------------------------------------------------------------------------- /tests/integration_tests/framework/.bingo/README.md: -------------------------------------------------------------------------------- 1 | # Project Development Dependencies. 2 | 3 | This directory stores Go modules with pinned buildable packages that are used within this repository, managed by https://github.com/bwplotka/bingo. 4 | 5 | * Run `bingo get` to install all tools, each of which has its own module file in this directory. 6 | * Run `bingo get <tool>` to install only the tool that has its own module file in this directory. 7 | * For Makefile: make sure to put `include .bingo/Variables.mk` in your Makefile, then use the `$(<TOOL>)` variable, where `<tool>` is the `.bingo/<tool>.mod` file. 8 | * For shell: run `source .bingo/variables.env` to source an environment variable for each tool (see the sketch below). 9 | * For Go: import `.bingo/variables.go` for the variable names. 10 | * See https://github.com/bwplotka/bingo or `bingo -h` on how to add, remove or change binary dependencies.
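A minimal sketch of the shell workflow from the list above, assuming the tools were already installed with `bingo get`; the variable names match the generated `variables.env` shown further below:

```sh
# Source one environment variable per pinned tool
# (GOJSONTOYAML, JSONNET, JSONNET_LINT, JSONNETFMT, ...).
source .bingo/variables.env

# Invoke the pinned binaries via their variables instead of whatever is on $PATH.
"$JSONNET" --version
"$JSONNETFMT" -i example.jsonnet   # example.jsonnet is a hypothetical file
```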
11 | 12 | ## Requirements 13 | 14 | * Go 1.14+ 15 | -------------------------------------------------------------------------------- /tests/integration_tests/framework/.bingo/go.mod: -------------------------------------------------------------------------------- 1 | module _ // Fake go.mod auto-created by 'bingo' for go -moddir compatibility with non-Go projects. Commit this file, together with other .mod files. -------------------------------------------------------------------------------- /tests/integration_tests/framework/.bingo/gojsontoyaml.mod: -------------------------------------------------------------------------------- 1 | module _ // Auto generated by https://github.com/bwplotka/bingo. DO NOT EDIT 2 | 3 | go 1.19 4 | 5 | require github.com/brancz/gojsontoyaml v0.1.0 6 | -------------------------------------------------------------------------------- /tests/integration_tests/framework/.bingo/gojsontoyaml.sum: -------------------------------------------------------------------------------- 1 | github.com/brancz/gojsontoyaml v0.1.0 h1:SdzR3+BCVOqaI42nFGTeaB7/2DgDM4fhuvRLqxatA8M= 2 | github.com/brancz/gojsontoyaml v0.1.0/go.mod h1:+ycZY94+V11XZBUaDEsbLr3hPNS/ZPrDVKKNUg3Sgvg= 3 | github.com/ghodss/yaml v1.0.0 h1:wQHKEahhL6wmXdzwWG11gIVCkOv05bNOh+Rxn0yngAk= 4 | github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= 5 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 6 | gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= 7 | gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= 8 | -------------------------------------------------------------------------------- /tests/integration_tests/framework/.bingo/jsonnet-lint.mod: -------------------------------------------------------------------------------- 1 | module _ // Auto generated by https://github.com/bwplotka/bingo. 
DO NOT EDIT 2 | 3 | go 1.19 4 | 5 | require github.com/google/go-jsonnet v0.20.0 // cmd/jsonnet-lint 6 | -------------------------------------------------------------------------------- /tests/integration_tests/framework/.bingo/jsonnet-lint.sum: -------------------------------------------------------------------------------- 1 | github.com/fatih/color v1.12.0 h1:mRhaKNwANqRgUBGKmnI5ZxEk7QXmjQeCcuYFMX2bfcc= 2 | github.com/fatih/color v1.12.0/go.mod h1:ELkj/draVOlAH/xkhN6mQ50Qd0MPOk5AAr3maGEBuJM= 3 | github.com/google/go-jsonnet v0.20.0 h1:WG4TTSARuV7bSm4PMB4ohjxe33IHT5WVTrJSU33uT4g= 4 | github.com/google/go-jsonnet v0.20.0/go.mod h1:VbgWF9JX7ztlv770x/TolZNGGFfiHEVx9G6ca2eUmeA= 5 | github.com/mattn/go-colorable v0.1.8 h1:c1ghPdyEDarC70ftn0y+A/Ee++9zz8ljHG1b13eJ0s8= 6 | github.com/mattn/go-colorable v0.1.8/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc= 7 | github.com/mattn/go-isatty v0.0.12 h1:wuysRhFDzyxgEmMf5xjvJ2M9dZoWAXNNr5LSBS7uHXY= 8 | github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU= 9 | golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 10 | golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 11 | golang.org/x/sys v0.1.0 h1:kunALQeHf1/185U1i0GOB/fy1IPRDDpuoOOqRReG57U= 12 | golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 13 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 14 | gopkg.in/yaml.v2 v2.2.7 h1:VUgggvou5XRW9mHwD/yXxIYSMtY0zoKQf/v226p2nyo= 15 | gopkg.in/yaml.v2 v2.2.7/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 16 | sigs.k8s.io/yaml v1.1.0 h1:4A07+ZFc2wgJwo8YNlQpr1rVlgUDlxXHhPJciaPY5gs= 17 | sigs.k8s.io/yaml v1.1.0/go.mod h1:UJmg0vDUVViEyp3mgSv9WPwZCDxu4rQW1olrI1uml+o= 18 | -------------------------------------------------------------------------------- /tests/integration_tests/framework/.bingo/jsonnet.mod: -------------------------------------------------------------------------------- 1 | module _ // Auto generated by https://github.com/bwplotka/bingo. 
DO NOT EDIT 2 | 3 | go 1.19 4 | 5 | require github.com/google/go-jsonnet v0.20.0 // cmd/jsonnet 6 | -------------------------------------------------------------------------------- /tests/integration_tests/framework/.bingo/jsonnet.sum: -------------------------------------------------------------------------------- 1 | github.com/fatih/color v1.12.0 h1:mRhaKNwANqRgUBGKmnI5ZxEk7QXmjQeCcuYFMX2bfcc= 2 | github.com/fatih/color v1.12.0/go.mod h1:ELkj/draVOlAH/xkhN6mQ50Qd0MPOk5AAr3maGEBuJM= 3 | github.com/google/go-jsonnet v0.20.0 h1:WG4TTSARuV7bSm4PMB4ohjxe33IHT5WVTrJSU33uT4g= 4 | github.com/google/go-jsonnet v0.20.0/go.mod h1:VbgWF9JX7ztlv770x/TolZNGGFfiHEVx9G6ca2eUmeA= 5 | github.com/mattn/go-colorable v0.1.8 h1:c1ghPdyEDarC70ftn0y+A/Ee++9zz8ljHG1b13eJ0s8= 6 | github.com/mattn/go-colorable v0.1.8/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc= 7 | github.com/mattn/go-isatty v0.0.12 h1:wuysRhFDzyxgEmMf5xjvJ2M9dZoWAXNNr5LSBS7uHXY= 8 | github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU= 9 | golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 10 | golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 11 | golang.org/x/sys v0.1.0 h1:kunALQeHf1/185U1i0GOB/fy1IPRDDpuoOOqRReG57U= 12 | golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 13 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 14 | gopkg.in/yaml.v2 v2.2.7 h1:VUgggvou5XRW9mHwD/yXxIYSMtY0zoKQf/v226p2nyo= 15 | gopkg.in/yaml.v2 v2.2.7/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 16 | sigs.k8s.io/yaml v1.1.0 h1:4A07+ZFc2wgJwo8YNlQpr1rVlgUDlxXHhPJciaPY5gs= 17 | sigs.k8s.io/yaml v1.1.0/go.mod h1:UJmg0vDUVViEyp3mgSv9WPwZCDxu4rQW1olrI1uml+o= 18 | -------------------------------------------------------------------------------- /tests/integration_tests/framework/.bingo/jsonnetfmt.mod: -------------------------------------------------------------------------------- 1 | module _ // Auto generated by https://github.com/bwplotka/bingo. 
DO NOT EDIT 2 | 3 | go 1.19 4 | 5 | require github.com/google/go-jsonnet v0.20.0 // cmd/jsonnetfmt 6 | -------------------------------------------------------------------------------- /tests/integration_tests/framework/.bingo/jsonnetfmt.sum: -------------------------------------------------------------------------------- 1 | github.com/fatih/color v1.12.0 h1:mRhaKNwANqRgUBGKmnI5ZxEk7QXmjQeCcuYFMX2bfcc= 2 | github.com/fatih/color v1.12.0/go.mod h1:ELkj/draVOlAH/xkhN6mQ50Qd0MPOk5AAr3maGEBuJM= 3 | github.com/google/go-jsonnet v0.20.0 h1:WG4TTSARuV7bSm4PMB4ohjxe33IHT5WVTrJSU33uT4g= 4 | github.com/google/go-jsonnet v0.20.0/go.mod h1:VbgWF9JX7ztlv770x/TolZNGGFfiHEVx9G6ca2eUmeA= 5 | github.com/mattn/go-colorable v0.1.8 h1:c1ghPdyEDarC70ftn0y+A/Ee++9zz8ljHG1b13eJ0s8= 6 | github.com/mattn/go-colorable v0.1.8/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc= 7 | github.com/mattn/go-isatty v0.0.12 h1:wuysRhFDzyxgEmMf5xjvJ2M9dZoWAXNNr5LSBS7uHXY= 8 | github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU= 9 | golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 10 | golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 11 | golang.org/x/sys v0.1.0 h1:kunALQeHf1/185U1i0GOB/fy1IPRDDpuoOOqRReG57U= 12 | golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 13 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 14 | gopkg.in/yaml.v2 v2.2.7 h1:VUgggvou5XRW9mHwD/yXxIYSMtY0zoKQf/v226p2nyo= 15 | gopkg.in/yaml.v2 v2.2.7/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 16 | sigs.k8s.io/yaml v1.1.0 h1:4A07+ZFc2wgJwo8YNlQpr1rVlgUDlxXHhPJciaPY5gs= 17 | sigs.k8s.io/yaml v1.1.0/go.mod h1:UJmg0vDUVViEyp3mgSv9WPwZCDxu4rQW1olrI1uml+o= 18 | -------------------------------------------------------------------------------- /tests/integration_tests/framework/.bingo/variables.env: -------------------------------------------------------------------------------- 1 | # Auto generated binary variables helper managed by https://github.com/bwplotka/bingo v0.8. DO NOT EDIT. 2 | # All tools are designed to be build inside $GOBIN. 3 | # Those variables will work only until 'bingo get' was invoked, or if tools were installed via Makefile's Variables.mk. 4 | GOBIN=${GOBIN:=$(go env GOBIN)} 5 | 6 | if [ -z "$GOBIN" ]; then 7 | GOBIN="$(go env GOPATH)/bin" 8 | fi 9 | 10 | 11 | GOJSONTOYAML="${GOBIN}/gojsontoyaml-v0.1.0" 12 | 13 | JSONNET_LINT="${GOBIN}/jsonnet-lint-v0.20.0" 14 | 15 | JSONNET="${GOBIN}/jsonnet-v0.20.0" 16 | 17 | JSONNETFMT="${GOBIN}/jsonnetfmt-v0.20.0" 18 | 19 | -------------------------------------------------------------------------------- /tests/integration_tests/framework/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM golang:1.20-alpine 2 | RUN apk update && apk add git 3 | 4 | WORKDIR /integration-tests 5 | 6 | COPY go.mod go.sum ./ 7 | RUN go mod download 8 | 9 | COPY . . 
10 | 11 | RUN go build ./cmd/rhobs-test 12 | ENTRYPOINT [ "./rhobs-test" ] 13 | -------------------------------------------------------------------------------- /tests/integration_tests/framework/examples/manifests/dev/test-job.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | items: 3 | - apiVersion: batch/v1 4 | kind: Job 5 | metadata: 6 | labels: 7 | app.kubernetes.io/component: test 8 | app.kubernetes.io/instance: rhobs-test 9 | app.kubernetes.io/name: rhobs-test-job 10 | name: rhobs-test-job 11 | spec: 12 | template: 13 | metadata: 14 | labels: 15 | app.kubernetes.io/component: test 16 | app.kubernetes.io/instance: rhobs-test 17 | app.kubernetes.io/name: rhobs-test-job 18 | spec: 19 | containers: 20 | - args: 21 | - --namespaces=prometheus-example 22 | - --interval=5s 23 | - --timeout=60s 24 | image: localhost:5001/rhobs-test:latest 25 | name: rhobs-test-job 26 | resources: {} 27 | volumeMounts: [] 28 | initContainers: [] 29 | restartPolicy: OnFailure 30 | serviceAccountName: rhobs-test-job 31 | volumes: [] 32 | kind: List 33 | -------------------------------------------------------------------------------- /tests/integration_tests/framework/examples/manifests/openshift/rhobs-test-job-template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: template.openshift.io/v1 2 | kind: Template 3 | metadata: 4 | name: rhobs-test-job 5 | objects: 6 | - apiVersion: batch/v1 7 | kind: Job 8 | metadata: 9 | labels: 10 | app.kubernetes.io/component: test 11 | app.kubernetes.io/instance: rhobs-test 12 | app.kubernetes.io/name: ${JOB_NAME} 13 | name: ${JOB_NAME} 14 | spec: 15 | template: 16 | metadata: 17 | labels: 18 | app.kubernetes.io/component: test 19 | app.kubernetes.io/instance: rhobs-test 20 | app.kubernetes.io/name: ${JOB_NAME} 21 | spec: 22 | containers: 23 | - args: 24 | - --namespaces=${JOB_NAMESPACES} 25 | - --interval=${JOB_INTERVAL} 26 | - --timeout=${JOB_TIMEOUT} 27 | image: ${JOB_IMAGE}:${JOB_IMAGE_TAG} 28 | name: ${JOB_NAME} 29 | resources: {} 30 | volumeMounts: [] 31 | initContainers: [] 32 | restartPolicy: OnFailure 33 | serviceAccountName: ${SERVICE_ACCOUNT_NAME} 34 | volumes: [] 35 | parameters: 36 | - name: JOB_NAMESPACES 37 | value: observatorium,observatorium-metrics,observatorium-logs,minio,dex,telemeter 38 | - name: JOB_NAME 39 | value: rhobs-test-job 40 | - name: JOB_INTERVAL 41 | value: 10s 42 | - name: JOB_TIMEOUT 43 | value: 1m 44 | - name: JOB_IMAGE 45 | value: quay.io/app-sre/rhobs-test 46 | - name: JOB_IMAGE_TAG 47 | value: latest 48 | - name: SERVICE_ACCOUNT_NAME 49 | value: rhobs-test-job 50 | -------------------------------------------------------------------------------- /tests/integration_tests/framework/integration-test.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rhobs/configuration/6d2b136e48c291c1a415394fed40219188620665/tests/integration_tests/framework/integration-test.png -------------------------------------------------------------------------------- /tests/integration_tests/framework/pkg/client/client.go: -------------------------------------------------------------------------------- 1 | package client 2 | 3 | import ( 4 | "github.com/rhobs/configuration/tests/integration_tests/framework/pkg/logger" 5 | 6 | "k8s.io/client-go/kubernetes" 7 | "k8s.io/client-go/rest" 8 | "k8s.io/client-go/tools/clientcmd" 9 | ) 10 | 11 | func client(kubeconfig string) 
*kubernetes.Clientset { 12 | var ( 13 | config *rest.Config 14 | err error 15 | ) 16 | // If no kubeconfig file specified, then use in-cluster config 17 | if kubeconfig == "" { 18 | config, err = rest.InClusterConfig() 19 | if err != nil { 20 | logger.AppLog.LogFatal("Error getting in-cluster config: %v\n", err) 21 | } 22 | } else { 23 | // If kubeconfig file is specified, then use it 24 | config, err = clientcmd.BuildConfigFromFlags("", kubeconfig) 25 | if err != nil { 26 | logger.AppLog.LogFatal("Error building kubeconfig from file %s: %v\n", kubeconfig, err) 27 | } 28 | } 29 | // Create Kubernetes clientset 30 | clientset, err := kubernetes.NewForConfig(config) 31 | if err != nil { 32 | logger.AppLog.LogFatal("Error creating Kubernetes clientset: %v\n", err) 33 | } 34 | return clientset 35 | } 36 | func GetClient(kubeconfig string) *kubernetes.Clientset { 37 | return client(kubeconfig) 38 | } 39 | --------------------------------------------------------------------------------
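For local debugging, the framework binary can be built and run outside the cluster, in which case `GetClient` falls back to the given kubeconfig instead of the in-cluster config. A minimal sketch, assuming a `--kubeconfig` flag is wired to `GetClient` (that flag is hypothetical here; `--namespaces`, `--interval`, and `--timeout` mirror `examples/manifests/dev/test-job.yaml`):

```sh
# Build the test binary the same way the framework Dockerfile does.
go build ./cmd/rhobs-test

# Run it against a local cluster using a kubeconfig file.
./rhobs-test \
  --kubeconfig="$HOME/.kube/config" \
  --namespaces=prometheus-example \
  --interval=5s \
  --timeout=60s
```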