├── VERSION ├── OWNERS ├── cmd └── OWNERS ├── docs ├── OWNERS ├── design │ ├── assets │ │ ├── release.png │ │ └── release.uml │ └── release.md ├── user-guides │ ├── federation │ │ ├── manifests │ │ │ ├── 00-ns.yaml │ │ │ ├── 11-crb.yaml │ │ │ ├── 10-ms.yaml │ │ │ └── 20-smon-cmo.yaml │ │ └── assets │ │ │ └── cmo-obo-federation.png │ ├── perses-dashboards │ │ ├── dashboard │ │ │ ├── 00-namespace.yaml │ │ │ ├── 01-datasource.yaml │ │ │ └── 02-dashboard.yaml │ │ ├── assets │ │ │ └── console.png │ │ └── plugin │ │ │ └── 00-plugin.yaml │ ├── thanos_querier │ │ ├── console │ │ │ ├── 00-plugin.yaml │ │ │ ├── 01-datasource.yaml │ │ │ └── 03_dashboard.yaml │ │ └── install │ │ │ ├── 03-thanosquerier.yaml │ │ │ ├── 00-namespaces.yaml │ │ │ ├── 01-monitoringstacks.yaml │ │ │ ├── 05-monitors.yaml │ │ │ ├── 06_load_generator.yaml │ │ │ └── 04-applications.yaml │ ├── README.md │ └── thanos_querier.md └── assess-resources.md ├── pkg ├── OWNERS ├── apis │ ├── monitoring │ │ ├── OWNERS │ │ └── v1alpha1 │ │ │ └── register.go │ ├── observability │ │ ├── OWNERS │ │ └── v1alpha1 │ │ │ ├── register.go │ │ │ └── tracing.go │ ├── go.mod │ └── uiplugin │ │ └── v1alpha1 │ │ └── register.go ├── controllers │ ├── monitoring │ │ ├── OWNERS │ │ └── monitoring-stack │ │ │ ├── testdata │ │ │ ├── no-tls │ │ │ └── tls │ │ │ └── components_test.go │ ├── uiplugin │ │ ├── OWNERS │ │ ├── config │ │ │ └── korrel8r.yaml │ │ ├── components_test.go │ │ ├── dashboards.go │ │ ├── plugin_info_builder.go │ │ └── distributed_tracing.go │ ├── util │ │ └── common.go │ ├── observability │ │ ├── olm_components.go │ │ └── collector.yaml │ └── operator │ │ ├── components.go │ │ └── controller.go ├── assets │ └── loader.go ├── reconciler │ ├── create_update_reconciler.go │ └── reconciler.go └── operator │ └── scheme.go ├── test ├── OWNERS ├── e2e │ ├── traces_telemetrygen.yaml │ ├── operator_metrics_test.go │ ├── framework │ │ ├── monitoring_stack.go │ │ └── prometheus_client.go │ ├── traces_minio.yaml │ ├── 
po_admission_webhook_test.go │ ├── traces_tempo_readiness.yaml │ ├── traces_verify.yaml │ ├── uiplugin_test.go │ └── main_test.go ├── osd-e2e.sh ├── Dockerfile ├── lib │ └── utils.bash └── run-e2e-ocp.sh ├── deploy ├── package-operator │ ├── package.Containerfile │ ├── crds │ │ ├── namespace.yaml │ │ └── kustomization.yaml │ ├── dependencies │ │ ├── patches │ │ │ └── admission-webhook-service-namespace.yaml │ │ └── kustomization.yaml │ ├── operator-kubeconfig │ │ ├── kustomization.yaml │ │ └── patches │ │ │ └── kubeconfig-volume.yml │ ├── dependencies-kubeconfig │ │ ├── kustomization.yaml │ │ └── patches │ │ │ └── kubeconfig-volume.yml │ ├── operator │ │ └── kustomization.yaml │ └── package │ │ └── manifest.yaml ├── perses │ ├── perses-service-account.yaml │ ├── perses-operator-service-account.yaml │ ├── crds │ │ └── kustomization.yaml │ ├── perses-operator-cluster-role-binding.yaml │ ├── kustomization.yaml │ ├── persesdashboard_viewer_role.yaml │ ├── persesdatasource_viewer_role.yaml │ ├── persesdashboard_editor_role.yaml │ ├── persesdatasource_editor_role.yaml │ ├── perses-operator-deployment.yaml │ └── perses-operator-cluster-role.yaml ├── operator │ ├── observability-operator-service-account.yaml │ ├── observability-operator-service.yaml │ ├── observability-operator-cluster-role-binding.yaml │ ├── kustomization.yaml │ └── observability-operator-deployment.yaml ├── monitoring │ ├── kustomization.yaml │ └── observability-operator-rules.yaml ├── samples │ ├── kustomization.yaml │ ├── package.yaml │ ├── thanos-querier.yaml │ ├── ui-monitoring-plugin.yaml │ ├── monitoring-stack.yaml │ ├── multi-ns-stack.yaml │ └── observability-installer.yaml ├── scorecard │ ├── bases │ │ └── config.yaml │ ├── patches │ │ ├── basic.config.yaml │ │ └── olm.config.yaml │ └── kustomization.yaml ├── dependencies │ ├── patches │ │ ├── api-support-techpreview-annotation.yaml │ │ └── api-support-experimental-annotation.yaml │ └── admission-webhook │ │ ├── cluster-role.yaml │ │ ├── 
cluster-role-binding.yaml │ │ ├── prometheus-rule-validating-webhook.yaml │ │ └── alertmanager-config-validating-webhook.yaml ├── crds │ ├── common │ │ └── kustomization.yaml │ └── kubernetes │ │ ├── kustomization.yaml │ │ └── monitoring.coreos.com_prometheusrules.yaml └── olm │ └── kustomization.yaml ├── hack ├── kind │ ├── audit-policy.yaml │ ├── operator-metrics-service.yaml │ ├── setup.sh │ ├── registry.yaml │ └── config.yaml ├── olm │ ├── k8s │ │ ├── subscription.yaml │ │ └── catalog-src.yaml │ ├── subscription.yaml │ └── catalog-src.yaml ├── boilerplate.go.txt ├── loadtest │ └── test.sh ├── update-obo-prometheus-operator.sh ├── dev-deploy.sh └── README.md ├── bundle ├── manifests │ ├── perses_v1_serviceaccount.yaml │ ├── observability-operator_rbac.authorization.k8s.io_v1_rolebinding.yaml │ ├── obo-prometheus-operator-admission-webhook_policy_v1_poddisruptionbudget.yaml │ ├── obo-prometheus-operator-admission-webhook_v1_service.yaml │ ├── observability-operator_v1_service.yaml │ ├── obo-prometheus-operator_v1_service.yaml │ ├── persesdashboard-viewer-role_rbac.authorization.k8s.io_v1_clusterrole.yaml │ ├── persesdatasource-viewer-role_rbac.authorization.k8s.io_v1_clusterrole.yaml │ ├── persesdashboard-editor-role_rbac.authorization.k8s.io_v1_clusterrole.yaml │ ├── persesdatasource-editor-role_rbac.authorization.k8s.io_v1_clusterrole.yaml │ └── observability-operator_monitoring.coreos.com_v1_prometheusrule.yaml ├── metadata │ └── annotations.yaml └── tests │ └── scorecard │ └── config.yaml ├── .gitignore ├── dashboards └── README.md ├── .github ├── tools ├── workflows │ ├── package-operator-stable.yaml │ ├── olm-stable.yaml │ └── pr-checks.yaml ├── dependabot.yml ├── tools-cache │ └── action.yaml ├── osd-test-harness-publish │ └── action.yaml ├── package-operator-publish │ └── action.yaml ├── e2e-tests-olm │ └── action.yaml └── olm-publish │ └── action.yaml ├── .golangci.yml ├── OWNERS_ALIASES ├── commitlint.config.mjs ├── .versionrc ├── must-gather ├── 
collection-scripts │ ├── common.sh │ └── gather └── README.md ├── bundle.Dockerfile ├── DEPENDENCY_CONSTRAINTS.md └── README.md /VERSION: -------------------------------------------------------------------------------- 1 | 1.3.0 -------------------------------------------------------------------------------- /OWNERS: -------------------------------------------------------------------------------- 1 | approvers: 2 | - maintainers 3 | reviewers: 4 | - maintainers 5 | -------------------------------------------------------------------------------- /cmd/OWNERS: -------------------------------------------------------------------------------- 1 | approvers: 2 | - contributors 3 | reviewers: 4 | - contributors 5 | -------------------------------------------------------------------------------- /docs/OWNERS: -------------------------------------------------------------------------------- 1 | approvers: 2 | - contributors 3 | reviewers: 4 | - contributors 5 | -------------------------------------------------------------------------------- /pkg/OWNERS: -------------------------------------------------------------------------------- 1 | approvers: 2 | - contributors 3 | reviewers: 4 | - contributors 5 | -------------------------------------------------------------------------------- /pkg/apis/monitoring/OWNERS: -------------------------------------------------------------------------------- 1 | approvers: 2 | - mon 3 | reviewers: 4 | - mon 5 | -------------------------------------------------------------------------------- /test/OWNERS: -------------------------------------------------------------------------------- 1 | approvers: 2 | - contributors 3 | reviewers: 4 | - contributors 5 | -------------------------------------------------------------------------------- /deploy/package-operator/package.Containerfile: -------------------------------------------------------------------------------- 1 | FROM scratch 2 | 3 | ADD . 
/package 4 | -------------------------------------------------------------------------------- /pkg/controllers/monitoring/OWNERS: -------------------------------------------------------------------------------- 1 | approvers: 2 | - mon 3 | reviewers: 4 | - mon 5 | -------------------------------------------------------------------------------- /pkg/apis/observability/OWNERS: -------------------------------------------------------------------------------- 1 | approvers: 2 | - cluster-obs 3 | reviewers: 4 | - cluster-obs 5 | -------------------------------------------------------------------------------- /hack/kind/audit-policy.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: audit.k8s.io/v1 2 | kind: Policy 3 | rules: 4 | - level: Metadata -------------------------------------------------------------------------------- /docs/design/assets/release.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rhobs/observability-operator/HEAD/docs/design/assets/release.png -------------------------------------------------------------------------------- /docs/user-guides/federation/manifests/00-ns.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Namespace 3 | metadata: 4 | name: federate-cmo 5 | -------------------------------------------------------------------------------- /pkg/controllers/uiplugin/OWNERS: -------------------------------------------------------------------------------- 1 | filters: 2 | ".*": 3 | approvers: 4 | - ui 5 | reviewers: 6 | - ui 7 | -------------------------------------------------------------------------------- /deploy/perses/perses-service-account.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ServiceAccount 3 | metadata: 4 | creationTimestamp: null 5 | name: perses 6 | 
-------------------------------------------------------------------------------- /bundle/manifests/perses_v1_serviceaccount.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ServiceAccount 3 | metadata: 4 | creationTimestamp: null 5 | name: perses 6 | -------------------------------------------------------------------------------- /docs/user-guides/perses-dashboards/dashboard/00-namespace.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: Namespace 4 | metadata: 5 | name: perses-example 6 | -------------------------------------------------------------------------------- /deploy/operator/observability-operator-service-account.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ServiceAccount 3 | metadata: 4 | name: observability-operator-sa 5 | -------------------------------------------------------------------------------- /deploy/monitoring/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | resources: 4 | - observability-operator-rules.yaml 5 | -------------------------------------------------------------------------------- /deploy/perses/perses-operator-service-account.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ServiceAccount 3 | metadata: 4 | name: perses-operator 5 | namespace: default 6 | -------------------------------------------------------------------------------- /docs/user-guides/perses-dashboards/assets/console.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rhobs/observability-operator/HEAD/docs/user-guides/perses-dashboards/assets/console.png 
-------------------------------------------------------------------------------- /deploy/samples/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | resources: 4 | - monitoring-stack.yaml 5 | - thanos-querier.yaml 6 | -------------------------------------------------------------------------------- /docs/user-guides/federation/assets/cmo-obo-federation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rhobs/observability-operator/HEAD/docs/user-guides/federation/assets/cmo-obo-federation.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /tmp 2 | RELEASE_BODY.md 3 | jsonnet/vendor 4 | deploy/package-operator/package/crds 5 | deploy/package-operator/package/dependencies 6 | deploy/package-operator/package/operator 7 | -------------------------------------------------------------------------------- /deploy/package-operator/crds/namespace.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Namespace 3 | metadata: 4 | name: observability-operator 5 | annotations: 6 | package-operator.run/phase: crds 7 | -------------------------------------------------------------------------------- /deploy/package-operator/dependencies/patches/admission-webhook-service-namespace.yaml: -------------------------------------------------------------------------------- 1 | - op: replace 2 | path: /webhooks/0/clientConfig/service/namespace 3 | value: observability-operator 4 | -------------------------------------------------------------------------------- /deploy/scorecard/bases/config.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: 
scorecard.operatorframework.io/v1alpha3 2 | kind: Configuration 3 | metadata: 4 | name: config 5 | stages: 6 | - parallel: true 7 | tests: [] 8 | -------------------------------------------------------------------------------- /deploy/samples/package.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: package-operator.run/v1alpha1 2 | kind: ClusterPackage 3 | metadata: 4 | name: example 5 | spec: 6 | image: quay.io/observability-operator/observability-operator-package:latest 7 | -------------------------------------------------------------------------------- /deploy/samples/thanos-querier.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.rhobs/v1alpha1 2 | kind: ThanosQuerier 3 | metadata: 4 | name: example-thanos 5 | spec: 6 | selector: 7 | matchLabels: 8 | mso: example 9 | 10 | 11 | -------------------------------------------------------------------------------- /dashboards/README.md: -------------------------------------------------------------------------------- 1 | This directory holds dashboards deployed to the App-SRE Grafana instances. 2 | 3 | See https://gitlab.cee.redhat.com/service/app-interface/-/blob/master/docs/app-sre/monitoring.md#adding-dashboards for more details. 4 | -------------------------------------------------------------------------------- /deploy/dependencies/patches/api-support-techpreview-annotation.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apiextensions.k8s.io/v1 2 | kind: CustomResourceDefinition 3 | metadata: 4 | name: IGNORED 5 | annotations: 6 | observability.openshift.io/api-support: "TechPreview" 7 | -------------------------------------------------------------------------------- /.github/tools: -------------------------------------------------------------------------------- 1 | # DO NOT EDIT! 
Autogenerated by make tools 2 | 3 | golangci-lint v1.63.4 4 | controller-gen v0.19.0 5 | kustomize v5.5.0 6 | oc v4.8.11 7 | operator-sdk v1.41.1 8 | opm v1.57.0 9 | promq v0.0.1 10 | crdoc v0.6.2 11 | shellcheck 0.11.0 12 | -------------------------------------------------------------------------------- /deploy/dependencies/patches/api-support-experimental-annotation.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apiextensions.k8s.io/v1 2 | kind: CustomResourceDefinition 3 | metadata: 4 | name: IGNORED 5 | annotations: 6 | observability.openshift.io/api-support: "Experimental-SSA" 7 | -------------------------------------------------------------------------------- /deploy/package-operator/operator-kubeconfig/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | resources: 4 | - ../operator/ 5 | patches: 6 | - path: patches/kubeconfig-volume.yml 7 | target: 8 | kind: Deployment 9 | -------------------------------------------------------------------------------- /docs/user-guides/thanos_querier/console/00-plugin.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: observability.openshift.io/v1alpha1 2 | kind: UIPlugin 3 | metadata: 4 | name: monitoring 5 | spec: 6 | type: Monitoring 7 | monitoring: 8 | perses: 9 | enabled: true 10 | -------------------------------------------------------------------------------- /docs/user-guides/perses-dashboards/plugin/00-plugin.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: observability.openshift.io/v1alpha1 2 | kind: UIPlugin 3 | metadata: 4 | name: monitoring 5 | spec: 6 | type: Monitoring 7 | monitoring: 8 | perses: 9 | enabled: true 10 | -------------------------------------------------------------------------------- 
/deploy/package-operator/dependencies-kubeconfig/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | patches: 4 | - path: patches/kubeconfig-volume.yml 5 | target: 6 | kind: Deployment 7 | resources: 8 | - ../dependencies/ 9 | -------------------------------------------------------------------------------- /deploy/package-operator/crds/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | namespace: observability-operator 4 | resources: 5 | - namespace.yaml 6 | - ../../crds/kubernetes 7 | commonAnnotations: 8 | package-operator.run/phase: crds 9 | -------------------------------------------------------------------------------- /deploy/samples/ui-monitoring-plugin.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: observability.openshift.io/v1alpha1 2 | kind: UIPlugin 3 | metadata: 4 | name: monitoring 5 | spec: 6 | type: Monitoring 7 | monitoring: 8 | perses: 9 | enabled: true 10 | incidents: 11 | enabled: true 12 | -------------------------------------------------------------------------------- /deploy/samples/monitoring-stack.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.rhobs/v1alpha1 2 | kind: MonitoringStack 3 | metadata: 4 | name: sample-monitoring-stack 5 | labels: 6 | mso: example 7 | spec: 8 | logLevel: debug 9 | retention: 1d 10 | resourceSelector: 11 | matchLabels: 12 | app: demo 13 | -------------------------------------------------------------------------------- /deploy/perses/crds/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | resources: 4 | - 
perses.dev_perses.yaml 5 | - perses.dev_persesdashboards.yaml 6 | - perses.dev_persesdatasources.yaml 7 | 8 | commonAnnotations: 9 | observability.openshift.io/api-support: DevPreview 10 | -------------------------------------------------------------------------------- /deploy/scorecard/patches/basic.config.yaml: -------------------------------------------------------------------------------- 1 | - op: add 2 | path: /stages/0/tests/- 3 | value: 4 | entrypoint: 5 | - scorecard-test 6 | - basic-check-spec 7 | image: quay.io/operator-framework/scorecard-test:v1.13.0 8 | labels: 9 | suite: basic 10 | test: basic-check-spec-test 11 | -------------------------------------------------------------------------------- /deploy/crds/common/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | resources: 4 | - monitoring.rhobs_monitoringstacks.yaml 5 | - monitoring.rhobs_thanosqueriers.yaml 6 | - observability.openshift.io_uiplugins.yaml 7 | - observability.openshift.io_observabilityinstallers.yaml 8 | -------------------------------------------------------------------------------- /.golangci.yml: -------------------------------------------------------------------------------- 1 | run: 2 | timeout: 10m 3 | 4 | linters: 5 | enable: 6 | - gci 7 | 8 | issues: 9 | exclude-rules: 10 | - path: _test.go 11 | linters: 12 | - errcheck 13 | 14 | linters-settings: 15 | gci: 16 | sections: 17 | - standard 18 | - default 19 | - prefix(github.com/rhobs/observability-operator) 20 | -------------------------------------------------------------------------------- /docs/user-guides/README.md: -------------------------------------------------------------------------------- 1 | # User Guides 2 | 3 | * [Using SSA to customize Prometheus](server-side-apply.md) 4 | * [Federating OpenShift In-Cluster Prometheus](federation.md) 5 | * [User interface (UI) 
plugins](observability-ui-plugins.md) 6 | * [Deploying ThanosQuerier for multiple MonitoringStacks](thanos_querier.md) 7 | * [Perses dashboards](perses-dashboards.md) 8 | -------------------------------------------------------------------------------- /deploy/package-operator/operator/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | namespace: observability-operator 4 | images: 5 | - name: observability-operator 6 | newName: observability-operator 7 | newTag: 1.3.0 8 | commonAnnotations: 9 | package-operator.run/phase: operator 10 | resources: 11 | - ../../operator/ 12 | -------------------------------------------------------------------------------- /deploy/samples/multi-ns-stack.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.rhobs/v1alpha1 2 | kind: MonitoringStack 3 | metadata: 4 | name: multi-ns 5 | labels: 6 | thanos-querier: the-querier 7 | spec: 8 | logLevel: debug 9 | retention: 2h 10 | resourceSelector: 11 | matchLabels: 12 | app: demo 13 | namespaceSelector: 14 | matchLabels: 15 | monitoring.rhobs/stack: multi-ns 16 | -------------------------------------------------------------------------------- /bundle/manifests/observability-operator_rbac.authorization.k8s.io_v1_rolebinding.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: RoleBinding 3 | metadata: 4 | creationTimestamp: null 5 | name: observability-operator 6 | roleRef: 7 | apiGroup: rbac.authorization.k8s.io 8 | kind: Role 9 | name: observability-operator 10 | subjects: 11 | - kind: ServiceAccount 12 | name: observability-operator-sa 13 | namespace: operators 14 | -------------------------------------------------------------------------------- /hack/olm/k8s/subscription.yaml: 
-------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: operators.coreos.com/v1alpha1 3 | kind: Subscription 4 | metadata: 5 | labels: 6 | operators.coreos.com/observability-operator.openshift-operators: "" 7 | name: observability-operator 8 | namespace: operators 9 | spec: 10 | channel: development 11 | installPlanApproval: Automatic 12 | name: observability-operator 13 | source: observability-operator 14 | sourceNamespace: operators 15 | -------------------------------------------------------------------------------- /deploy/perses/perses-operator-cluster-role-binding.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRoleBinding 3 | metadata: 4 | labels: 5 | app.kubernetes.io/component: controller 6 | app.kubernetes.io/name: perses-operator 7 | name: perses-operator 8 | roleRef: 9 | apiGroup: rbac.authorization.k8s.io 10 | kind: ClusterRole 11 | name: perses-operator 12 | subjects: 13 | - kind: ServiceAccount 14 | name: perses-operator 15 | -------------------------------------------------------------------------------- /hack/olm/subscription.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: operators.coreos.com/v1alpha1 3 | kind: Subscription 4 | metadata: 5 | labels: 6 | operators.coreos.com/observability-operator.openshift-operators: "" 7 | name: observability-operator 8 | namespace: openshift-operators 9 | spec: 10 | channel: development 11 | installPlanApproval: Automatic 12 | name: observability-operator 13 | source: observability-operator 14 | sourceNamespace: openshift-marketplace 15 | -------------------------------------------------------------------------------- /deploy/perses/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 
3 | 4 | resources: 5 | - crds 6 | - perses-operator-deployment.yaml 7 | - perses-operator-cluster-role.yaml 8 | - perses-operator-service-account.yaml 9 | - perses-operator-cluster-role-binding.yaml 10 | - persesdashboard_editor_role.yaml 11 | - persesdashboard_viewer_role.yaml 12 | - persesdatasource_editor_role.yaml 13 | - persesdatasource_viewer_role.yaml 14 | - perses-service-account.yaml 15 | -------------------------------------------------------------------------------- /hack/kind/operator-metrics-service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: operator-metrics-service 5 | labels: 6 | app: operator-metrics 7 | spec: 8 | # NOTE: 30001 must be exposed: see hack/kind/config.yaml 9 | type: NodePort 10 | ports: 11 | - protocol: TCP 12 | port: 30001 13 | targetPort: 8080 14 | nodePort: 30001 15 | selector: 16 | app.kubernetes.io/component: operator 17 | app.kubernetes.io/name: observability-operator 18 | -------------------------------------------------------------------------------- /deploy/scorecard/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | resources: 4 | - bases/config.yaml 5 | patches: 6 | - path: patches/basic.config.yaml 7 | target: 8 | group: scorecard.operatorframework.io 9 | kind: Configuration 10 | name: config 11 | version: v1alpha3 12 | - path: patches/olm.config.yaml 13 | target: 14 | group: scorecard.operatorframework.io 15 | kind: Configuration 16 | name: config 17 | version: v1alpha3 18 | -------------------------------------------------------------------------------- /deploy/crds/kubernetes/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | resources: 4 | - ../common 5 | - 
monitoring.coreos.com_alertmanagerconfigs.yaml 6 | - monitoring.coreos.com_alertmanagers.yaml 7 | - monitoring.coreos.com_podmonitors.yaml 8 | - monitoring.coreos.com_probes.yaml 9 | - monitoring.coreos.com_prometheuses.yaml 10 | - monitoring.coreos.com_prometheusrules.yaml 11 | - monitoring.coreos.com_servicemonitors.yaml 12 | - monitoring.coreos.com_thanosrulers.yaml 13 | -------------------------------------------------------------------------------- /docs/user-guides/thanos_querier/install/03-thanosquerier.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: monitoring.rhobs/v1alpha1 3 | kind: ThanosQuerier 4 | metadata: 5 | name: metrics-api 6 | namespace: project-d 7 | labels: 8 | app.kubernetes.io/name: metrics-api 9 | app.kubernetes.io/part-of: monitoring 10 | spec: 11 | selector: 12 | matchLabels: 13 | app.kubernetes.io/part-of: monitoring 14 | namespaceSelector: 15 | matchNames: 16 | - project-a 17 | - project-b 18 | - project-c 19 | -------------------------------------------------------------------------------- /OWNERS_ALIASES: -------------------------------------------------------------------------------- 1 | aliases: 2 | ui: 3 | - jgbernalp 4 | - peteryurkovich 5 | - zhuje 6 | mon: 7 | - jan--f 8 | - simonpasquier 9 | - slashpai 10 | - machine424 11 | - rexagod 12 | - marioferh 13 | korrel8r: 14 | - alanconway 15 | cluster-obs: 16 | - pavolloffay 17 | cluster-health: 18 | - tremes 19 | maintainers: 20 | - jan--f 21 | - danielmellado 22 | - simonpasquier 23 | - slashpai 24 | - marioferh 25 | - jgbernalp 26 | - lihongyan1 27 | -------------------------------------------------------------------------------- /deploy/samples/observability-installer.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: observability.openshift.io/v1alpha1 2 | kind: ObservabilityInstaller 3 | metadata: 4 | name: sample-observability 5 | namespace: observability 6 | 
spec: 7 | capabilities: 8 | tracing: 9 | enabled: true 10 | storage: 11 | objectStorage: 12 | s3: 13 | bucket: tempo 14 | endpoint: http://minio.minio.svc:9000 15 | accessKeyID: tempo 16 | accessKeySecret: 17 | name: minio-secret 18 | key: access_key_secret -------------------------------------------------------------------------------- /deploy/dependencies/admission-webhook/cluster-role.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRole 3 | metadata: 4 | labels: 5 | app.kubernetes.io/component: admission-webhook 6 | app.kubernetes.io/name: prometheus-operator-admission-webhook 7 | app.kubernetes.io/version: 0.61.1-rhobs1 8 | name: prometheus-operator-admission-webhook 9 | rules: 10 | - apiGroups: 11 | - security.openshift.io 12 | resourceNames: 13 | - nonroot-v2 14 | - nonroot 15 | resources: 16 | - securitycontextconstraints 17 | verbs: 18 | - use 19 | -------------------------------------------------------------------------------- /commitlint.config.mjs: -------------------------------------------------------------------------------- 1 | const Configuration = { 2 | /* 3 | * Resolve and load @commitlint/config-conventional from node_modules. 4 | */ 5 | extends: ['@commitlint/config-conventional'], 6 | /* 7 | * Ignore dependabot commit messages until https://github.com/dependabot/dependabot-core/issues/2445 is fixed. 
8 | */ 9 | ignores: [(msg) => /Signed-off-by: dependabot\[bot]/m.test(msg)], 10 | /* 11 | * Ignore konflux commit messages 12 | */ 13 | ignores: [(msg) => /Signed-off-by: red-hat-konflux/m.test(msg)], 14 | }; 15 | 16 | export default Configuration 17 | -------------------------------------------------------------------------------- /deploy/package-operator/package/manifest.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: manifests.package-operator.run/v1alpha1 2 | kind: PackageManifest 3 | metadata: 4 | name: observability-operator 5 | spec: 6 | scopes: 7 | - Cluster 8 | phases: 9 | - name: crds 10 | - name: dependencies 11 | - name: operator 12 | availabilityProbes: 13 | - probes: 14 | - condition: 15 | type: Available 16 | status: "True" 17 | selector: 18 | kind: 19 | group: apps 20 | kind: Deployment 21 | selector: 22 | app.kubernetes.io/name: observability-operator 23 | -------------------------------------------------------------------------------- /deploy/operator/observability-operator-service.yaml: -------------------------------------------------------------------------------- 1 | kind: Service 2 | apiVersion: v1 3 | metadata: 4 | name: observability-operator 5 | labels: 6 | app.kubernetes.io/component: operator 7 | app.kubernetes.io/name: observability-operator 8 | app.kubernetes.io/part-of: observability-operator 9 | annotations: 10 | service.beta.openshift.io/serving-cert-secret-name: observability-operator-tls 11 | spec: 12 | selector: 13 | app.kubernetes.io/name: observability-operator 14 | app.kubernetes.io/component: operator 15 | ports: 16 | - name: metrics 17 | port: 8080 18 | -------------------------------------------------------------------------------- /bundle/manifests/obo-prometheus-operator-admission-webhook_policy_v1_poddisruptionbudget.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: policy/v1 2 | kind: PodDisruptionBudget 3 | 
metadata: 4 | labels: 5 | app.kubernetes.io/name: prometheus-operator-admission-webhook 6 | app.kubernetes.io/part-of: observability-operator 7 | app.kubernetes.io/version: 0.87.0-rhobs1 8 | name: obo-prometheus-operator-admission-webhook 9 | spec: 10 | minAvailable: 1 11 | selector: 12 | matchLabels: 13 | app.kubernetes.io/name: prometheus-operator-admission-webhook 14 | app.kubernetes.io/part-of: observability-operator 15 | -------------------------------------------------------------------------------- /.versionrc: -------------------------------------------------------------------------------- 1 | { 2 | "commitUrlFormat": "https://github.com/rhobs/monitoring-stack-operator/commit/{{hash}}", 3 | "compareUrlFormat": "https://github.com/rhobs/monitoring-stack-operator/commit/{{hash}}", 4 | "issueUrlFormat": "https://github.com/rhobs/monitoring-stack-operator/issues/{{id}}", 5 | "bumpFiles": [ 6 | { 7 | "filename": "VERSION", 8 | "type": "plain-text" 9 | } 10 | ], 11 | "scripts": { 12 | "postbump": "make bundle && git add bundle deploy/package-operator/operator/kustomization.yaml deploy/olm/kustomization.yaml deploy/operator/kustomization.yaml" 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /docs/user-guides/thanos_querier/console/01-datasource.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: perses.dev/v1alpha1 2 | kind: PersesDatasource 3 | metadata: 4 | name: metrics-api 5 | namespace: project-d 6 | labels: 7 | app.kubernetes.io/name: datasource 8 | app.kubernetes.io/part-of: monitoring 9 | spec: 10 | config: 11 | display: 12 | name: "Thanos Querier (myapp)" 13 | default: true 14 | plugin: 15 | kind: PrometheusDatasource 16 | spec: 17 | proxy: 18 | kind: HTTPProxy 19 | spec: 20 | url: "http://thanos-querier-metrics-api.project-d.svc:10902" 21 | -------------------------------------------------------------------------------- 
/test/e2e/traces_telemetrygen.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: batch/v1 2 | kind: Job 3 | metadata: 4 | name: generate-traces-grpc 5 | namespace: tracing-observability 6 | spec: 7 | template: 8 | spec: 9 | containers: 10 | - name: telemetrygen 11 | image: ghcr.io/open-telemetry/opentelemetry-collector-contrib/telemetrygen:v0.129.0 12 | args: 13 | - traces 14 | - --otlp-endpoint=coo-collector.tracing-observability.svc.cluster.local:4317 15 | - --service=grpc 16 | - --otlp-insecure 17 | - --traces=10 18 | restartPolicy: Never -------------------------------------------------------------------------------- /hack/olm/k8s/catalog-src.yaml: -------------------------------------------------------------------------------- 1 | # Apply this to add a new catalog to OLM 2 | --- 3 | apiVersion: operators.coreos.com/v1alpha1 4 | kind: CatalogSource 5 | metadata: 6 | annotations: 7 | name: observability-operator 8 | namespace: operators 9 | spec: 10 | displayName: Observability Operator - Test 11 | icon: 12 | base64data: "" 13 | mediatype: "" 14 | image: quay.io/rhobs/observability-operator-catalog:latest 15 | publisher: Sunil Thaha 16 | sourceType: grpc 17 | grpcPodConfig: 18 | securityContextConfig: restricted 19 | updateStrategy: 20 | registryPoll: 21 | interval: 1m0s 22 | -------------------------------------------------------------------------------- /.github/workflows/package-operator-stable.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | name: Publish package-operator release 3 | 4 | on: 5 | release: 6 | types: 7 | - released 8 | 9 | jobs: 10 | release: 11 | runs-on: ubuntu-latest 12 | environment: quay 13 | steps: 14 | - uses: actions/checkout@v6 15 | with: 16 | fetch-depth: 0 17 | 18 | - name: publish 19 | uses: ./.github/package-operator-publish 20 | env: 21 | IMG_BASE: ${{ vars.IMG_BASE }} 22 | with: 23 | quay_login: ${{ secrets.QUAY_LOGIN }} 24 | 
quay_token: ${{ secrets.QUAY_TOKEN }} 25 | -------------------------------------------------------------------------------- /hack/boilerplate.go.txt: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2021. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ -------------------------------------------------------------------------------- /hack/olm/catalog-src.yaml: -------------------------------------------------------------------------------- 1 | # Apply this to add a new catalog to OLM 2 | --- 3 | apiVersion: operators.coreos.com/v1alpha1 4 | kind: CatalogSource 5 | metadata: 6 | annotations: 7 | name: observability-operator 8 | namespace: openshift-marketplace 9 | spec: 10 | displayName: Observability Operator - Test 11 | icon: 12 | base64data: "" 13 | mediatype: "" 14 | image: quay.io/rhobs/observability-operator-catalog:latest 15 | publisher: Sunil Thaha 16 | sourceType: grpc 17 | grpcPodConfig: 18 | securityContextConfig: restricted 19 | updateStrategy: 20 | registryPoll: 21 | interval: 1m0s 22 | -------------------------------------------------------------------------------- /deploy/dependencies/admission-webhook/cluster-role-binding.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRoleBinding 3 | metadata: 4 | labels: 5 | app.kubernetes.io/component: admission-webhook 6 | 
app.kubernetes.io/name: prometheus-operator-admission-webhook 7 | app.kubernetes.io/version: 0.61.1-rhobs1 8 | name: prometheus-operator-admission-webhook 9 | roleRef: 10 | apiGroup: rbac.authorization.k8s.io 11 | kind: ClusterRole 12 | name: prometheus-operator-admission-webhook 13 | subjects: 14 | - kind: ServiceAccount 15 | name: prometheus-operator-admission-webhook 16 | namespace: default 17 | -------------------------------------------------------------------------------- /docs/user-guides/federation/manifests/11-crb.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRoleBinding 3 | metadata: 4 | name: federate-cmo-ms-view 5 | labels: 6 | kubernetes.io/part-of: federate-cmo-ms 7 | monitoring.rhobs/stack: federate-cmo-ms 8 | 9 | roleRef: 10 | apiGroup: rbac.authorization.k8s.io 11 | kind: ClusterRole 12 | name: cluster-monitoring-view 13 | subjects: 14 | - kind: ServiceAccount 15 | # 👇 ServiceAccount used in the prometheus deployed by ObO. 
16 | # SA name follows -prometheus nomenclature 17 | name: federate-cmo-ms-prometheus 18 | namespace: federate-cmo 19 | -------------------------------------------------------------------------------- /bundle/manifests/obo-prometheus-operator-admission-webhook_v1_service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | creationTimestamp: null 5 | labels: 6 | app.kubernetes.io/name: prometheus-operator-admission-webhook 7 | app.kubernetes.io/part-of: observability-operator 8 | app.kubernetes.io/version: 0.87.0-rhobs1 9 | name: obo-prometheus-operator-admission-webhook 10 | spec: 11 | ports: 12 | - name: https 13 | port: 443 14 | targetPort: https 15 | selector: 16 | app.kubernetes.io/name: prometheus-operator-admission-webhook 17 | app.kubernetes.io/part-of: observability-operator 18 | status: 19 | loadBalancer: {} 20 | -------------------------------------------------------------------------------- /bundle/manifests/observability-operator_v1_service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | annotations: 5 | service.beta.openshift.io/serving-cert-secret-name: observability-operator-tls 6 | creationTimestamp: null 7 | labels: 8 | app.kubernetes.io/component: operator 9 | app.kubernetes.io/name: observability-operator 10 | app.kubernetes.io/part-of: observability-operator 11 | name: observability-operator 12 | spec: 13 | ports: 14 | - name: metrics 15 | port: 8080 16 | targetPort: 0 17 | selector: 18 | app.kubernetes.io/component: operator 19 | app.kubernetes.io/name: observability-operator 20 | status: 21 | loadBalancer: {} 22 | -------------------------------------------------------------------------------- /.github/workflows/olm-stable.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | name: OLM Stable Channel 3 | 4 | on: 5 | 
release: 6 | types: 7 | - released 8 | 9 | jobs: 10 | release: 11 | runs-on: ubuntu-latest 12 | environment: quay 13 | steps: 14 | - uses: actions/checkout@v6 15 | with: 16 | fetch-depth: 0 17 | 18 | - name: publish 19 | uses: ./.github/olm-publish 20 | env: 21 | IMG_BASE: ${{ vars.IMG_BASE }} 22 | CHANNELS: stable,candidate,development 23 | DEFAULT_CHANNEL: stable 24 | RELEASE_SHA: ${GITHUB_SHA} 25 | with: 26 | quay_login: ${{ secrets.QUAY_LOGIN }} 27 | quay_token: ${{ secrets.QUAY_TOKEN }} 28 | -------------------------------------------------------------------------------- /bundle/manifests/obo-prometheus-operator_v1_service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | creationTimestamp: null 5 | labels: 6 | app.kubernetes.io/component: controller 7 | app.kubernetes.io/name: prometheus-operator 8 | app.kubernetes.io/part-of: observability-operator 9 | app.kubernetes.io/version: 0.87.0-rhobs1 10 | name: obo-prometheus-operator 11 | spec: 12 | clusterIP: None 13 | ports: 14 | - name: http 15 | port: 8080 16 | targetPort: http 17 | selector: 18 | app.kubernetes.io/component: controller 19 | app.kubernetes.io/name: prometheus-operator 20 | app.kubernetes.io/part-of: observability-operator 21 | status: 22 | loadBalancer: {} 23 | -------------------------------------------------------------------------------- /docs/user-guides/perses-dashboards/dashboard/01-datasource.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: perses.dev/v1alpha1 2 | kind: PersesDatasource 3 | metadata: 4 | name: openshift-monitoring 5 | namespace: perses-example 6 | spec: 7 | config: 8 | display: 9 | name: "OpenShift Monitoring Datasource" 10 | default: true 11 | plugin: 12 | kind: "PrometheusDatasource" 13 | spec: 14 | proxy: 15 | kind: HTTPProxy 16 | spec: 17 | url: https://thanos-querier.openshift-monitoring.svc.cluster.local:9091 18 | secret: 
openshift-monitoring-secret 19 | client: 20 | tls: 21 | enable: true 22 | caCert: 23 | type: file 24 | certPath: /ca/service-ca.crt 25 | -------------------------------------------------------------------------------- /pkg/controllers/util/common.go: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import ( 4 | "sigs.k8s.io/controller-runtime/pkg/client" 5 | ) 6 | 7 | const ( 8 | ResourceLabel = "app.kubernetes.io/managed-by" 9 | OpName = "observability-operator" 10 | ) 11 | 12 | func AddCommonLabels(obj client.Object, name string) client.Object { 13 | labels := obj.GetLabels() 14 | want := map[string]string{ 15 | "app.kubernetes.io/part-of": name, 16 | "app.kubernetes.io/name": obj.GetName(), 17 | ResourceLabel: OpName, 18 | } 19 | if labels == nil { 20 | obj.SetLabels(want) 21 | return obj 22 | } 23 | for name, val := range want { 24 | if _, ok := labels[name]; !ok { 25 | labels[name] = val 26 | } 27 | } 28 | return obj 29 | } 30 | -------------------------------------------------------------------------------- /deploy/package-operator/operator-kubeconfig/patches/kubeconfig-volume.yml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: apps/v1 3 | kind: Deployment 4 | metadata: 5 | app.kubernetes.io/part-of: observability-operator 6 | name: observability-operator 7 | spec: 8 | template: 9 | spec: 10 | volumes: 11 | - name: kubeconfig 12 | secret: 13 | defaultMode: 400 14 | secretName: admin-kubeconfig 15 | containers: 16 | - name: operator 17 | volumeMounts: 18 | - mountPath: /etc/openshift/kubeconfig 19 | name: kubeconfig 20 | readOnly: true 21 | env: 22 | - name: KUBECONFIG 23 | value: /etc/openshift/kubeconfig/kubeconfig 24 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # To get started with Dependabot 
version updates, you'll need to specify which 2 | # package ecosystems to update and where the package manifests are located. 3 | # Please see the documentation for all configuration options: 4 | # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates 5 | 6 | version: 2 7 | updates: 8 | - package-ecosystem: "gomod" # See documentation for possible values 9 | directory: "/" # Location of package manifests 10 | schedule: 11 | interval: "daily" 12 | - package-ecosystem: "github-actions" # See documentation for possible values 13 | directory: "/" # Location of package manifests 14 | schedule: 15 | interval: "weekly" 16 | -------------------------------------------------------------------------------- /.github/tools-cache/action.yaml: -------------------------------------------------------------------------------- 1 | name: tools-cache 2 | description: Caches development tools 3 | runs: 4 | using: composite 5 | steps: 6 | - uses: actions/cache@v4 7 | id: tools-cache 8 | with: 9 | path: ./tmp/bin 10 | key: ${{ runner.os }}-tools-${{ hashFiles('.github/tools') }} 11 | 12 | - name: Install Dependencies 13 | if: steps.tools-cache.outputs.cache-hit != 'true' 14 | shell: bash 15 | run: make tools 16 | 17 | - name: Show versions of the installed tools 18 | shell: bash 19 | run: | 20 | ls ./tmp/bin 21 | make validate-tools 22 | 23 | - name: Add tmp/bin to PATH 24 | shell: bash 25 | run: | 26 | echo "tmp/bin" >> "$GITHUB_PATH" 27 | -------------------------------------------------------------------------------- /docs/design/release.md: -------------------------------------------------------------------------------- 1 | # Release Workflow 2 | 3 | ## Important pointers 4 | 5 | * `olm-catalog` branch is the "release" branch 6 | * Due to the nature of File Based Catalogs, the catalog's [index](https://github.com/rhobs/observability-operator/blob/olm-catalog/olm/observability-operator-index/index.yaml) file 7 | is stored under 
[`olm-catalog` branch](https://github.com/rhobs/observability-operator/tree/olm-catalog) 8 | * `update-channels.sh` script is kept under the [olm directory](https://github.com/rhobs/observability-operator/blob/olm-catalog/olm/update-channels.sh) 9 | 10 | ## Release Workflow 11 | 12 | ![Release Workflow](./assets/release.png) 13 | 14 | NOTE: the source for the UML can be found under [assets directory](./assets/release.uml) 15 | -------------------------------------------------------------------------------- /deploy/operator/observability-operator-cluster-role-binding.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: rbac.authorization.k8s.io/v1 3 | kind: ClusterRoleBinding 4 | metadata: 5 | name: observability-operator 6 | roleRef: 7 | apiGroup: rbac.authorization.k8s.io 8 | kind: ClusterRole 9 | name: observability-operator 10 | subjects: 11 | - kind: ServiceAccount 12 | name: observability-operator-sa 13 | namespace: default 14 | --- 15 | apiVersion: rbac.authorization.k8s.io/v1 16 | kind: RoleBinding 17 | metadata: 18 | name: observability-operator 19 | roleRef: 20 | apiGroup: rbac.authorization.k8s.io 21 | kind: Role 22 | name: observability-operator 23 | subjects: 24 | - kind: ServiceAccount 25 | name: observability-operator-sa 26 | namespace: default 27 | -------------------------------------------------------------------------------- /deploy/package-operator/dependencies-kubeconfig/patches/kubeconfig-volume.yml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: apps/v1 3 | kind: Deployment 4 | metadata: 5 | app.kubernetes.io/part-of: observability-operator 6 | name: prometheus-operator 7 | spec: 8 | template: 9 | spec: 10 | volumes: 11 | - name: kubeconfig 12 | secret: 13 | defaultMode: 400 14 | secretName: admin-kubeconfig 15 | containers: 16 | - name: prometheus-operator 17 | volumeMounts: 18 | - mountPath: /etc/openshift/kubeconfig 19 | name: 
kubeconfig 20 | readOnly: true 21 | env: 22 | - name: KUBECONFIG 23 | value: /etc/openshift/kubeconfig/kubeconfig 24 | -------------------------------------------------------------------------------- /bundle/manifests/persesdashboard-viewer-role_rbac.authorization.k8s.io_v1_clusterrole.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRole 3 | metadata: 4 | creationTimestamp: null 5 | labels: 6 | app.kubernetes.io/component: rbac 7 | app.kubernetes.io/created-by: perses-operator 8 | app.kubernetes.io/instance: persesdashboard-viewer-role 9 | app.kubernetes.io/name: clusterrole 10 | app.kubernetes.io/part-of: perses-operator 11 | name: persesdashboard-viewer-role 12 | rules: 13 | - apiGroups: 14 | - perses.dev 15 | resources: 16 | - persesdashboards 17 | verbs: 18 | - get 19 | - list 20 | - watch 21 | - apiGroups: 22 | - perses.dev 23 | resources: 24 | - persesdashboards/status 25 | verbs: 26 | - get 27 | -------------------------------------------------------------------------------- /bundle/manifests/persesdatasource-viewer-role_rbac.authorization.k8s.io_v1_clusterrole.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRole 3 | metadata: 4 | creationTimestamp: null 5 | labels: 6 | app.kubernetes.io/component: rbac 7 | app.kubernetes.io/created-by: perses-operator 8 | app.kubernetes.io/instance: persesdatasource-viewer-role 9 | app.kubernetes.io/name: clusterrole 10 | app.kubernetes.io/part-of: perses-operator 11 | name: persesdatasource-viewer-role 12 | rules: 13 | - apiGroups: 14 | - perses.dev 15 | resources: 16 | - persesdatasources 17 | verbs: 18 | - get 19 | - list 20 | - watch 21 | - apiGroups: 22 | - perses.dev 23 | resources: 24 | - persesdatasources/status 25 | verbs: 26 | - get 27 | 
-------------------------------------------------------------------------------- /deploy/operator/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | resources: 4 | - observability-operator-deployment.yaml 5 | - observability-operator-service-account.yaml 6 | - observability-operator-cluster-role.yaml 7 | - observability-operator-cluster-role-binding.yaml 8 | - observability-operator-service.yaml 9 | 10 | images: 11 | - name: observability-operator 12 | newName: observability-operator 13 | newTag: 0.0.29 14 | namespace: operators 15 | 16 | patches: 17 | - patch: |- 18 | - op: add 19 | path: /spec/template/spec/containers/0/args/- 20 | value: --images=perses=quay.io/openshift-observability-ui/perses:v0.51.1-go-1.23 21 | target: 22 | group: apps 23 | kind: Deployment 24 | version: v1 25 | -------------------------------------------------------------------------------- /deploy/perses/persesdashboard_viewer_role.yaml: -------------------------------------------------------------------------------- 1 | # permissions for end users to view persesdashboards. 
2 | apiVersion: rbac.authorization.k8s.io/v1 3 | kind: ClusterRole 4 | metadata: 5 | labels: 6 | app.kubernetes.io/name: clusterrole 7 | app.kubernetes.io/instance: persesdashboard-viewer-role 8 | app.kubernetes.io/component: rbac 9 | app.kubernetes.io/created-by: perses-operator 10 | app.kubernetes.io/part-of: perses-operator 11 | name: persesdashboard-viewer-role 12 | rules: 13 | - apiGroups: 14 | - perses.dev 15 | resources: 16 | - persesdashboards 17 | verbs: 18 | - get 19 | - list 20 | - watch 21 | - apiGroups: 22 | - perses.dev 23 | resources: 24 | - persesdashboards/status 25 | verbs: 26 | - get 27 | -------------------------------------------------------------------------------- /docs/user-guides/thanos_querier/install/00-namespaces.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: Namespace 4 | metadata: 5 | name: project-a 6 | labels: 7 | app.kubernetes.io/name: api-service 8 | app.kubernetes.io/part-of: myapp 9 | --- 10 | apiVersion: v1 11 | kind: Namespace 12 | metadata: 13 | name: project-b 14 | labels: 15 | app.kubernetes.io/name: api-service 16 | app.kubernetes.io/part-of: myapp 17 | --- 18 | apiVersion: v1 19 | kind: Namespace 20 | metadata: 21 | name: project-c 22 | labels: 23 | app.kubernetes.io/name: backend 24 | app.kubernetes.io/part-of: myapp 25 | --- 26 | apiVersion: v1 27 | kind: Namespace 28 | metadata: 29 | name: project-d 30 | labels: 31 | app.kubernetes.io/name: query 32 | app.kubernetes.io/part-of: monitoring 33 | -------------------------------------------------------------------------------- /deploy/perses/persesdatasource_viewer_role.yaml: -------------------------------------------------------------------------------- 1 | # permissions for end users to view persesdatasources. 
2 | apiVersion: rbac.authorization.k8s.io/v1 3 | kind: ClusterRole 4 | metadata: 5 | labels: 6 | app.kubernetes.io/name: clusterrole 7 | app.kubernetes.io/instance: persesdatasource-viewer-role 8 | app.kubernetes.io/component: rbac 9 | app.kubernetes.io/created-by: perses-operator 10 | app.kubernetes.io/part-of: perses-operator 11 | name: persesdatasource-viewer-role 12 | rules: 13 | - apiGroups: 14 | - perses.dev 15 | resources: 16 | - persesdatasources 17 | verbs: 18 | - get 19 | - list 20 | - watch 21 | - apiGroups: 22 | - perses.dev 23 | resources: 24 | - persesdatasources/status 25 | verbs: 26 | - get 27 | -------------------------------------------------------------------------------- /docs/user-guides/federation/manifests/10-ms.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.rhobs/v1alpha1 2 | kind: MonitoringStack 3 | metadata: 4 | name: federate-cmo-ms 5 | namespace: federate-cmo 6 | spec: 7 | # 👇 Used to select the ServiceMonitor in the federate-cmo namespace 8 | # NOTE: there isn't a need for namespaceSelector 9 | resourceSelector: 10 | matchLabels: 11 | monitoring.rhobs/stack: federate-cmo-ms 12 | 13 | logLevel: info # 👈 use debug for verbose logs 14 | retention: 3h 15 | 16 | prometheusConfig: 17 | replicas: 2 # 👈 ensures that at least one prometheus is running during upgrade 18 | 19 | alertmanagerConfig: 20 | disabled: true 21 | 22 | resources: # 👈 ensure that you provide sufficient amount of resources 23 | requests: 24 | cpu: 500m 25 | memory: 1Gi 26 | -------------------------------------------------------------------------------- /hack/kind/setup.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -e -u -o pipefail 3 | 4 | # This script is now a wrapper around the unified setup script 5 | # It maintains backward compatibility while using the new unified approach 6 | 7 | SCRIPT_PATH=$(readlink -f "$0") 8 | declare -r 
SCRIPT_PATH 9 | SCRIPT_DIR=$(cd "$(dirname "$SCRIPT_PATH")" && pwd) 10 | declare -r SCRIPT_DIR 11 | declare -r PROJECT_ROOT_DIR="$SCRIPT_DIR/../../" 12 | 13 | # Print deprecation notice 14 | echo "⚠️ DEPRECATION NOTICE: hack/kind/setup.sh is deprecated" 15 | echo " Please use the new unified setup script: hack/setup-e2e-env.sh" 16 | echo " This script will forward to the new one for backward compatibility" 17 | echo "" 18 | 19 | # Forward to the new unified script with appropriate options 20 | exec "$PROJECT_ROOT_DIR/hack/setup-e2e-env.sh" "$@" 21 | -------------------------------------------------------------------------------- /bundle/manifests/persesdashboard-editor-role_rbac.authorization.k8s.io_v1_clusterrole.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRole 3 | metadata: 4 | creationTimestamp: null 5 | labels: 6 | app.kubernetes.io/component: rbac 7 | app.kubernetes.io/created-by: perses-operator 8 | app.kubernetes.io/instance: persesdashboard-editor-role 9 | app.kubernetes.io/name: clusterrole 10 | app.kubernetes.io/part-of: perses-operator 11 | name: persesdashboard-editor-role 12 | rules: 13 | - apiGroups: 14 | - perses.dev 15 | resources: 16 | - persesdashboards 17 | verbs: 18 | - create 19 | - delete 20 | - get 21 | - list 22 | - patch 23 | - update 24 | - watch 25 | - apiGroups: 26 | - perses.dev 27 | resources: 28 | - persesdashboards/status 29 | verbs: 30 | - get 31 | -------------------------------------------------------------------------------- /bundle/manifests/persesdatasource-editor-role_rbac.authorization.k8s.io_v1_clusterrole.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRole 3 | metadata: 4 | creationTimestamp: null 5 | labels: 6 | app.kubernetes.io/component: rbac 7 | app.kubernetes.io/created-by: perses-operator 8 | 
app.kubernetes.io/instance: persesdatasource-editor-role 9 | app.kubernetes.io/name: clusterrole 10 | app.kubernetes.io/part-of: perses-operator 11 | name: persesdatasource-editor-role 12 | rules: 13 | - apiGroups: 14 | - perses.dev 15 | resources: 16 | - persesdatasources 17 | verbs: 18 | - create 19 | - delete 20 | - get 21 | - list 22 | - patch 23 | - update 24 | - watch 25 | - apiGroups: 26 | - perses.dev 27 | resources: 28 | - persesdatasources/status 29 | verbs: 30 | - get 31 | -------------------------------------------------------------------------------- /deploy/perses/persesdashboard_editor_role.yaml: -------------------------------------------------------------------------------- 1 | # permissions for end users to edit persesdashboards. 2 | apiVersion: rbac.authorization.k8s.io/v1 3 | kind: ClusterRole 4 | metadata: 5 | labels: 6 | app.kubernetes.io/name: clusterrole 7 | app.kubernetes.io/instance: persesdashboard-editor-role 8 | app.kubernetes.io/component: rbac 9 | app.kubernetes.io/created-by: perses-operator 10 | app.kubernetes.io/part-of: perses-operator 11 | name: persesdashboard-editor-role 12 | rules: 13 | - apiGroups: 14 | - perses.dev 15 | resources: 16 | - persesdashboards 17 | verbs: 18 | - create 19 | - delete 20 | - get 21 | - list 22 | - patch 23 | - update 24 | - watch 25 | - apiGroups: 26 | - perses.dev 27 | resources: 28 | - persesdashboards/status 29 | verbs: 30 | - get 31 | -------------------------------------------------------------------------------- /deploy/perses/persesdatasource_editor_role.yaml: -------------------------------------------------------------------------------- 1 | # permissions for end users to edit persesdatasources. 
2 | apiVersion: rbac.authorization.k8s.io/v1 3 | kind: ClusterRole 4 | metadata: 5 | labels: 6 | app.kubernetes.io/name: clusterrole 7 | app.kubernetes.io/instance: persesdatasource-editor-role 8 | app.kubernetes.io/component: rbac 9 | app.kubernetes.io/created-by: perses-operator 10 | app.kubernetes.io/part-of: perses-operator 11 | name: persesdatasource-editor-role 12 | rules: 13 | - apiGroups: 14 | - perses.dev 15 | resources: 16 | - persesdatasources 17 | verbs: 18 | - create 19 | - delete 20 | - get 21 | - list 22 | - patch 23 | - update 24 | - watch 25 | - apiGroups: 26 | - perses.dev 27 | resources: 28 | - persesdatasources/status 29 | verbs: 30 | - get 31 | -------------------------------------------------------------------------------- /bundle/metadata/annotations.yaml: -------------------------------------------------------------------------------- 1 | annotations: 2 | # Core bundle annotations. 3 | operators.operatorframework.io.bundle.mediatype.v1: registry+v1 4 | operators.operatorframework.io.bundle.manifests.v1: manifests/ 5 | operators.operatorframework.io.bundle.metadata.v1: metadata/ 6 | operators.operatorframework.io.bundle.package.v1: observability-operator 7 | operators.operatorframework.io.bundle.channels.v1: development 8 | operators.operatorframework.io.bundle.channel.default.v1: development 9 | operators.operatorframework.io.metrics.builder: operator-sdk-v1.41.1 10 | operators.operatorframework.io.metrics.mediatype.v1: metrics+v1 11 | operators.operatorframework.io.metrics.project_layout: unknown 12 | 13 | # Annotations for testing. 
14 | operators.operatorframework.io.test.mediatype.v1: scorecard+v1 15 | operators.operatorframework.io.test.config.v1: tests/scorecard/ 16 | -------------------------------------------------------------------------------- /test/osd-e2e.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -e -u -o pipefail 3 | 4 | # NOTE: this script is meant to be run inside osd-test-harness and 5 | # assumes all required binaries are in the same directory as the script 6 | 7 | declare -r TEST_RESULT_DIR="/test-run-results" 8 | 9 | main() { 10 | 11 | set -x 12 | # skip UIPlugin related tests because this script is used to test ObO upstream release on ROSA 13 | # Upstream ObO release disabled UIPlugin by default 14 | ./e2e.test -test.v -test.skip UIPlugin 2>"$TEST_RESULT_DIR/errors.log" | 15 | tee "$TEST_RESULT_DIR/tests.log" | 16 | ./go-junit-report -set-exit-code >"$TEST_RESULT_DIR/junit-obo.xml" 17 | 18 | # HACK: create an empty json file until we know what the addon-metadata 19 | # should contain 20 | # SEE: https://github.com/openshift/osde2e-example-test-harness 21 | echo "{}" >"$TEST_RESULT_DIR/addon-metadata.json" 22 | } 23 | 24 | main "$@" 25 | -------------------------------------------------------------------------------- /must-gather/collection-scripts/common.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # safeguards 4 | set -o nounset 5 | set -o errexit 6 | set -o pipefail 7 | 8 | get_first_ready_prom_pod() { 9 | local ns="$1"; shift 10 | local name="$1"; shift 11 | readarray -t READY_PROM_PODS < <( 12 | oc get pods -n "$ns" -l app.kubernetes.io/part-of="$name",app.kubernetes.io/component=prometheus --field-selector=status.phase==Running \ 13 | --no-headers -o custom-columns=":metadata.name" 14 | ) 15 | echo "${READY_PROM_PODS[0]}" 16 | } 17 | 18 | get_first_ready_alertmanager_pod() { 19 | local ns="$1"; shift 20 | local name="$1"; shift 21 | 
readarray -t READY_AM_PODS < <( 22 | oc get pods -n "$ns" -l app.kubernetes.io/part-of="$name",app.kubernetes.io/component=alertmanager --field-selector=status.phase==Running \ 23 | --no-headers -o custom-columns=":metadata.name" 24 | ) 25 | echo "${READY_AM_PODS[0]}" 26 | } 27 | -------------------------------------------------------------------------------- /.github/osd-test-harness-publish/action.yaml: -------------------------------------------------------------------------------- 1 | name: 'Publish OSD test harness image' 2 | description: 'Publishes the OSD test harness image' 3 | inputs: 4 | quay_login: 5 | description: "Quay login" 6 | required: true 7 | quay_token: 8 | description: "Quay token" 9 | required: true 10 | runs: 11 | using: composite 12 | steps: 13 | - name: Setup Go environment 14 | uses: actions/setup-go@v5 15 | with: 16 | go-version-file: 'go.mod' 17 | check-latest: true 18 | 19 | - name: Install tools 20 | uses: ./.github/tools-cache 21 | 22 | - name: Registry Login 23 | uses: docker/login-action@v2 24 | with: 25 | registry: quay.io 26 | username: ${{ inputs.quay_login }} 27 | password: ${{ inputs.quay_token }} 28 | 29 | - uses: actions/checkout@v3 30 | with: 31 | fetch-depth: 0 32 | 33 | - name: Build and publish test harness image 34 | shell: bash 35 | run: | 36 | make osd-e2e-test-push 37 | -------------------------------------------------------------------------------- /pkg/controllers/observability/olm_components.go: -------------------------------------------------------------------------------- 1 | package observability 2 | 3 | import ( 4 | olmv1alpha1 "github.com/operator-framework/api/pkg/operators/v1alpha1" 5 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 6 | ) 7 | 8 | func subscription(config OperatorInstallConfig) *olmv1alpha1.Subscription { 9 | return &olmv1alpha1.Subscription{ 10 | TypeMeta: metav1.TypeMeta{ 11 | Kind: olmv1alpha1.SubscriptionKind, 12 | APIVersion: olmv1alpha1.SchemeGroupVersion.String(), 13 | }, 14 | 
ObjectMeta: metav1.ObjectMeta{ 15 | Name: config.PackageName, 16 | Namespace: config.Namespace, 17 | }, 18 | Spec: &olmv1alpha1.SubscriptionSpec{ 19 | CatalogSource: "redhat-operators", 20 | CatalogSourceNamespace: "openshift-marketplace", 21 | Package: config.PackageName, 22 | Channel: config.Channel, 23 | StartingCSV: config.StartingCSV, 24 | InstallPlanApproval: olmv1alpha1.ApprovalAutomatic, 25 | }, 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /docs/user-guides/thanos_querier/install/01-monitoringstacks.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: monitoring.rhobs/v1alpha1 3 | kind: MonitoringStack 4 | metadata: 5 | name: api-monitoring 6 | namespace: project-a 7 | labels: 8 | app.kubernetes.io/name: metrics 9 | app.kubernetes.io/part-of: monitoring 10 | spec: 11 | alertmanagerConfig: 12 | disabled: true 13 | prometheusConfig: 14 | replicas: 2 15 | resourceSelector: {} 16 | namespaceSelector: 17 | matchLabels: 18 | app.kubernetes.io/name: api-service 19 | app.kubernetes.io/part-of: myapp 20 | --- 21 | apiVersion: monitoring.rhobs/v1alpha1 22 | kind: MonitoringStack 23 | metadata: 24 | name: backend-monitoring 25 | namespace: project-c 26 | labels: 27 | app.kubernetes.io/name: metrics 28 | app.kubernetes.io/part-of: monitoring 29 | spec: 30 | alertmanagerConfig: 31 | disabled: true 32 | prometheusConfig: 33 | replicas: 2 34 | resourceSelector: 35 | matchLabels: 36 | app.kubernetes.io/name: backend 37 | -------------------------------------------------------------------------------- /bundle.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM scratch 2 | 3 | # Core bundle labels. 
4 | LABEL operators.operatorframework.io.bundle.mediatype.v1=registry+v1 5 | LABEL operators.operatorframework.io.bundle.manifests.v1=manifests/ 6 | LABEL operators.operatorframework.io.bundle.metadata.v1=metadata/ 7 | LABEL operators.operatorframework.io.bundle.package.v1=observability-operator 8 | LABEL operators.operatorframework.io.bundle.channels.v1=development 9 | LABEL operators.operatorframework.io.bundle.channel.default.v1=development 10 | LABEL operators.operatorframework.io.metrics.builder=operator-sdk-v1.41.1 11 | LABEL operators.operatorframework.io.metrics.mediatype.v1=metrics+v1 12 | LABEL operators.operatorframework.io.metrics.project_layout=unknown 13 | 14 | # Labels for testing. 15 | LABEL operators.operatorframework.io.test.mediatype.v1=scorecard+v1 16 | LABEL operators.operatorframework.io.test.config.v1=tests/scorecard/ 17 | 18 | # Copy files to locations specified by labels. 19 | COPY bundle/manifests /manifests/ 20 | COPY bundle/metadata /metadata/ 21 | COPY bundle/tests/scorecard /tests/scorecard/ 22 | -------------------------------------------------------------------------------- /test/Dockerfile: -------------------------------------------------------------------------------- 1 | ### Builder ### 2 | FROM golang:1.24 as builder 3 | 4 | ENV PKG=/workspace 5 | WORKDIR ${PKG} 6 | 7 | COPY go.mod go.mod 8 | COPY go.sum go.sum 9 | COPY pkg/apis/ pkg/apis/ 10 | # 11 | # cache deps before building and copying source so that we don't need to re-download as much 12 | # and so that source changes don't invalidate our downloaded layer 13 | RUN go mod download 14 | 15 | # install go-junit-report binary that will then be copied to the runner image 16 | RUN GOBIN=/workspace/bin go install github.com/jstemmer/go-junit-report/v2@latest 17 | 18 | # Copy the go source 19 | COPY pkg/ pkg/ 20 | COPY test/ test/ 21 | 22 | # compile test into e2e.test binary 23 | RUN go test -c -tags netgo,osusergo ./test/e2e/ 24 | 25 | ### Runner ### 26 | 27 | FROM 
registry.access.redhat.com/ubi8/ubi-minimal:latest 28 | WORKDIR /workspace 29 | 30 | COPY --from=builder /workspace/e2e.test . 31 | COPY --from=builder /workspace/test/osd-e2e.sh . 32 | COPY --from=builder /workspace/bin/go-junit-report . 33 | 34 | RUN mkdir -p /test-run-results 35 | ENTRYPOINT [ "/workspace/osd-e2e.sh" ] 36 | -------------------------------------------------------------------------------- /deploy/olm/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | 4 | resources: 5 | - ../crds/common 6 | - ../perses 7 | - ../dependencies 8 | - ../monitoring 9 | - ../operator 10 | - ../scorecard 11 | - ../samples 12 | 13 | images: 14 | - name: observability-operator 15 | newName: observability-operator 16 | newTag: 1.3.0 17 | 18 | patches: 19 | - patch: |- 20 | apiVersion: apps/v1 21 | kind: Deployment 22 | metadata: 23 | name: NOT-USED-BECAUSE-TARGET-IS-SPECIFIED 24 | spec: 25 | template: 26 | spec: 27 | affinity: 28 | nodeAffinity: 29 | preferredDuringSchedulingIgnoredDuringExecution: 30 | - preference: 31 | matchExpressions: 32 | - key: node-role.kubernetes.io/infra 33 | operator: Exists 34 | weight: 1 35 | tolerations: 36 | - effect: NoSchedule 37 | key: node-role.kubernetes.io/infra 38 | operator: Exists 39 | target: 40 | group: apps 41 | kind: Deployment 42 | version: v1 43 | -------------------------------------------------------------------------------- /deploy/dependencies/admission-webhook/prometheus-rule-validating-webhook.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: admissionregistration.k8s.io/v1 2 | kind: ValidatingWebhookConfiguration 3 | metadata: 4 | labels: 5 | app.kubernetes.io/component: admission-webhook 6 | app.kubernetes.io/name: prometheus-operator-admission-webhook 7 | name: prometheusrules.monitoring.rhobs 8 | webhooks: 9 | - admissionReviewVersions: 10 | - 
v1 11 | clientConfig: 12 | # NOTE: the caBundle gets automatically injected by OLM 13 | caBundle: Cg== 14 | service: 15 | # NOTE: when changing the service, ensure the same changes are applied 16 | # to alertmanager-config-validating-webhook as well 17 | name: obo-prometheus-operator-admission-webhook 18 | namespace: operators 19 | path: /admission-prometheusrules/validate 20 | failurePolicy: Ignore 21 | name: prometheusrules.monitoring.rhobs 22 | rules: 23 | - apiGroups: 24 | - monitoring.rhobs 25 | apiVersions: 26 | - '*' 27 | operations: 28 | - CREATE 29 | - UPDATE 30 | resources: 31 | - prometheusrules 32 | scope: Namespaced 33 | sideEffects: None 34 | timeoutSeconds: 5 35 | -------------------------------------------------------------------------------- /deploy/dependencies/admission-webhook/alertmanager-config-validating-webhook.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: admissionregistration.k8s.io/v1 2 | kind: ValidatingWebhookConfiguration 3 | metadata: 4 | labels: 5 | app.kubernetes.io/component: admission-webhook 6 | app.kubernetes.io/name: prometheus-operator-admission-webhook 7 | name: alertmanagerconfigs.monitoring.rhobs 8 | webhooks: 9 | - admissionReviewVersions: 10 | - v1 11 | clientConfig: 12 | # NOTE: the caBundle gets automatically injected by OLM 13 | caBundle: Cg== 14 | service: 15 | # NOTE: when changing the service, ensure the same changes are applied 16 | # to prometheus-rule-validating-webhook 17 | name: obo-prometheus-operator-admission-webhook 18 | namespace: operators 19 | path: /admission-alertmanagerconfigs/validate 20 | name: alertmanagerconfigs.monitoring.rhobs 21 | failurePolicy: Ignore 22 | rules: 23 | - apiGroups: 24 | - monitoring.rhobs 25 | apiVersions: 26 | - '*' 27 | operations: 28 | - CREATE 29 | - UPDATE 30 | resources: 31 | - alertmanagerconfigs 32 | scope: Namespaced 33 | sideEffects: None 34 | timeoutSeconds: 5 35 | 
-------------------------------------------------------------------------------- /deploy/monitoring/observability-operator-rules.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: monitoring.coreos.com/v1 3 | kind: PrometheusRule 4 | metadata: 5 | labels: 6 | app.kubernetes.io/component: operator 7 | app.kubernetes.io/name: observability-operator 8 | app.kubernetes.io/part-of: observability-operator 9 | openshift.io/user-monitoring: "false" 10 | name: observability-operator 11 | spec: 12 | groups: 13 | - name: operator 14 | rules: 15 | - alert: ClusterObservabilityOperatorReconciliationsFailed 16 | annotations: 17 | description: |- 18 | {{$value | humanize}}% of reconciliation requests are failing for the '{{ $labels.controller}}' controller. 19 | 20 | Check the logs of the {{$labels.namespace}}/{{$labels.pod}} pod to investigate further. 21 | summary: Cluster observability operator fails to reconcile resources 22 | expr: |- 23 | sum by(controller,pod,namespace) (rate(controller_runtime_reconcile_total{result="error",job="observability-operator"}[5m])) 24 | / 25 | sum by(controller,pod,namespace) (rate(controller_runtime_reconcile_total{job="observability-operator"}[5m])) > 0.1 26 | for: 15m 27 | labels: 28 | severity: warning 29 | -------------------------------------------------------------------------------- /hack/kind/registry.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: apps/v1 3 | kind: Deployment 4 | metadata: 5 | creationTimestamp: null 6 | labels: 7 | app: local-registry 8 | name: local-registry 9 | spec: 10 | replicas: 1 11 | selector: 12 | matchLabels: 13 | app: local-registry 14 | template: 15 | metadata: 16 | labels: 17 | app: local-registry 18 | spec: 19 | containers: 20 | - image: registry:2 21 | name: registry 22 | --- 23 | apiVersion: v1 24 | kind: Service 25 | metadata: 26 | name: local-registry 27 | labels: 28 | app: 
local-registry 29 | spec: 30 | # Use a fixed IP address so that we can use it in config.yaml to 31 | # patch the registry address and force an http protocol 32 | clusterIP: 10.96.223.192 33 | ports: 34 | - port: 30000 35 | protocol: TCP 36 | targetPort: 5000 37 | selector: 38 | app: local-registry 39 | --- 40 | apiVersion: v1 41 | kind: Service 42 | metadata: 43 | name: local-registry-node-port 44 | labels: 45 | app: local-registry 46 | spec: 47 | type: NodePort 48 | ports: 49 | - protocol: TCP 50 | port: 30000 51 | targetPort: 5000 52 | nodePort: 30000 53 | selector: 54 | app: local-registry 55 | -------------------------------------------------------------------------------- /bundle/manifests/observability-operator_monitoring.coreos.com_v1_prometheusrule.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: PrometheusRule 3 | metadata: 4 | labels: 5 | app.kubernetes.io/component: operator 6 | app.kubernetes.io/name: observability-operator 7 | app.kubernetes.io/part-of: observability-operator 8 | openshift.io/user-monitoring: "false" 9 | name: observability-operator 10 | spec: 11 | groups: 12 | - name: operator 13 | rules: 14 | - alert: ClusterObservabilityOperatorReconciliationsFailed 15 | annotations: 16 | description: |- 17 | {{$value | humanize}}% of reconciliation requests are failing for the '{{ $labels.controller}}' controller. 18 | 19 | Check the logs of the {{$labels.namespace}}/{{$labels.pod}} pod to investigate further. 
20 | summary: Cluster observability operator fails to reconcile resources 21 | expr: |- 22 | sum by(controller,pod,namespace) (rate(controller_runtime_reconcile_total{result="error",job="observability-operator"}[5m])) 23 | / 24 | sum by(controller,pod,namespace) (rate(controller_runtime_reconcile_total{job="observability-operator"}[5m])) > 0.1 25 | for: 15m 26 | labels: 27 | severity: warning 28 | -------------------------------------------------------------------------------- /.github/package-operator-publish/action.yaml: -------------------------------------------------------------------------------- 1 | name: 'Publish package-operator package' 2 | description: 'Publishes the operator as a package-operator package' 3 | inputs: 4 | quay_login: 5 | description: "Quay login" 6 | required: true 7 | quay_token: 8 | description: "Quay token" 9 | required: true 10 | runs: 11 | using: composite 12 | steps: 13 | - uses: actions/checkout@v3 14 | with: 15 | fetch-depth: 0 16 | 17 | - name: Setup Go environment 18 | uses: actions/setup-go@v5 19 | with: 20 | go-version-file: 'go.mod' 21 | check-latest: true 22 | 23 | - name: Install tools 24 | uses: ./.github/tools-cache 25 | 26 | - name: Registry Login 27 | uses: docker/login-action@v2 28 | with: 29 | registry: quay.io 30 | username: ${{ inputs.quay_login }} 31 | password: ${{ inputs.quay_token }} 32 | 33 | - name: Generate Package image 34 | shell: bash 35 | run: make package 36 | 37 | - name: Publish Package 38 | shell: bash 39 | run: make package-push 40 | 41 | - name: Generate Package image with kubeconfig option 42 | shell: bash 43 | run: make package-kubeconfig 44 | 45 | - name: Publish Package with kubeconfig option 46 | shell: bash 47 | run: make package-push-kubeconfig 48 | -------------------------------------------------------------------------------- /pkg/apis/observability/v1alpha1/register.go: -------------------------------------------------------------------------------- 1 | // Package v1alpha1 contains API 
Schema definitions for the rhobs v1alpha1 API group 2 | // 3 | // The observability-operator API module uses semantic versioning for version tags, 4 | // but does not guarantee backward compatibility, even for versions v1.0.0 and above. 5 | // Breaking changes may occur without major version bumps. 6 | // 7 | // +kubebuilder:object:generate=true 8 | // +groupName=observability.openshift.io 9 | package v1alpha1 10 | 11 | import ( 12 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 13 | "k8s.io/apimachinery/pkg/runtime" 14 | "k8s.io/apimachinery/pkg/runtime/schema" 15 | ) 16 | 17 | var ( 18 | // GroupVersion is group version used to register these objects 19 | GroupVersion = schema.GroupVersion{Group: "observability.openshift.io", Version: "v1alpha1"} 20 | 21 | // SchemeBuilder is used to add go types to the GroupVersionKind scheme 22 | SchemeBuilder = runtime.NewSchemeBuilder(addTypes) 23 | 24 | // AddToScheme adds the types in this group-version to the given scheme. 25 | AddToScheme = SchemeBuilder.AddToScheme 26 | ) 27 | 28 | func addTypes(s *runtime.Scheme) error { 29 | s.AddKnownTypes(GroupVersion, &ObservabilityInstaller{}, &ObservabilityInstallerList{}) 30 | metav1.AddToGroupVersion(s, GroupVersion) 31 | return nil 32 | } 33 | -------------------------------------------------------------------------------- /pkg/controllers/uiplugin/config/korrel8r.yaml: -------------------------------------------------------------------------------- 1 | # Default configuration for deploying Korrel8r as a service in an OpenShift cluster. 2 | # Store service URLs assume that stores are installed in their default locations. 
3 | stores: 4 | - domain: k8s 5 | - domain: alert 6 | metrics: https://{{ .Metric }}.{{ .MonitoringNs }}.svc:9091 7 | alertmanager: https://{{ .MetricAlert }}.{{ .MonitoringNs }}.svc:9094 8 | certificateAuthority: ./run/secrets/kubernetes.io/serviceaccount/service-ca.crt 9 | - domain: log 10 | lokiStack: https://{{ .Log }}.{{ .LoggingNs }}.svc:8080 11 | certificateAuthority: ./run/secrets/kubernetes.io/serviceaccount/service-ca.crt 12 | - domain: metric 13 | metric: https://{{ .Metric }}.{{ .MonitoringNs }}.svc:9091 14 | certificateAuthority: ./run/secrets/kubernetes.io/serviceaccount/service-ca.crt 15 | - domain: netflow 16 | lokiStack: https://{{ .Netflow }}.{{ .NetobservNs }}.svc:8080 17 | certificateAuthority: ./run/secrets/kubernetes.io/serviceaccount/service-ca.crt 18 | - domain: trace 19 | tempoStack: https://{{ .Trace }}.{{ .TracingNs }}.svc.cluster.local:8080/api/traces/v1/platform/tempo/api/search 20 | certificateAuthority: ./run/secrets/kubernetes.io/serviceaccount/service-ca.crt 21 | 22 | include: 23 | - /etc/korrel8r/rules/all.yaml 24 | -------------------------------------------------------------------------------- /.github/e2e-tests-olm/action.yaml: -------------------------------------------------------------------------------- 1 | name: e2e-tests-olm 2 | description: Runs E2E tests against the OLM bundle using unified setup 3 | 4 | runs: 5 | using: composite 6 | steps: 7 | - uses: actions/setup-go@v5 8 | with: 9 | go-version-file: 'go.mod' 10 | check-latest: true 11 | 12 | - name: Install required tools using unified setup 13 | uses: ./.github/tools-cache 14 | 15 | - name: Set up e2e environment 16 | shell: bash 17 | run: | 18 | # Use the unified setup script with CI-friendly options 19 | ./hack/setup-e2e-env.sh \ 20 | --skip-host-check 21 | 22 | - name: Run e2e script 23 | shell: bash 24 | run: ./test/run-e2e.sh --ci 25 | 26 | - name: Capture cluster state 27 | if: always() 28 | shell: bash 29 | run: | 30 | # Capture apiserver state 31 | oc adm 
inspect node --dest-dir cluster-state || true 32 | oc adm inspect -A statefulset --dest-dir cluster-state || true 33 | oc adm inspect -A deployment --dest-dir cluster-state || true 34 | oc adm inspect -A ns --dest-dir cluster-state || true 35 | cp -r tmp/e2e cluster-state/ || true 36 | 37 | - name: Archive production artifacts 38 | if: always() 39 | uses: actions/upload-artifact@v4 40 | with: 41 | name: cluster-state 42 | path: cluster-state 43 | -------------------------------------------------------------------------------- /docs/user-guides/thanos_querier/install/05-monitors.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: monitoring.rhobs/v1 3 | kind: ServiceMonitor 4 | metadata: 5 | name: api 6 | namespace: project-a 7 | labels: 8 | app.kubernetes.io/name: api-service 9 | app.kubernetes.io/part-of: myapp 10 | spec: 11 | endpoints: 12 | - interval: 30s 13 | port: web 14 | scheme: http 15 | selector: 16 | matchLabels: 17 | app.kubernetes.io/name: api-service 18 | app.kubernetes.io/part-of: myapp 19 | --- 20 | apiVersion: monitoring.rhobs/v1 21 | kind: ServiceMonitor 22 | metadata: 23 | name: api 24 | namespace: project-b 25 | labels: 26 | app.kubernetes.io/name: api-service 27 | app.kubernetes.io/part-of: myapp 28 | spec: 29 | endpoints: 30 | - interval: 30s 31 | port: web 32 | scheme: http 33 | selector: 34 | matchLabels: 35 | app.kubernetes.io/name: api-service 36 | app.kubernetes.io/part-of: myapp 37 | --- 38 | apiVersion: monitoring.rhobs/v1 39 | kind: ServiceMonitor 40 | metadata: 41 | name: backend 42 | namespace: project-c 43 | labels: 44 | app.kubernetes.io/name: backend 45 | app.kubernetes.io/part-of: myapp 46 | spec: 47 | endpoints: 48 | - interval: 30s 49 | port: web 50 | scheme: http 51 | selector: 52 | matchLabels: 53 | app.kubernetes.io/name: backend 54 | app.kubernetes.io/part-of: myapp 55 | -------------------------------------------------------------------------------- 
/pkg/controllers/uiplugin/components_test.go: -------------------------------------------------------------------------------- 1 | package uiplugin 2 | 3 | import ( 4 | "testing" 5 | 6 | "gotest.tools/v3/assert" 7 | ) 8 | 9 | func TestIsVersionAheadOrEqual(t *testing.T) { 10 | testCases := []struct { 11 | clusterVersion string 12 | nextClusterVersion string 13 | expected bool 14 | }{ 15 | { 16 | clusterVersion: "v4.18", 17 | nextClusterVersion: "v4.17", 18 | expected: true, 19 | }, 20 | { 21 | clusterVersion: "v4.17", 22 | nextClusterVersion: "v4.17", 23 | expected: true, 24 | }, 25 | { 26 | clusterVersion: "v4.16", 27 | nextClusterVersion: "v4.17", 28 | expected: false, 29 | }, 30 | { 31 | clusterVersion: "4.18", 32 | nextClusterVersion: "v4.17", 33 | expected: true, 34 | }, 35 | { 36 | clusterVersion: "4.17.0-0.nightly-2024-07-09-121045", 37 | nextClusterVersion: "v4.17", 38 | expected: true, 39 | }, 40 | { 41 | clusterVersion: "4.16.0-0.nightly-2024-07-09-121045", 42 | nextClusterVersion: "v4.17", 43 | expected: false, 44 | }, 45 | { 46 | clusterVersion: "v4.18", 47 | nextClusterVersion: "", 48 | expected: false, 49 | }, 50 | } 51 | 52 | for _, tc := range testCases { 53 | actual := isVersionAheadOrEqual(tc.clusterVersion, tc.nextClusterVersion) 54 | assert.Equal(t, tc.expected, actual) 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /deploy/scorecard/patches/olm.config.yaml: -------------------------------------------------------------------------------- 1 | - op: add 2 | path: /stages/0/tests/- 3 | value: 4 | entrypoint: 5 | - scorecard-test 6 | - olm-bundle-validation 7 | image: quay.io/operator-framework/scorecard-test:v1.13.0 8 | labels: 9 | suite: olm 10 | test: olm-bundle-validation-test 11 | - op: add 12 | path: /stages/0/tests/- 13 | value: 14 | entrypoint: 15 | - scorecard-test 16 | - olm-crds-have-validation 17 | image: quay.io/operator-framework/scorecard-test:v1.13.0 18 | labels: 19 | suite: olm 20 
| test: olm-crds-have-validation-test 21 | - op: add 22 | path: /stages/0/tests/- 23 | value: 24 | entrypoint: 25 | - scorecard-test 26 | - olm-crds-have-resources 27 | image: quay.io/operator-framework/scorecard-test:v1.13.0 28 | labels: 29 | suite: olm 30 | test: olm-crds-have-resources-test 31 | - op: add 32 | path: /stages/0/tests/- 33 | value: 34 | entrypoint: 35 | - scorecard-test 36 | - olm-spec-descriptors 37 | image: quay.io/operator-framework/scorecard-test:v1.13.0 38 | labels: 39 | suite: olm 40 | test: olm-spec-descriptors-test 41 | - op: add 42 | path: /stages/0/tests/- 43 | value: 44 | entrypoint: 45 | - scorecard-test 46 | - olm-status-descriptors 47 | image: quay.io/operator-framework/scorecard-test:v1.13.0 48 | labels: 49 | suite: olm 50 | test: olm-status-descriptors-test 51 | -------------------------------------------------------------------------------- /hack/loadtest/test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e -u -o pipefail 4 | trap cleanup INT 5 | 6 | # Functions that given a number it creates a namespace 7 | # and in that namespace it creates a monitoring stack 8 | create_monitoring_stack() { 9 | 10 | local stack_number=$1; shift 11 | local ms_name=stack-$stack_number 12 | local namespace=loadtest-$stack_number 13 | 14 | monitoring_stack=$(cat <<- EOF 15 | apiVersion: monitoring.rhobs/v1alpha1 16 | kind: MonitoringStack 17 | metadata: 18 | name: ${ms_name} 19 | namespace: ${namespace} 20 | labels: 21 | load-test: test 22 | spec: 23 | logLevel: debug 24 | retention: 15d 25 | resourceSelector: 26 | matchLabels: 27 | load-test-instance: ${ms_name} 28 | EOF 29 | ) 30 | 31 | kubectl create namespace "$namespace" 32 | echo "$monitoring_stack" | kubectl -n "$namespace" apply -f - 33 | } 34 | 35 | cleanup() { 36 | echo "INFO: cleaning up all namespaces" 37 | kubectl delete ns loadtest-{1..10} 38 | } 39 | 40 | main() { 41 | # Goal: create 10 monitoring stack CRs, 
wait for OO to 42 | # reconcile and then clean-up 43 | 44 | echo "INFO: Running load test" 45 | for ((i=1; i<=10; i++)); do 46 | create_monitoring_stack "$i" 47 | done 48 | 49 | # Give some time for OO to reconcile all the MS 50 | # and create the necessary resources 51 | local timeout=180 52 | echo "INFO: sleeping for $timeout" 53 | sleep "$timeout" 54 | 55 | cleanup 56 | } 57 | 58 | main "$@" 59 | -------------------------------------------------------------------------------- /pkg/assets/loader.go: -------------------------------------------------------------------------------- 1 | package assets 2 | 3 | import ( 4 | "os" 5 | 6 | apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" 7 | "k8s.io/apimachinery/pkg/util/yaml" 8 | "sigs.k8s.io/controller-runtime/pkg/client" 9 | ) 10 | 11 | type Asset struct { 12 | File string 13 | Object client.Object 14 | } 15 | 16 | // NewCRDAsset returns a new Asset with type CustomResourceDefinition 17 | func NewCRDAsset(file string) Asset { 18 | return Asset{ 19 | File: file, 20 | Object: &apiextensionsv1.CustomResourceDefinition{}, 21 | } 22 | } 23 | 24 | // Loader loads Kubernetes objects from YAML manifests 25 | type Loader struct { 26 | assetsPath string 27 | } 28 | 29 | // NewLoader returns a new Loader for assets in assetsPath 30 | func NewLoader(assetsPath string) *Loader { 31 | return &Loader{ 32 | assetsPath: assetsPath, 33 | } 34 | } 35 | 36 | // Load parses YAML manifests from disk and returns 37 | // the corresponding resources as golang objects 38 | func (l *Loader) Load(assets []Asset) ([]client.Object, error) { 39 | resources := make([]client.Object, len(assets)) 40 | for i, asset := range assets { 41 | file, err := os.Open(l.assetsPath + asset.File) 42 | if err != nil { 43 | return nil, err 44 | } 45 | decoder := yaml.NewYAMLOrJSONDecoder(file, 0) 46 | if err := decoder.Decode(asset.Object); err != nil { 47 | return nil, err 48 | } 49 | 50 | resources[i] = asset.Object 51 | } 52 | 53 | return 
resources, nil 54 | } 55 | -------------------------------------------------------------------------------- /test/e2e/operator_metrics_test.go: -------------------------------------------------------------------------------- 1 | package e2e 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | 7 | "github.com/prometheus/common/model" 8 | "gotest.tools/v3/assert" 9 | 10 | "github.com/rhobs/observability-operator/test/e2e/framework" 11 | ) 12 | 13 | func TestOperatorMetrics(t *testing.T) { 14 | t.Run("operator exposes metrics", func(t *testing.T) { 15 | pod := f.GetOperatorPod(t) 16 | 17 | var opts []func(*framework.HTTPOptions) 18 | if f.IsOpenshiftCluster { 19 | opts = append(opts, framework.WithHTTPS()) 20 | } 21 | 22 | metrics, err := f.GetPodMetrics(pod, opts...) 23 | assert.NilError(t, err) 24 | 25 | v, err := framework.ParseMetrics(metrics) 26 | assert.NilError(t, err) 27 | 28 | assert.Assert(t, len(v) > 0, "no metrics") 29 | }) 30 | 31 | t.Run("metrics ingested in Prometheus", func(t *testing.T) { 32 | if !f.IsOpenshiftCluster { 33 | t.Skip("requires an OpenShift cluster") 34 | } 35 | 36 | err := f.AssertPromQLResult( 37 | t, 38 | fmt.Sprintf(`up{job="observability-operator",namespace="%s"} == 1`, f.OperatorNamespace), 39 | func(v model.Value) error { 40 | if v.Type() != model.ValVector { 41 | return fmt.Errorf("invalid value type: expecting %d, got %s", model.ValVector, v.Type()) 42 | } 43 | 44 | vec := v.(model.Vector) 45 | if len(vec) != 1 { 46 | return fmt.Errorf("expecting 1 item, got %d", len(vec)) 47 | } 48 | 49 | return nil 50 | }) 51 | assert.NilError(t, err) 52 | }) 53 | } 54 | -------------------------------------------------------------------------------- /test/lib/utils.bash: -------------------------------------------------------------------------------- 1 | header() { 2 | local title="🔆🔆🔆 $* 🔆🔆🔆 " 3 | 4 | local len=40 5 | if [[ ${#title} -gt $len ]]; then 6 | len=${#title} 7 | fi 8 | 9 | echo -e "\n\n \033[1m${title}\033[0m" 10 | echo -n "━━━━━" 11 | 
printf '━%.0s' $(seq "$len") 12 | echo "━━━━━━━" 13 | 14 | } 15 | 16 | info() { 17 | echo " 🔔 $*" 18 | } 19 | 20 | ok() { 21 | echo " ✅ $*" 22 | } 23 | 24 | warn() { 25 | echo " ⚠️ $*" 26 | } 27 | 28 | skip() { 29 | echo " 🙈 SKIP: $*" 30 | } 31 | 32 | die() { 33 | echo -e "\n ✋ $* " 34 | echo -e "──────────────────── ⛔️⛔️⛔️ ────────────────────────\n" 35 | exit 1 36 | } 37 | 38 | line() { 39 | local len="$1" 40 | shift 41 | 42 | echo -n "────" 43 | printf '─%.0s' $(seq "$len") 44 | echo "────────" 45 | } 46 | 47 | # wait_for_operators_ready requires the namespace where the operator is installed 48 | wait_for_operators_ready() { 49 | local ns="$1" 50 | shift 51 | 52 | header "Wait for ObO to be Ready" 53 | 54 | local tries=30 55 | while [[ $tries -gt 0 ]] && 56 | ! kubectl -n "$ns" rollout status deploy/obo-prometheus-operator; do 57 | sleep 10 58 | ((tries--)) 59 | done 60 | 61 | kubectl wait -n "$ns" --for=condition=Available deploy/obo-prometheus-operator --timeout=300s 62 | kubectl wait -n "$ns" --for=condition=Available deploy/obo-prometheus-operator-admission-webhook --timeout=300s 63 | kubectl wait -n "$ns" --for=condition=Available deploy/observability-operator --timeout=300s 64 | 65 | ok "Obo up and running" 66 | } 67 | -------------------------------------------------------------------------------- /pkg/controllers/observability/collector.yaml: -------------------------------------------------------------------------------- 1 | extensions: 2 | bearertokenauth: 3 | filename: "/var/run/secrets/kubernetes.io/serviceaccount/token" 4 | 5 | receivers: 6 | otlp: 7 | protocols: 8 | grpc: {} 9 | http: {} 10 | jaeger: 11 | protocols: 12 | grpc: {} 13 | thrift_http: {} 14 | thrift_compact: {} 15 | thrift_binary: {} 16 | zipkin: {} 17 | 18 | processors: 19 | k8sattributes: {} 20 | batch: {} 21 | memory_limiter: 22 | check_interval: 1s 23 | limit_percentage: 75 24 | spike_limit_percentage: 25 25 | 26 | exporters: 27 | debug: {} 28 | otlphttp/tempo: 29 | endpoint: 
https://tempo-{{ .TempoName }}-gateway.{{ .Namespace }}.svc.cluster.local:8080/api/traces/v1/{{ .TempoTenant }} 30 | tls: 31 | insecure: false 32 | ca_file: "/var/run/secrets/kubernetes.io/serviceaccount/service-ca.crt" 33 | reload_interval: 5s 34 | auth: 35 | authenticator: bearertokenauth 36 | 37 | service: 38 | telemetry: 39 | logs: 40 | level: "INFO" 41 | development: true 42 | encoding: "json" 43 | metrics: 44 | level: detailed 45 | readers: 46 | - pull: 47 | exporter: 48 | prometheus: 49 | host: '0.0.0.0' 50 | port: 8888 51 | extensions: [bearertokenauth] 52 | pipelines: 53 | traces: 54 | receivers: [otlp, jaeger, zipkin] 55 | processors: [memory_limiter, k8sattributes, batch] 56 | exporters: 57 | - debug 58 | - otlphttp/tempo 59 | -------------------------------------------------------------------------------- /pkg/reconciler/create_update_reconciler.go: -------------------------------------------------------------------------------- 1 | package reconciler 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | 7 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 8 | "k8s.io/apimachinery/pkg/runtime" 9 | ctrl "sigs.k8s.io/controller-runtime" 10 | "sigs.k8s.io/controller-runtime/pkg/client" 11 | "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" 12 | 13 | "github.com/rhobs/observability-operator/pkg/controllers/util" 14 | ) 15 | 16 | type createUpdateReconciler struct { 17 | resourceOwner metav1.Object 18 | resource client.Object 19 | } 20 | 21 | func (r createUpdateReconciler) Reconcile(ctx context.Context, c client.Client, scheme *runtime.Scheme) error { 22 | // If the resource owner is in the same namespace as the resource, or if the resource owner is cluster scoped set the owner reference. 
23 | if r.resourceOwner.GetNamespace() == r.resource.GetNamespace() || r.resourceOwner.GetNamespace() == "" { 24 | if err := controllerutil.SetControllerReference(r.resourceOwner, r.resource, scheme); err != nil { 25 | return fmt.Errorf("%s/%s (%s): updater failed to set owner reference: %w", 26 | r.resource.GetNamespace(), r.resource.GetName(), 27 | r.resource.GetObjectKind().GroupVersionKind().String(), err) 28 | } 29 | } 30 | 31 | _, err := ctrl.CreateOrUpdate(ctx, c, r.resource, func() error { return nil }) 32 | 33 | return err 34 | } 35 | 36 | func NewCreateUpdateReconciler(resource client.Object, owner metav1.Object) Reconciler { 37 | return createUpdateReconciler{ 38 | resourceOwner: owner, 39 | resource: util.AddCommonLabels(resource, owner.GetName()), 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /pkg/apis/go.mod: -------------------------------------------------------------------------------- 1 | module github.com/rhobs/observability-operator/pkg/apis 2 | 3 | go 1.24.0 4 | 5 | require ( 6 | github.com/rhobs/obo-prometheus-operator/pkg/apis/monitoring v0.83.0-rhobs1 7 | github.com/stretchr/testify v1.10.0 8 | k8s.io/api v0.33.2 9 | k8s.io/apimachinery v0.33.2 10 | ) 11 | 12 | require ( 13 | cel.dev/expr v0.24.0 // indirect 14 | github.com/antlr4-go/antlr/v4 v4.13.0 // indirect 15 | github.com/davecgh/go-spew v1.1.1 // indirect 16 | github.com/fxamacker/cbor/v2 v2.8.0 // indirect 17 | github.com/go-logr/logr v1.4.2 // indirect 18 | github.com/gogo/protobuf v1.3.2 // indirect 19 | github.com/google/cel-go v0.26.1 // indirect 20 | github.com/json-iterator/go v1.1.12 // indirect 21 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect 22 | github.com/modern-go/reflect2 v1.0.2 // indirect 23 | github.com/pmezard/go-difflib v1.0.0 // indirect 24 | github.com/stoewer/go-strcase v1.2.0 // indirect 25 | github.com/x448/float16 v0.8.4 // indirect 26 | golang.org/x/exp 
v0.0.0-20230515195305-f3d0a9c9a5cc // indirect 27 | golang.org/x/net v0.40.0 // indirect 28 | golang.org/x/text v0.25.0 // indirect 29 | google.golang.org/genproto/googleapis/api v0.0.0-20240826202546-f6391c0de4c7 // indirect 30 | google.golang.org/genproto/googleapis/rpc v0.0.0-20240826202546-f6391c0de4c7 // indirect 31 | google.golang.org/protobuf v1.36.5 // indirect 32 | gopkg.in/inf.v0 v0.9.1 // indirect 33 | gopkg.in/yaml.v3 v3.0.1 // indirect 34 | k8s.io/klog/v2 v2.130.1 // indirect 35 | k8s.io/utils v0.0.0-20250502105355-0f33e8f1c979 // indirect 36 | sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 // indirect 37 | sigs.k8s.io/randfill v1.0.0 // indirect 38 | sigs.k8s.io/structured-merge-diff/v4 v4.7.0 // indirect 39 | sigs.k8s.io/yaml v1.4.0 // indirect 40 | ) 41 | -------------------------------------------------------------------------------- /test/e2e/framework/monitoring_stack.go: -------------------------------------------------------------------------------- 1 | package framework 2 | 3 | import ( 4 | "context" 5 | "testing" 6 | 7 | "gotest.tools/v3/assert" 8 | v1 "k8s.io/apimachinery/pkg/apis/meta/v1" 9 | "k8s.io/apimachinery/pkg/types" 10 | "k8s.io/client-go/util/retry" 11 | 12 | stack "github.com/rhobs/observability-operator/pkg/apis/monitoring/v1alpha1" 13 | ) 14 | 15 | type MonitoringStackConfig func(monitoringStack *stack.MonitoringStack) 16 | 17 | func SetPrometheusReplicas(replicas int32) MonitoringStackConfig { 18 | return func(ms *stack.MonitoringStack) { 19 | ms.Spec.PrometheusConfig.Replicas = &replicas 20 | } 21 | } 22 | 23 | func SetResourceSelector(resourceSelector *v1.LabelSelector) MonitoringStackConfig { 24 | return func(ms *stack.MonitoringStack) { 25 | ms.Spec.ResourceSelector = resourceSelector 26 | } 27 | } 28 | 29 | func SetAlertmanagerDisabled(disabled bool) MonitoringStackConfig { 30 | return func(ms *stack.MonitoringStack) { 31 | ms.Spec.AlertmanagerConfig.Disabled = disabled 32 | } 33 | } 34 | 35 | func 
SetAlertmanagerReplicas(replicas int32) MonitoringStackConfig { 36 | return func(ms *stack.MonitoringStack) { 37 | ms.Spec.AlertmanagerConfig.Replicas = &replicas 38 | } 39 | } 40 | 41 | // UpdateWithRetry updates monitoringstack with retry 42 | func (f *Framework) UpdateWithRetry(t *testing.T, ms *stack.MonitoringStack, fns ...MonitoringStackConfig) error { 43 | err := retry.RetryOnConflict(retry.DefaultRetry, func() error { 44 | key := types.NamespacedName{Name: ms.Name, Namespace: ms.Namespace} 45 | err := f.K8sClient.Get(context.Background(), key, ms) 46 | assert.NilError(t, err, "failed to get a monitoring stack") 47 | for _, fn := range fns { 48 | fn(ms) 49 | } 50 | err = f.K8sClient.Update(context.Background(), ms) 51 | return err 52 | }) 53 | return err 54 | } 55 | -------------------------------------------------------------------------------- /docs/user-guides/thanos_querier/install/06_load_generator.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: apps/v1 3 | kind: Deployment 4 | metadata: 5 | labels: 6 | app.kubernetes.io/name: load-gen 7 | app.kubernetes.io/part-of: myapp 8 | name: load-gen 9 | namespace: project-d 10 | spec: 11 | replicas: 2 12 | selector: 13 | matchLabels: 14 | app.kubernetes.io/name: load-gen 15 | app.kubernetes.io/part-of: myapp 16 | template: 17 | metadata: 18 | labels: 19 | app.kubernetes.io/name: load-gen 20 | app.kubernetes.io/part-of: myapp 21 | spec: 22 | containers: 23 | - image: registry.redhat.io/ubi9/ubi-minimal 24 | imagePullPolicy: IfNotPresent 25 | name: load-gen 26 | command: 27 | - /bin/sh 28 | - -c 29 | - | 30 | # List of URLs to query 31 | urls=( 32 | "http://api.project-a.svc:8080/" 33 | "http://api.project-b.svc:8080/" 34 | "http://api.project-b.svc:8080/err" 35 | "http://backend.project-c.svc:8080/" 36 | ) 37 | 38 | while true; do 39 | # Pick a random URL from the array 40 | random_index=$((RANDOM % ${#urls[@]})) 41 | url="${urls[$random_index]}" 42 | 
43 | # Query the URL (suppress output) 44 | curl -s -o /dev/null "$url" 45 | 46 | echo "Queried: $url" 47 | 48 | # Generate random delay between 0 and 1 second (fractional seconds, millisecond resolution) 49 | # RANDOM gives 0-32767, so we divide by 32767 to get 0-1 range 50 | delay=$(awk -v r=$RANDOM 'BEGIN{printf "%.3f", r/32767}') 51 | 52 | # Sleep for the random delay 53 | sleep "$delay" 54 | done 55 | -------------------------------------------------------------------------------- /hack/kind/config.yaml: -------------------------------------------------------------------------------- 1 | kind: Cluster 2 | apiVersion: kind.x-k8s.io/v1alpha4 3 | containerdConfigPatches: 4 | # 10.96.223.192 is the fixed ip of the registry service - see: hack/kind/registry.yaml 5 | - |- 6 | [plugins."io.containerd.grpc.v1.cri".registry.mirrors."local-registry:30000"] 7 | endpoint = ["http://10.96.223.192:30000"] 8 | nodes: 9 | - role: control-plane 10 | image: kindest/node:v1.24.0 11 | extraPortMappings: 12 | - containerPort: 30000 13 | hostPort: 30000 14 | # operator metrics endpoint for scraping with promq 15 | - containerPort: 30001 16 | hostPort: 30001 17 | kubeadmConfigPatches: 18 | - | 19 | kind: ClusterConfiguration 20 | apiServer: 21 | # enable auditing flags on the API server 22 | extraArgs: 23 | audit-log-path: /var/log/kubernetes/kube-apiserver-audit.log 24 | audit-policy-file: /etc/kubernetes/policies/audit-policy.yaml 25 | # mount new files / directories on the control plane 26 | extraVolumes: 27 | - name: audit-policies 28 | hostPath: /etc/kubernetes/policies 29 | mountPath: /etc/kubernetes/policies 30 | readOnly: true 31 | pathType: "DirectoryOrCreate" 32 | - name: "audit-logs" 33 | hostPath: "/var/log/kubernetes" 34 | mountPath: "/var/log/kubernetes" 35 | readOnly: false 36 | pathType: DirectoryOrCreate 37 | # mount the local file on the control plane 38 | extraMounts: 39 | - hostPath: ./hack/kind/audit-policy.yaml 40 | containerPath: /etc/kubernetes/policies/audit-policy.yaml 41 |
readOnly: true 42 | - role: worker 43 | image: kindest/node:v1.24.0 44 | - role: worker 45 | image: kindest/node:v1.24.0 46 | -------------------------------------------------------------------------------- /pkg/apis/uiplugin/v1alpha1/register.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2021. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | // Package v1alpha1 contains API Schema definitions for the rhobs v1alpha1 API group 18 | // 19 | // The observability-operator API module uses semantic versioning for version tags, 20 | // but does not guarantee backward compatibility, even for versions v1.0.0 and above. 21 | // Breaking changes may occur without major version bumps. 22 | // 23 | // +kubebuilder:object:generate=true 24 | // +groupName=observability.openshift.io 25 | package v1alpha1 26 | 27 | import ( 28 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 29 | "k8s.io/apimachinery/pkg/runtime" 30 | "k8s.io/apimachinery/pkg/runtime/schema" 31 | ) 32 | 33 | var ( 34 | // GroupVersion is group version used to register these objects 35 | GroupVersion = schema.GroupVersion{Group: "observability.openshift.io", Version: "v1alpha1"} 36 | 37 | // SchemeBuilder is used to add go types to the GroupVersionKind scheme 38 | SchemeBuilder = runtime.NewSchemeBuilder(addTypes) 39 | 40 | // AddToScheme adds the types in this group-version to the given scheme. 
41 | AddToScheme = SchemeBuilder.AddToScheme 42 | ) 43 | 44 | func addTypes(s *runtime.Scheme) error { 45 | s.AddKnownTypes(GroupVersion, &UIPlugin{}, &UIPluginList{}) 46 | metav1.AddToGroupVersion(s, GroupVersion) 47 | return nil 48 | } 49 | -------------------------------------------------------------------------------- /pkg/apis/monitoring/v1alpha1/register.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2021. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | // Package v1alpha1 contains API Schema definitions for the rhobs v1alpha1 API group 18 | // 19 | // The observability-operator API module uses semantic versioning for version tags, 20 | // but does not guarantee backward compatibility, even for versions v1.0.0 and above. 21 | // Breaking changes may occur without major version bumps. 
22 | // 23 | // +kubebuilder:object:generate=true 24 | // +groupName=monitoring.rhobs 25 | package v1alpha1 26 | 27 | import ( 28 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 29 | "k8s.io/apimachinery/pkg/runtime" 30 | "k8s.io/apimachinery/pkg/runtime/schema" 31 | ) 32 | 33 | var ( 34 | // GroupVersion is group version used to register these objects 35 | GroupVersion = schema.GroupVersion{Group: "monitoring.rhobs", Version: "v1alpha1"} 36 | 37 | // SchemeBuilder is used to add go types to the GroupVersionKind scheme 38 | SchemeBuilder = runtime.NewSchemeBuilder(addTypes) 39 | 40 | // AddToScheme adds the types in this group-version to the given scheme. 41 | AddToScheme = SchemeBuilder.AddToScheme 42 | ) 43 | 44 | func addTypes(s *runtime.Scheme) error { 45 | s.AddKnownTypes(GroupVersion, &MonitoringStack{}, &MonitoringStackList{}, &ThanosQuerier{}, &ThanosQuerierList{}) 46 | metav1.AddToGroupVersion(s, GroupVersion) 47 | return nil 48 | } 49 | -------------------------------------------------------------------------------- /bundle/tests/scorecard/config.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: scorecard.operatorframework.io/v1alpha3 2 | kind: Configuration 3 | metadata: 4 | name: config 5 | stages: 6 | - parallel: true 7 | tests: 8 | - entrypoint: 9 | - scorecard-test 10 | - basic-check-spec 11 | image: quay.io/operator-framework/scorecard-test:v1.13.0 12 | labels: 13 | suite: basic 14 | test: basic-check-spec-test 15 | storage: 16 | spec: 17 | mountPath: {} 18 | - entrypoint: 19 | - scorecard-test 20 | - olm-bundle-validation 21 | image: quay.io/operator-framework/scorecard-test:v1.13.0 22 | labels: 23 | suite: olm 24 | test: olm-bundle-validation-test 25 | storage: 26 | spec: 27 | mountPath: {} 28 | - entrypoint: 29 | - scorecard-test 30 | - olm-crds-have-validation 31 | image: quay.io/operator-framework/scorecard-test:v1.13.0 32 | labels: 33 | suite: olm 34 | test: olm-crds-have-validation-test 35 
| storage: 36 | spec: 37 | mountPath: {} 38 | - entrypoint: 39 | - scorecard-test 40 | - olm-crds-have-resources 41 | image: quay.io/operator-framework/scorecard-test:v1.13.0 42 | labels: 43 | suite: olm 44 | test: olm-crds-have-resources-test 45 | storage: 46 | spec: 47 | mountPath: {} 48 | - entrypoint: 49 | - scorecard-test 50 | - olm-spec-descriptors 51 | image: quay.io/operator-framework/scorecard-test:v1.13.0 52 | labels: 53 | suite: olm 54 | test: olm-spec-descriptors-test 55 | storage: 56 | spec: 57 | mountPath: {} 58 | - entrypoint: 59 | - scorecard-test 60 | - olm-status-descriptors 61 | image: quay.io/operator-framework/scorecard-test:v1.13.0 62 | labels: 63 | suite: olm 64 | test: olm-status-descriptors-test 65 | storage: 66 | spec: 67 | mountPath: {} 68 | storage: 69 | spec: 70 | mountPath: {} 71 | -------------------------------------------------------------------------------- /docs/user-guides/federation/manifests/20-smon-cmo.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.rhobs/v1 2 | kind: ServiceMonitor 3 | metadata: 4 | name: federate-cmo-smon 5 | namespace: federate-cmo 6 | labels: 7 | kubernetes.io/part-of: federate-cmo-ms 8 | monitoring.rhobs/stack: federate-cmo-ms 9 | 10 | spec: 11 | selector: # 👈 use the prometheus service to create a "dummy" target. 12 | matchLabels: 13 | app.kubernetes.io/managed-by: observability-operator 14 | app.kubernetes.io/name: federate-cmo-ms-prometheus 15 | 16 | endpoints: 17 | - params: 18 | 'match[]': # 👈 scrape only required metrics from in-cluster prometheus 19 | - '{__name__=~"container_cpu_.*", namespace="federate-cmo"}' 20 | - '{__name__="container_memory_working_set_bytes", namespace="federate-cmo"}' 21 | 22 | relabelings: 23 | # 👇 relabel example 24 | - targetLabel: source 25 | replacement: my-openshift-cluster 26 | 27 | # 👇 override the target's address by the prometheus-k8s service name. 
28 | - action: replace 29 | targetLabel: __address__ 30 | replacement: prometheus-k8s.openshift-monitoring.svc:9091 31 | 32 | # 👇 remove the default target labels as they aren't relevant in case of federation. 33 | - action: labeldrop 34 | regex: pod|namespace|service|endpoint|container 35 | 36 | # 👇 30s interval creates 4 scrapes per minute 37 | # prometheus-k8s.svc x 2 ms-prometheus x (60s/ 30s) = 4 38 | interval: 30s 39 | 40 | # 👇 ensure that the scraped labels are preferred over target's labels. 41 | honorLabels: true 42 | 43 | port: web 44 | scheme: https 45 | path: "/federate" 46 | 47 | bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token 48 | 49 | tlsConfig: 50 | serverName: prometheus-k8s.openshift-monitoring.svc 51 | ca: 52 | configMap: # 👈 automatically created by serving-ca operator 53 | key: service-ca.crt 54 | name: openshift-service-ca.crt 55 | -------------------------------------------------------------------------------- /test/e2e/traces_minio.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Namespace 3 | metadata: 4 | name: minio 5 | --- 6 | apiVersion: v1 7 | kind: PersistentVolumeClaim 8 | metadata: 9 | labels: 10 | app.kubernetes.io/name: minio 11 | name: minio 12 | namespace: minio 13 | spec: 14 | accessModes: 15 | - ReadWriteOnce 16 | resources: 17 | requests: 18 | storage: 2Gi 19 | --- 20 | apiVersion: apps/v1 21 | kind: Deployment 22 | metadata: 23 | name: minio 24 | namespace: minio 25 | spec: 26 | selector: 27 | matchLabels: 28 | app.kubernetes.io/name: minio 29 | strategy: 30 | type: Recreate 31 | template: 32 | metadata: 33 | labels: 34 | app.kubernetes.io/name: minio 35 | spec: 36 | containers: 37 | - command: 38 | - /bin/sh 39 | - -c 40 | - | 41 | mkdir -p /storage/tempo && \ 42 | minio server /storage 43 | env: 44 | - name: MINIO_ACCESS_KEY 45 | value: tempo 46 | - name: MINIO_SECRET_KEY 47 | value: supersecret 48 | image: 
quay.io/minio/minio:RELEASE.2024-10-02T17-50-41Z 49 | name: minio 50 | ports: 51 | - containerPort: 9000 52 | volumeMounts: 53 | - mountPath: /storage 54 | name: storage 55 | volumes: 56 | - name: storage 57 | persistentVolumeClaim: 58 | claimName: minio 59 | --- 60 | apiVersion: v1 61 | kind: Service 62 | metadata: 63 | name: minio 64 | namespace: minio 65 | spec: 66 | ports: 67 | - port: 9000 68 | protocol: TCP 69 | targetPort: 9000 70 | selector: 71 | app.kubernetes.io/name: minio 72 | type: ClusterIP 73 | --- 74 | apiVersion: v1 75 | kind: Secret 76 | metadata: 77 | name: minio-test 78 | namespace: minio 79 | stringData: 80 | endpoint: http://minio.minio.svc:9000 81 | bucket: tempo 82 | access_key_id: tempo 83 | access_key_secret: supersecret 84 | type: Opaque 85 | -------------------------------------------------------------------------------- /test/e2e/po_admission_webhook_test.go: -------------------------------------------------------------------------------- 1 | package e2e 2 | 3 | import ( 4 | "context" 5 | "testing" 6 | 7 | monv1 "github.com/rhobs/obo-prometheus-operator/pkg/apis/monitoring/v1" 8 | "gotest.tools/v3/assert" 9 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 10 | "k8s.io/apimachinery/pkg/util/intstr" 11 | "k8s.io/utils/ptr" 12 | ) 13 | 14 | func TestPrometheusRuleWebhook(t *testing.T) { 15 | assertCRDExists(t, 16 | "prometheusrules.monitoring.rhobs", 17 | ) 18 | ts := []testCase{{ 19 | name: "Valid PrometheusRules are accepted", 20 | scenario: validPrometheusRuleIsAccepted, 21 | }, { 22 | name: "Invalid PrometheusRules are rejected", 23 | scenario: invalidPrometheusRuleIsRejected, 24 | }} 25 | 26 | for _, tc := range ts { 27 | t.Run(tc.name, tc.scenario) 28 | } 29 | } 30 | 31 | func validPrometheusRuleIsAccepted(t *testing.T) { 32 | rule := newSinglePrometheusRule(t, "valid-rule", 33 | `increase(controller_runtime_reconcile_errors_total{job="foobar"}[15m]) > 0`, 34 | ) 35 | err := f.K8sClient.Create(context.Background(), rule) 36 | 
assert.NilError(t, err, `failed to create a valid log`) 37 | } 38 | 39 | func invalidPrometheusRuleIsRejected(t *testing.T) { 40 | rule := newSinglePrometheusRule(t, "valid-rule", `FOOBAR({job="foobar"}[15m]) > 0`) 41 | err := f.K8sClient.Create(context.Background(), rule) 42 | assert.ErrorContains(t, err, `denied the request: Rules are not valid`) 43 | } 44 | 45 | func newSinglePrometheusRule(t *testing.T, name, expr string) *monv1.PrometheusRule { 46 | rule := &monv1.PrometheusRule{ 47 | ObjectMeta: metav1.ObjectMeta{ 48 | Name: name, 49 | Namespace: e2eTestNamespace, 50 | }, 51 | Spec: monv1.PrometheusRuleSpec{ 52 | Groups: []monv1.RuleGroup{{ 53 | Name: "single-rule-group", 54 | Rules: []monv1.Rule{{ 55 | Alert: "alert name", 56 | Expr: intstr.FromString(expr), 57 | For: ptr.To(monv1.Duration("15m")), 58 | }}, 59 | }}, 60 | }, 61 | } 62 | f.CleanUp(t, func() { 63 | f.K8sClient.Delete(context.Background(), rule) 64 | }) 65 | 66 | return rule 67 | } 68 | -------------------------------------------------------------------------------- /deploy/perses/perses-operator-deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: perses-operator 5 | labels: 6 | app.kubernetes.io/component: controller 7 | app.kubernetes.io/name: perses-operator 8 | app.kubernetes.io/part-of: observability-operator 9 | spec: 10 | selector: 11 | matchLabels: 12 | app.kubernetes.io/component: controller 13 | app.kubernetes.io/name: perses-operator 14 | app.kubernetes.io/part-of: observability-operator 15 | replicas: 1 16 | template: 17 | metadata: 18 | annotations: 19 | kubectl.kubernetes.io/default-container: perses-operator 20 | labels: 21 | app.kubernetes.io/component: controller 22 | app.kubernetes.io/name: perses-operator 23 | app.kubernetes.io/part-of: observability-operator 24 | spec: 25 | containers: 26 | - name: perses-operator 27 | image: 
quay.io/openshift-observability-ui/perses-operator:v0.2-go-1.23 28 | securityContext: 29 | allowPrivilegeEscalation: false 30 | capabilities: 31 | drop: 32 | - "ALL" 33 | volumeMounts: 34 | - name: openshift-service-ca 35 | mountPath: /ca 36 | readOnly: true 37 | livenessProbe: 38 | httpGet: 39 | path: /healthz 40 | port: 8081 41 | initialDelaySeconds: 15 42 | periodSeconds: 20 43 | readinessProbe: 44 | httpGet: 45 | path: /readyz 46 | port: 8081 47 | initialDelaySeconds: 5 48 | periodSeconds: 10 49 | resources: 50 | limits: 51 | cpu: 500m 52 | memory: 512Mi 53 | requests: 54 | cpu: 100m 55 | memory: 128Mi 56 | serviceAccountName: perses-operator 57 | volumes: 58 | - name: openshift-service-ca 59 | configMap: 60 | name: openshift-service-ca.crt 61 | optional: true 62 | items: 63 | - key: service-ca.crt 64 | path: service-ca.crt 65 | -------------------------------------------------------------------------------- /deploy/operator/observability-operator-deployment.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: apps/v1 3 | kind: Deployment 4 | metadata: 5 | name: observability-operator 6 | labels: 7 | app.kubernetes.io/name: observability-operator 8 | app.kubernetes.io/component: operator 9 | app.kubernetes.io/version: 0.0.1 10 | spec: 11 | selector: 12 | matchLabels: 13 | app.kubernetes.io/name: observability-operator 14 | app.kubernetes.io/component: operator 15 | replicas: 1 16 | template: 17 | metadata: 18 | labels: 19 | app.kubernetes.io/name: observability-operator 20 | app.kubernetes.io/component: operator 21 | app.kubernetes.io/version: 0.0.1 22 | spec: 23 | securityContext: 24 | runAsNonRoot: true 25 | containers: 26 | - name: operator 27 | image: observability-operator:0.0.1 28 | imagePullPolicy: Always 29 | args: 30 | - --namespace=$(NAMESPACE) 31 | env: 32 | - name: NAMESPACE 33 | valueFrom: 34 | fieldRef: 35 | fieldPath: metadata.namespace 36 | securityContext: 37 | allowPrivilegeEscalation: 
false 38 | capabilities: 39 | drop: 40 | - ALL 41 | resources: 42 | limits: 43 | cpu: 400m 44 | memory: 512Mi 45 | requests: 46 | cpu: 100m 47 | memory: 256Mi 48 | readinessProbe: 49 | httpGet: 50 | path: /healthz 51 | port: 8081 52 | livenessProbe: 53 | httpGet: 54 | path: /healthz 55 | port: 8081 56 | volumeMounts: 57 | - mountPath: /etc/tls/private 58 | name: observability-operator-tls 59 | readOnly: true 60 | serviceAccountName: observability-operator-sa 61 | volumes: 62 | - name: observability-operator-tls 63 | secret: 64 | secretName: observability-operator-tls 65 | optional: true 66 | terminationGracePeriodSeconds: 30 67 | -------------------------------------------------------------------------------- /hack/update-obo-prometheus-operator.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -euo pipefail 4 | 5 | # Usage: 6 | # hack/update-obo-prometheus-operator.sh <old-version> <new-version> 7 | # 8 | # Example: 9 | # hack/update-obo-prometheus-operator.sh v0.87.0-rhobs1 v0.88.0-rhobs1 10 | # 11 | # This script replaces all occurrences of the given obo-prometheus-operator 12 | # version string in source files, including: 13 | # - kustomization files (e.g. deploy/dependencies/kustomization.yaml) 14 | # - go.mod files 15 | # - any other YAML/YML files under the repo, excluding generated assets 16 | # under bundle/ and tmp/. 17 | # 18 | # After running this script you should typically run: 19 | # - go mod tidy 20 | # - make bundle 21 | 22 | if [[ $# -ne 2 ]]; then 23 | echo "Usage: $0 <old-version> <new-version>" >&2 24 | exit 1 25 | fi 26 | 27 | OLD_VERSION="$1" 28 | NEW_VERSION="$2" 29 | 30 | SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" 31 | REPO_ROOT="$(cd "${SCRIPT_DIR}/.."
&& pwd)" 32 | 33 | cd "${REPO_ROOT}" 34 | 35 | echo "Updating obo-prometheus-operator from '${OLD_VERSION}' to '${NEW_VERSION}'" 36 | echo "Repository root: ${REPO_ROOT}" 37 | 38 | # Find candidate files: 39 | # - All kustomization files 40 | # - All go.mod files 41 | # - All YAML/YML files 42 | # Exclude: 43 | # - Generated bundle content 44 | # - Temporary / build output 45 | mapfile -t FILES < <( 46 | grep -Rl --null "${OLD_VERSION}" . \ 47 | --include='kustomization.yaml' \ 48 | --include='go.mod' \ 49 | --include='*.yaml' \ 50 | --include='*.yml' \ 51 | | tr '\0' '\n' \ 52 | | grep -v -E '^./bundle/' \ 53 | | grep -v -E '^./tmp/' 54 | ) 55 | 56 | if [[ ${#FILES[@]} -eq 0 ]]; then 57 | echo "No files found containing '${OLD_VERSION}' (nothing to do)." >&2 58 | exit 1 59 | fi 60 | 61 | echo "Will update the following files:" 62 | for f in "${FILES[@]}"; do 63 | echo " - ${f}" 64 | done 65 | 66 | for f in "${FILES[@]}"; do 67 | sed -i "s/${OLD_VERSION}/${NEW_VERSION}/g" "${f}" 68 | done 69 | 70 | echo "Done." 
71 | echo "Next steps (recommended):" 72 | echo " - go mod tidy" 73 | echo " - make bundle" 74 | -------------------------------------------------------------------------------- /test/e2e/traces_tempo_readiness.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: batch/v1 2 | kind: Job 3 | metadata: 4 | name: tempo-readiness-check 5 | namespace: tracing-observability 6 | labels: 7 | app: tempo-readiness-check 8 | spec: 9 | template: 10 | metadata: 11 | labels: 12 | app: tempo-readiness-check 13 | annotations: 14 | service.beta.openshift.io/inject-cabundle: "true" 15 | spec: 16 | restartPolicy: Never 17 | containers: 18 | - name: readiness-checker 19 | image: ghcr.io/grafana/tempo-operator/test-utils:main 20 | command: 21 | - /bin/sh 22 | - -c 23 | - | 24 | echo "=== Curl Request with mTLS Authentication (Ignore Hostname) ===" 25 | curl -v -f -k \ 26 | --cert /etc/mtls/tls.crt \ 27 | --key /etc/mtls/tls.key \ 28 | --connect-timeout 10 \ 29 | --max-time 30 \ 30 | --show-error \ 31 | https://tempo-coo-ingester:3200/ready 32 | 33 | CURL_EXIT_CODE=$? 34 | echo 35 | echo "=== Curl Exit Code: $CURL_EXIT_CODE ===" 36 | 37 | if [ $CURL_EXIT_CODE -eq 0 ]; then 38 | echo "SUCCESS: Service is ready!" 
39 | exit 0 40 | else 41 | echo "FAILED: Service is not ready (curl exit code: $CURL_EXIT_CODE)" 42 | exit 1 43 | fi 44 | volumeMounts: 45 | - name: service-ca 46 | mountPath: /etc/ssl/certs 47 | readOnly: true 48 | - name: mtls-certs 49 | mountPath: /etc/mtls 50 | readOnly: true 51 | - name: tempo-ca 52 | mountPath: /etc/tempo-ca 53 | readOnly: true 54 | volumes: 55 | - name: service-ca 56 | configMap: 57 | name: openshift-service-ca.crt 58 | - name: mtls-certs 59 | secret: 60 | secretName: tempo-coo-gateway-mtls 61 | - name: tempo-ca 62 | configMap: 63 | name: tempo-coo-ca-bundle -------------------------------------------------------------------------------- /test/e2e/framework/prometheus_client.go: -------------------------------------------------------------------------------- 1 | package framework 2 | 3 | import ( 4 | "crypto/tls" 5 | "crypto/x509" 6 | "encoding/json" 7 | "fmt" 8 | "net/http" 9 | "time" 10 | 11 | "github.com/prometheus/common/model" 12 | ) 13 | 14 | // PrometheusClient is an HTTP-based client for querying a Prometheus server 15 | type PrometheusClient struct { 16 | baseURL string 17 | client *http.Client 18 | } 19 | 20 | // PrometheusResponse is used to contain prometheus query results 21 | type PrometheusResponse struct { 22 | Status string `json:"status"` 23 | Error string `json:"error"` 24 | Data prometheusResponseData `json:"data"` 25 | } 26 | 27 | type prometheusResponseData struct { 28 | ResultType string `json:"resultType"` 29 | Result model.Vector `json:"result"` 30 | } 31 | 32 | func NewPrometheusClient(url string) *PrometheusClient { 33 | return &PrometheusClient{ 34 | baseURL: url, 35 | client: &http.Client{ 36 | Timeout: 10 * time.Second, 37 | }, 38 | } 39 | } 40 | 41 | func NewTLSPrometheusClient(url string, caCert string, serverName string) (*PrometheusClient, error) { 42 | ca := x509.NewCertPool() 43 | ok := ca.AppendCertsFromPEM([]byte(caCert)) 44 | if !ok { 45 | return nil, fmt.Errorf("failed to parse ca certificate") 46 | } 
47 | tlsConf := tls.Config{ 48 | RootCAs: ca, 49 | ServerName: serverName, 50 | } 51 | transport := &http.Transport{ 52 | TLSClientConfig: &tlsConf, 53 | } 54 | return &PrometheusClient{ 55 | baseURL: url, 56 | client: &http.Client{ 57 | Transport: transport, 58 | Timeout: 10 * time.Second, 59 | }, 60 | }, nil 61 | } 62 | 63 | func (c *PrometheusClient) Query(query string) (*PrometheusResponse, error) { 64 | url := fmt.Sprintf("%s/api/v1/query?query=%s", c.baseURL, query) 65 | resp, err := c.client.Get(url) 66 | if err != nil { 67 | return nil, fmt.Errorf("unable to query Prometheus: %v", err) 68 | } 69 | defer resp.Body.Close() 70 | 71 | var result PrometheusResponse 72 | if err := json.NewDecoder(resp.Body).Decode(&result); err != nil { 73 | return nil, fmt.Errorf("unable to parse query response: %v", err) 74 | } 75 | 76 | return &result, nil 77 | } 78 | -------------------------------------------------------------------------------- /docs/assess-resources.md: -------------------------------------------------------------------------------- 1 | 2 | # Procedure to assess resources used by Observability Operator 3 | 4 | 1. Provision an OpenShift cluster 5 | 6 | 2. Run `oc apply -f hack/olm/catalog-src.yaml` to install the Observability Operator (OO) catalogue. 7 | 8 | 3. Using the UI install OO 9 | 10 | 4. Scale down the following deployments, so we can remove the currently set limits on OO: 11 | 12 | ```bash 13 | # Scale down the cluster version operator 14 | oc -n openshift-cluster-version scale deployment.apps/cluster-version-operator --replicas=0 15 | # Scale down the OLM operator 16 | oc -n openshift-operator-lifecycle-manager scale deployment.apps/olm-operator --replicas=0 17 | ``` 18 | 19 | 5. 
Edit the OO and Prometheus Operator deployment to remove its limits with: 20 | 21 | ```bash 22 | oc -n openshift-operators patch deployment.apps/observability-operator --type='json' -p='[{"op": "remove", "path": "/spec/template/spec/containers/0/resources/limits"}]' 23 | oc -n openshift-operators patch deployment.apps/observability-operator-prometheus-operator --type='json' -p='[{"op": "remove", "path": "/spec/template/spec/containers/0/resources/limits"}]' 24 | ``` 25 | 26 | 6. Run the load tests with `./hack/loadtest/test.sh` 27 | 28 | 7. Using the OpenShift UI in the Developer tab, navigate to Observe and input the following queries. 29 | 1. For memory we should look at `container_memory_rss` as that is the metric used by kubelet to OOM kill the container 30 | 2. For CPU we should look at `container_cpu_usage_seconds_total` as that is the metric used by kubelet 31 | 32 | ```bash 33 | # PromQL for memory 34 | container_memory_rss{container!~"|POD", namespace="openshift-operators"} 35 | # PromQL for CPU 36 | sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace='openshift-operators'}) by (pod) 37 | ``` 38 | 39 | 8. Take for both OO and Prometheus Operator measurements of their performance 40 | 1. Establish a baseline for both CPU and memory (minimum they consume), those will be our `requests` 41 | 2. Multiply that value by 3 and validate that it fits the intervals of values observed, those will be our `limits` 42 | 3.
Give some extra head room to `limits` to anticipate feature growth -------------------------------------------------------------------------------- /pkg/operator/scheme.go: -------------------------------------------------------------------------------- 1 | package operator 2 | 3 | import ( 4 | tempov1alpha1 "github.com/grafana/tempo-operator/api/tempo/v1alpha1" 5 | otelv1beta1 "github.com/open-telemetry/opentelemetry-operator/apis/v1beta1" 6 | osv1 "github.com/openshift/api/console/v1" 7 | osv1alpha1 "github.com/openshift/api/console/v1alpha1" 8 | operatorv1 "github.com/openshift/api/operator/v1" 9 | olmv1alpha1 "github.com/operator-framework/api/pkg/operators/v1alpha1" 10 | monv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" 11 | monitoringv1 "github.com/rhobs/obo-prometheus-operator/pkg/apis/monitoring/v1" 12 | persesv1alpha1 "github.com/rhobs/perses-operator/api/v1alpha1" 13 | corev1 "k8s.io/api/core/v1" 14 | apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" 15 | "k8s.io/apimachinery/pkg/runtime" 16 | utilruntime "k8s.io/apimachinery/pkg/util/runtime" 17 | clientgoscheme "k8s.io/client-go/kubernetes/scheme" 18 | 19 | rhobsv1alpha1 "github.com/rhobs/observability-operator/pkg/apis/monitoring/v1alpha1" 20 | obsv1alpha1 "github.com/rhobs/observability-operator/pkg/apis/observability/v1alpha1" 21 | uiv1alpha1 "github.com/rhobs/observability-operator/pkg/apis/uiplugin/v1alpha1" 22 | ) 23 | 24 | func NewScheme(cfg *OperatorConfiguration) *runtime.Scheme { 25 | scheme := runtime.NewScheme() 26 | 27 | utilruntime.Must(clientgoscheme.AddToScheme(scheme)) 28 | utilruntime.Must(rhobsv1alpha1.AddToScheme(scheme)) 29 | utilruntime.Must(apiextensionsv1.AddToScheme(scheme)) 30 | utilruntime.Must(monitoringv1.AddToScheme(scheme)) 31 | utilruntime.Must(uiv1alpha1.AddToScheme(scheme)) 32 | utilruntime.Must(obsv1alpha1.AddToScheme(scheme)) 33 | utilruntime.Must(otelv1beta1.AddToScheme(scheme)) 34 | 
utilruntime.Must(tempov1alpha1.AddToScheme(scheme)) 35 | 36 | if cfg.FeatureGates.OpenShift.Enabled { 37 | utilruntime.Must(osv1.Install(scheme)) 38 | utilruntime.Must(osv1alpha1.Install(scheme)) 39 | utilruntime.Must(operatorv1.Install(scheme)) 40 | utilruntime.Must(corev1.AddToScheme(scheme)) 41 | utilruntime.Must(monv1.AddToScheme(scheme)) 42 | utilruntime.Must(persesv1alpha1.AddToScheme(scheme)) 43 | utilruntime.Must(olmv1alpha1.AddToScheme(scheme)) 44 | } 45 | 46 | return scheme 47 | } 48 | -------------------------------------------------------------------------------- /test/e2e/traces_verify.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: batch/v1 2 | kind: Job 3 | metadata: 4 | name: verify-traces-traceql-grpc 5 | namespace: tracing-observability 6 | spec: 7 | template: 8 | spec: 9 | containers: 10 | - name: verify-traces 11 | image: ghcr.io/grafana/tempo-operator/test-utils:main 12 | command: 13 | - /bin/bash 14 | - -eux 15 | - -c 16 | args: 17 | - | 18 | token=$(cat /var/run/secrets/kubernetes.io/serviceaccount/token) 19 | curl \ 20 | -v -G \ 21 | --header "Authorization: Bearer $token" \ 22 | --cacert /var/run/secrets/kubernetes.io/serviceaccount/service-ca.crt \ 23 | https://tempo-coo-gateway.tracing-observability.svc:8080/api/traces/v1/application/tempo/api/search \ 24 | --data-urlencode 'q={ resource.service.name="grpc" }' \ 25 | | tee /tmp/jaeger.out 26 | num_traces=$(jq ".traces | length" /tmp/jaeger.out) 27 | if [[ "$num_traces" != "10" ]]; then 28 | echo && echo "The API returned $num_traces instead of 10 traces." 
29 | exit 1 30 | fi 31 | restartPolicy: Never 32 | --- 33 | apiVersion: rbac.authorization.k8s.io/v1 34 | kind: ClusterRole 35 | metadata: 36 | name: tempostack-traces-reader 37 | rules: 38 | - apiGroups: 39 | - 'tempo.grafana.com' 40 | resources: 41 | - application 42 | resourceNames: 43 | - traces 44 | verbs: 45 | - 'get' 46 | --- 47 | apiVersion: rbac.authorization.k8s.io/v1 48 | kind: ClusterRoleBinding 49 | metadata: 50 | name: tempostack-traces-reader 51 | roleRef: 52 | apiGroup: rbac.authorization.k8s.io 53 | kind: ClusterRole 54 | name: tempostack-traces-reader 55 | subjects: 56 | - kind: Group 57 | apiGroup: rbac.authorization.k8s.io 58 | name: system:authenticated 59 | --- 60 | apiVersion: rbac.authorization.k8s.io/v1 61 | kind: RoleBinding 62 | metadata: 63 | name: view 64 | namespace: tracing-observability 65 | roleRef: 66 | apiGroup: rbac.authorization.k8s.io 67 | kind: ClusterRole 68 | name: view 69 | subjects: 70 | - kind: ServiceAccount 71 | name: default 72 | namespace: tracing-observability 73 | -------------------------------------------------------------------------------- /test/e2e/uiplugin_test.go: -------------------------------------------------------------------------------- 1 | package e2e 2 | 3 | import ( 4 | "context" 5 | "flag" 6 | "testing" 7 | "time" 8 | 9 | "gotest.tools/v3/assert" 10 | appsv1 "k8s.io/api/apps/v1" 11 | "k8s.io/apimachinery/pkg/api/errors" 12 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 13 | "k8s.io/apimachinery/pkg/util/wait" 14 | "sigs.k8s.io/controller-runtime/pkg/client" 15 | 16 | uiv1 "github.com/rhobs/observability-operator/pkg/apis/uiplugin/v1alpha1" 17 | "github.com/rhobs/observability-operator/test/e2e/framework" 18 | ) 19 | 20 | var uiPluginInstallNS string 21 | 22 | func TestUIPlugin(t *testing.T) { 23 | if !f.IsOpenshiftCluster { 24 | t.Skip("The tests are skipped on non-ocp cluster") 25 | } 26 | 27 | flag.Parse() 28 | uiPluginInstallNS = *operatorInstallNS 29 | 30 | assertCRDExists(t, 
"uiplugins.observability.openshift.io") 31 | 32 | ts := []testCase{ 33 | { 34 | name: "Create dashboards UIPlugin", 35 | scenario: dashboardsUIPlugin, 36 | }, 37 | } 38 | 39 | for _, tc := range ts { 40 | t.Run(tc.name, tc.scenario) 41 | } 42 | } 43 | 44 | func dashboardsUIPlugin(t *testing.T) { 45 | db := newDashboardsUIPlugin(t) 46 | err := f.K8sClient.Create(context.Background(), db) 47 | assert.NilError(t, err, "failed to create a dashboards UIPlugin") 48 | // Check deploy observability-ui-dashboards ius ready 49 | name := "observability-ui-dashboards" 50 | dbDeployment := appsv1.Deployment{} 51 | f.GetResourceWithRetry(t, name, uiPluginInstallNS, &dbDeployment) 52 | f.AssertDeploymentReady(name, uiPluginInstallNS, framework.WithTimeout(5*time.Minute))(t) 53 | } 54 | 55 | func newDashboardsUIPlugin(t *testing.T) *uiv1.UIPlugin { 56 | db := &uiv1.UIPlugin{ 57 | ObjectMeta: metav1.ObjectMeta{ 58 | Name: "dashboards", 59 | }, 60 | Spec: uiv1.UIPluginSpec{ 61 | Type: uiv1.UIPluginType("Dashboards"), 62 | }, 63 | } 64 | f.CleanUp(t, func() { 65 | f.K8sClient.Delete(context.Background(), db) 66 | waitForDBUIPluginDeletion(db) 67 | }) 68 | 69 | return db 70 | } 71 | 72 | func waitForDBUIPluginDeletion(db *uiv1.UIPlugin) error { 73 | return wait.PollUntilContextTimeout(context.Background(), 5*time.Second, wait.ForeverTestTimeout, true, func(ctx context.Context) (done bool, err error) { 74 | err = f.K8sClient.Get(context.Background(), 75 | client.ObjectKey{Name: db.Name}, 76 | db) 77 | return errors.IsNotFound(err), nil 78 | }) 79 | } 80 | -------------------------------------------------------------------------------- /deploy/package-operator/dependencies/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | namespace: observability-operator 4 | commonAnnotations: 5 | package-operator.run/phase: dependencies 6 | resources: 7 | - ../../dependencies/ 
8 | patches: 9 | - patch: |- 10 | apiVersion: v1 11 | kind: ServiceAccount 12 | metadata: 13 | name: obo-prometheus-operator-admission-webhook 14 | namespace: operators 15 | labels: 16 | app.kubernetes.io/name: prometheus-operator-admission-webhook 17 | $patch: delete 18 | - patch: |- 19 | apiVersion: rbac.authorization.k8s.io/v1 20 | kind: ClusterRole 21 | metadata: 22 | name: obo-prometheus-operator-admission-webhook 23 | namespace: operators 24 | labels: 25 | app.kubernetes.io/name: prometheus-operator-admission-webhook 26 | $patch: delete 27 | - patch: |- 28 | apiVersion: rbac.authorization.k8s.io/v1 29 | kind: ClusterRoleBinding 30 | metadata: 31 | name: obo-prometheus-operator-admission-webhook 32 | namespace: operators 33 | labels: 34 | app.kubernetes.io/name: prometheus-operator-admission-webhook 35 | $patch: delete 36 | - patch: |- 37 | apiVersion: v1 38 | kind: Service 39 | metadata: 40 | name: obo-prometheus-operator-admission-webhook 41 | namespace: operators 42 | labels: 43 | app.kubernetes.io/name: prometheus-operator-admission-webhook 44 | $patch: delete 45 | - patch: |- 46 | apiVersion: apps/v1 47 | kind: Deployment 48 | metadata: 49 | name: obo-prometheus-operator-admission-webhook 50 | namespace: operators 51 | labels: 52 | app.kubernetes.io/name: prometheus-operator-admission-webhook 53 | $patch: delete 54 | - patch: |- 55 | apiVersion: admissionregistration.k8s.io/v1 56 | kind: ValidatingWebhookConfiguration 57 | metadata: 58 | name: alertmanagerconfigs.monitoring.rhobs 59 | namespace: operators 60 | labels: 61 | app.kubernetes.io/name: prometheus-operator-admission-webhook 62 | $patch: delete 63 | - patch: |- 64 | apiVersion: admissionregistration.k8s.io/v1 65 | kind: ValidatingWebhookConfiguration 66 | metadata: 67 | name: obo-prometheusrules.monitoring.rhobs 68 | namespace: operators 69 | labels: 70 | app.kubernetes.io/name: prometheus-operator-admission-webhook 71 | $patch: delete 72 | 
-------------------------------------------------------------------------------- /pkg/controllers/monitoring/monitoring-stack/testdata/no-tls: -------------------------------------------------------------------------------- 1 | 2 | - job_name: prometheus-self 3 | scheme: http 4 | tls_config: 5 | ca_file: "" 6 | server_name: "" 7 | relabel_configs: 8 | - action: keep 9 | source_labels: 10 | - __meta_kubernetes_service_label_app_kubernetes_io_name 11 | regex: ms-no-tls-prometheus 12 | - action: keep 13 | source_labels: 14 | - __meta_kubernetes_endpoint_port_name 15 | regex: web 16 | - source_labels: 17 | - __meta_kubernetes_namespace 18 | target_label: namespace 19 | - source_labels: 20 | - __meta_kubernetes_service_name 21 | target_label: service 22 | - source_labels: 23 | - __meta_kubernetes_pod_name 24 | target_label: pod 25 | - source_labels: 26 | - __meta_kubernetes_pod_container_name 27 | target_label: container 28 | - target_label: endpoint 29 | replacement: web 30 | kubernetes_sd_configs: 31 | - role: endpoints 32 | namespaces: 33 | names: 34 | - ns-no-tls 35 | - job_name: alertmanager-self 36 | scrape_interval: 30s 37 | scrape_timeout: 10s 38 | metrics_path: /metrics 39 | scheme: http 40 | tls_config: 41 | ca_file: "" 42 | server_name: "" 43 | relabel_configs: 44 | - source_labels: 45 | - __meta_kubernetes_service_label_app_kubernetes_io_name 46 | separator: ; 47 | regex: ms-no-tls-alertmanager 48 | replacement: $1 49 | action: keep 50 | - source_labels: [__meta_kubernetes_endpoint_port_name] 51 | separator: ; 52 | regex: web 53 | replacement: $1 54 | action: keep 55 | - source_labels: [__meta_kubernetes_namespace] 56 | separator: ; 57 | regex: (.*) 58 | target_label: namespace 59 | replacement: $1 60 | action: replace 61 | - source_labels: [__meta_kubernetes_service_name] 62 | separator: ; 63 | regex: (.*) 64 | target_label: service 65 | replacement: $1 66 | action: replace 67 | - source_labels: [__meta_kubernetes_pod_name] 68 | separator: ; 69 | regex: 
(.*) 70 | target_label: pod 71 | replacement: $1 72 | action: replace 73 | - source_labels: [__meta_kubernetes_pod_container_name] 74 | separator: ; 75 | regex: (.*) 76 | target_label: container 77 | replacement: $1 78 | action: replace 79 | - separator: ; 80 | regex: (.*) 81 | target_label: endpoint 82 | replacement: web 83 | action: replace 84 | kubernetes_sd_configs: 85 | - role: endpoints 86 | namespaces: 87 | names: 88 | - ns-no-tls -------------------------------------------------------------------------------- /deploy/perses/perses-operator-cluster-role.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRole 3 | metadata: 4 | creationTimestamp: null 5 | name: perses-operator 6 | rules: 7 | - apiGroups: 8 | - apps 9 | resources: 10 | - deployments 11 | - statefulsets 12 | verbs: 13 | - create 14 | - delete 15 | - get 16 | - list 17 | - patch 18 | - update 19 | - watch 20 | - apiGroups: 21 | - "" 22 | resources: 23 | - events 24 | verbs: 25 | - create 26 | - patch 27 | - apiGroups: 28 | - "" 29 | resources: 30 | - services 31 | - configmaps 32 | - secrets 33 | verbs: 34 | - get 35 | - patch 36 | - update 37 | - create 38 | - delete 39 | - list 40 | - watch 41 | - apiGroups: 42 | - perses.dev 43 | resources: 44 | - perses 45 | verbs: 46 | - create 47 | - delete 48 | - get 49 | - list 50 | - patch 51 | - update 52 | - watch 53 | - apiGroups: 54 | - perses.dev 55 | resources: 56 | - perses/finalizers 57 | verbs: 58 | - update 59 | - apiGroups: 60 | - perses.dev 61 | resources: 62 | - perses/status 63 | verbs: 64 | - get 65 | - patch 66 | - update 67 | - apiGroups: 68 | - perses.dev 69 | resources: 70 | - persesdashboards 71 | verbs: 72 | - create 73 | - delete 74 | - get 75 | - list 76 | - patch 77 | - update 78 | - watch 79 | - apiGroups: 80 | - perses.dev 81 | resources: 82 | - persesdashboards/finalizers 83 | verbs: 84 | - update 85 | - apiGroups: 86 | - 
perses.dev 87 | resources: 88 | - persesdashboards/status 89 | verbs: 90 | - get 91 | - patch 92 | - update 93 | - apiGroups: 94 | - perses.dev 95 | resources: 96 | - persesdatasources 97 | verbs: 98 | - create 99 | - delete 100 | - get 101 | - list 102 | - patch 103 | - update 104 | - watch 105 | - apiGroups: 106 | - perses.dev 107 | resources: 108 | - persesdatasources/finalizers 109 | verbs: 110 | - update 111 | - apiGroups: 112 | - perses.dev 113 | resources: 114 | - persesdatasources/status 115 | verbs: 116 | - get 117 | - patch 118 | - update 119 | -------------------------------------------------------------------------------- /must-gather/README.md: -------------------------------------------------------------------------------- 1 | observability-operator must-gather 2 | ================= 3 | 4 | `observability-operator-must-gather` is a tool built on top of [OpenShift must-gather](https://github.com/openshift/must-gather) 5 | that expands its capabilities to gather Observability Operator information. 6 | 7 | **Note:** This image is only built for x86_64 architecture 8 | 9 | ### Usage 10 | To gather only Observability Operator information: 11 | ```sh 12 | oc adm must-gather --image=quay.io/rhobs/observability-operator:latest -- /usr/bin/gather 13 | ``` 14 | 15 | To gather default [OpenShift must-gather](https://github.com/openshift/must-gather) in addition to Observability Operator information: 16 | ```sh 17 | oc adm must-gather --image-stream=openshift/must-gather --image=quay.io/rhobs/observability-operator -- /usr/bin/gather 18 | ``` 19 | 20 | The command above will create a local directory with a dump of the Observability Operator state. 
21 | 22 | You will get a dump of: 23 | - The observability-operator operator deployment 24 | - All observability-operator operant pods 25 | - Alertmanager and Prometheus status for all stacks 26 | 27 | In order to get data about other parts of the cluster (not specific to observability-operator ) you should 28 | run `oc adm must-gather` (without passing a custom image). Run `oc adm must-gather -h` to see more options. 29 | 30 | Example must-gather for observability-operator output: 31 | ``` 32 | monitoring 33 | └── observability-operator 34 | ├── [namespace name] 35 | │   └── [monitoring stack name] 36 | │   ├── alertmanager 37 | │   │   ├── status.json 38 | │   │   └── status.stderr 39 | │   └── prometheus 40 | │   ├── alertmanagers.json 41 | │   ├── alertmanagers.stderr 42 | │   ├── prometheus-[monitoring stack name]-[replica] 43 | │   │   ├── status 44 | │   │   │   ├── runtimeinfo.json 45 | │   │   │   ├── runtimeinfo.stderr 46 | │   │   │   ├── tsdb.json 47 | │   │   │   └── tsdb.stderr 48 | │   │   ├── targets-active.json 49 | │   │   ├── targets-active.stderr 50 | │   │   ├── targets?state=active.json 51 | │   │   └── targets?state=active.stderr 52 | │   ├── rules.json 53 | │   ├── rules.stderr 54 | │   └── status 55 | │   ├── config.json 56 | │   ├── config.stderr 57 | │   ├── flags.json 58 | │   └── flags.stderr 59 | ├── operants.yaml 60 | └── operator.yaml 61 | ``` 62 | -------------------------------------------------------------------------------- /DEPENDENCY_CONSTRAINTS.md: -------------------------------------------------------------------------------- 1 | # Dependency Constraints 2 | 3 | This document describes the constraints and pinned dependencies in this project. 
4 | 5 | ## Pinned Dependencies 6 | 7 | ### OpenShift API (`github.com/openshift/api`) 8 | 9 | **Current Version:** `v0.0.0-20240404200104-96ed2d49b255` 10 | 11 | **Why Pinned:** The observability-operator needs to support both OpenShift console API `v1` and `v1alpha1` for backward compatibility: 12 | - OpenShift >= 4.17 uses `console/v1` API 13 | - OpenShift < 4.17 uses `console/v1alpha1` API 14 | 15 | Newer versions of `github.com/openshift/api` (after April 2024) have removed the `console/v1alpha1` API, breaking compatibility with older OpenShift versions. 16 | 17 | **Impact:** The codebase maintains dual API support with runtime version detection to create the appropriate Console Plugin resources. 18 | 19 | **Files Affected:** 20 | - `pkg/controllers/uiplugin/controller.go` - Version detection logic 21 | - `pkg/controllers/uiplugin/components.go` - Dual Console Plugin creation 22 | - `pkg/controllers/uiplugin/plugin_info_builder.go` - Plugin info structure with LegacyProxies 23 | - `pkg/operator/scheme.go` - API scheme registration 24 | - All uiplugin package files using `osv1alpha1` imports 25 | 26 | ## Safe to Update Dependencies 27 | 28 | The following dependencies can be safely updated: 29 | - Kubernetes API packages (`k8s.io/*`) 30 | - Controller Runtime (`sigs.k8s.io/controller-runtime`) 31 | - Prometheus packages (`github.com/prometheus/*`) 32 | - RHOBS Prometheus Operator (`github.com/rhobs/obo-prometheus-operator`) 33 | - Go standard library extensions (`golang.org/x/*`) 34 | - Utility libraries (`github.com/go-logr/logr`, `github.com/google/go-cmp`, etc.) 35 | 36 | ## Updating Dependencies 37 | 38 | To update dependencies safely: 39 | 40 | 1. **Individual updates:** Update specific packages excluding openshift/api: 41 | ```bash 42 | go get -u k8s.io/api k8s.io/apimachinery k8s.io/client-go 43 | go get -u sigs.k8s.io/controller-runtime 44 | go get -u github.com/rhobs/obo-prometheus-operator@v0.83.0-rhobs1 45 | ``` 46 | 47 | 2. 
**Avoid bulk updates:** Don't use `go get -u ./...` as it will try to update openshift/api 48 | 49 | 3. **Always test:** Run `make test-unit` and `make build` after each update 50 | 51 | 4. **Fix go.sum:** Run `go mod tidy` after updates to fix missing entries 52 | 53 | ## Future Considerations 54 | 55 | When OpenShift < 4.17 support is no longer needed: 56 | 1. Remove `console/v1alpha1` API usage 57 | 2. Unpin `github.com/openshift/api` 58 | 3. Remove dual API support code 59 | 4. Update this document -------------------------------------------------------------------------------- /docs/user-guides/thanos_querier.md: -------------------------------------------------------------------------------- 1 | # Using ThanosQuerier to federate MonitoringStacks 2 | 3 | `ThanosQuerier` can be used to query data from a set of `MonitoringStack` resources. 4 | 5 | ## Architecture 6 | 7 | A `ThanosQuerier` resource selects a set of `MonitoringStack` resources using 8 | label and namespace selectors. 9 | 10 | Under the hood, the observability operator creates a Kubernetes Deployment 11 | which is configured to connect to the Thanos sidecars running in the Prometheus 12 | pods. 13 | 14 | ## Tutorial 15 | 16 | ### Pre-requisites 17 | 18 | * Observability operator installed and running. 19 | * Cluster admin permissions. 20 | 21 | ### Installation 22 | 23 | We are going to create 24 | * 1 `MonitoringStack` running in the `project-a` namespace which monitors API services running in `project-a` and `project-b`. 25 | * 1 `MonitoringStack` running in the `project-c` namespace which monitors the backend service running in `project-c`. 26 | * 1 `ThanosQuerier` running in the `project-d` namespace which federates data from the 2 `MonitoringStack`s. 27 | * Deployments running in `project-a`, `project-b` and `project-c` namespaces which represent a multi-service application. 28 | * Load generation Deployment running in the `project-d` namespace. 
29 | 30 | To install the environment, run: 31 | 32 | ```shell 33 | kubectl apply -f docs/user-guides/thanos_querier/install 34 | ``` 35 | 36 | To verify the installation, run: 37 | 38 | ```shell 39 | kubectl wait --for=condition=Available -A --timeout=10s -l app.kubernetes.io/part-of=monitoring monitoringstacks 40 | kubectl wait --for=condition=Available -A --timeout=10s -l app.kubernetes.io/managed-by=observability-operator deployments 41 | kubectl wait --for=condition=Available -A --timeout=10s -l app.kubernetes.io/part-of=myapp deployments 42 | ``` 43 | 44 | To access the Thanos Query UI, run: 45 | 46 | ```shell 47 | kubectl port-forward -n project-c svc/thanos-querier-example 10902:localhost:10902 48 | ``` 49 | 50 | Then open `http://localhost:10902` in your browser. You can check that all Prometheus instances are present in the Stores page and that metrics are showing up. 51 | 52 | ### Configuring a Perses dashboard 53 | 54 | To install the example Perses dashboard (+datasource), run: 55 | 56 | ```shell 57 | kubectl apply -f docs/user-guides/thanos_querier/console 58 | ``` 59 | 60 | To verify the installation, run: 61 | 62 | ``` 63 | kubectl wait --for=condition=Available uiplugins monitoring 64 | ``` 65 | 66 | You should now be able to access the custom dashboard under `Observe > Dashboards (Perses)` in the `project-d` namespace. 
67 | -------------------------------------------------------------------------------- /pkg/controllers/monitoring/monitoring-stack/testdata/tls: -------------------------------------------------------------------------------- 1 | 2 | - job_name: prometheus-self 3 | scheme: https 4 | tls_config: 5 | ca_file: "/etc/prometheus/secrets/prometheus-tls/ca.pem" 6 | server_name: "ms-with-tls-prometheus" 7 | relabel_configs: 8 | - action: keep 9 | source_labels: 10 | - __meta_kubernetes_service_label_app_kubernetes_io_name 11 | regex: ms-with-tls-prometheus 12 | - action: keep 13 | source_labels: 14 | - __meta_kubernetes_endpoint_port_name 15 | regex: web 16 | - source_labels: 17 | - __meta_kubernetes_namespace 18 | target_label: namespace 19 | - source_labels: 20 | - __meta_kubernetes_service_name 21 | target_label: service 22 | - source_labels: 23 | - __meta_kubernetes_pod_name 24 | target_label: pod 25 | - source_labels: 26 | - __meta_kubernetes_pod_container_name 27 | target_label: container 28 | - target_label: endpoint 29 | replacement: web 30 | kubernetes_sd_configs: 31 | - role: endpoints 32 | namespaces: 33 | names: 34 | - ns-with-tls 35 | - job_name: alertmanager-self 36 | scrape_interval: 30s 37 | scrape_timeout: 10s 38 | metrics_path: /metrics 39 | scheme: https 40 | tls_config: 41 | ca_file: "/etc/prometheus/secrets/alertmanager-tls/ca.pem" 42 | server_name: "ms-with-tls-alertmanager" 43 | relabel_configs: 44 | - source_labels: 45 | - __meta_kubernetes_service_label_app_kubernetes_io_name 46 | separator: ; 47 | regex: ms-with-tls-alertmanager 48 | replacement: $1 49 | action: keep 50 | - source_labels: [__meta_kubernetes_endpoint_port_name] 51 | separator: ; 52 | regex: web 53 | replacement: $1 54 | action: keep 55 | - source_labels: [__meta_kubernetes_namespace] 56 | separator: ; 57 | regex: (.*) 58 | target_label: namespace 59 | replacement: $1 60 | action: replace 61 | - source_labels: [__meta_kubernetes_service_name] 62 | separator: ; 63 | regex: (.*) 64 | 
target_label: service 65 | replacement: $1 66 | action: replace 67 | - source_labels: [__meta_kubernetes_pod_name] 68 | separator: ; 69 | regex: (.*) 70 | target_label: pod 71 | replacement: $1 72 | action: replace 73 | - source_labels: [__meta_kubernetes_pod_container_name] 74 | separator: ; 75 | regex: (.*) 76 | target_label: container 77 | replacement: $1 78 | action: replace 79 | - separator: ; 80 | regex: (.*) 81 | target_label: endpoint 82 | replacement: web 83 | action: replace 84 | kubernetes_sd_configs: 85 | - role: endpoints 86 | namespaces: 87 | names: 88 | - ns-with-tls -------------------------------------------------------------------------------- /docs/design/assets/release.uml: -------------------------------------------------------------------------------- 1 | @startuml 2 | 'NOTE: use http://sujoyu.github.io/plantuml-previewer to render the Sequence diagram 3 | 4 | title Release Workflow for 1.2.3 release 5 | 6 | autonumber 7 | actor "Release\n Manager" as RM order 10 8 | actor Reviewer order 20 9 | database Upstream order 30 10 | control CI order 40 11 | participant Publish order 50 12 | database Quay order 60 13 | 14 | activate RM 15 | RM -> RM: make initiate-release 16 | RM -> Upstream: open PR ""chore(release): 1.2.3"" \n against ""main"" branch 17 | deactivate RM 18 | 19 | activate Upstream 20 | Upstream -> CI: run checks, e2e 21 | CI -[#green]> Upstream: pass 22 | deactivate Upstream 23 | 24 | Reviewer -> Upstream: approve &\n merge to main branch 25 | 26 | == candidate release prep == 27 | activate CI 28 | Upstream -> CI: release workflow (release.yaml) 29 | CI -> CI: run e2e 30 | CI -> CI: generate release notes 31 | CI -> CI: create GH release notes 32 | CI -> Upstream: git tag ""v1.2.3"" and \ngit push tag ""v1.2.3"" 33 | CI -> Upstream: publish pre-release to GH 34 | deactivate CI 35 | == candidate release prep == 36 | 37 | == candidate release == 38 | Upstream -> CI: start candidate release (olm-candidate.yaml) 39 | CI -> Publish: 
candidate release (olm-publish.yaml) 40 | CI -> Publish: run olm-publish (channels: development, candidate) 41 | activate Publish #FFBBBB 42 | Publish -> Publish: login to quay 43 | Publish -> Publish: merge olm-catalog branch 44 | Publish -> Publish: build operator image 45 | Publish -[#red]> Quay: push operator image 46 | Publish -> Publish: build bundle image 47 | Publish -[#red]> Quay: push bundle image 48 | Publish -> Publish: make catalog-image 49 | 50 | activate Publish #lightcoral 51 | Publish -> Publish: opm render to olm/observability-operator-index/index.yaml 52 | Publish -> Publish: olm/update-channels.sh 53 | deactivate Publish 54 | Publish -> Publish: git commit changes to bundle and olm dir 55 | Publish -[#red]> Quay: publish catalog-image 56 | Publish -> Upstream: push catalog changes to olm-catalog branch 57 | deactivate Publish 58 | == candidate release == 59 | 60 | == stable release == 61 | RM -> Upstream: remove pre-release from release 62 | Upstream -> CI: stable release (olm-stable.yaml) 63 | CI -> Publish: run olm-publish (channels: development, candidate, stable) 64 | activate Publish #DarkSalmon 65 | Publish -> Publish: olm-publish workflow 66 | ...same release workflow... 67 | Publish -> Quay: publish catalog-image 68 | Publish -> Upstream: push catalog changes to olm-catalog branch 69 | deactivate Publish 70 | == stable release == 71 | 72 | @enduml 73 | -------------------------------------------------------------------------------- /pkg/apis/observability/v1alpha1/tracing.go: -------------------------------------------------------------------------------- 1 | package v1alpha1 2 | 3 | // TracingSpec defines the desired state of the tracing capability. 
4 | // +kubebuilder:validation:XValidation:rule="(!has(self.enabled) || !self.enabled) || [has(self.storage.objectStorage.s3), has(self.storage.objectStorage.s3STS), has(self.storage.objectStorage.s3CCO), has(self.storage.objectStorage.azure), has(self.storage.objectStorage.azureWIF), has(self.storage.objectStorage.gcs), has(self.storage.objectStorage.gcsWIF)].filter(x, x).size() > 0",message="Storage configuration is required when tracing is enabled" 5 | type TracingSpec struct { 6 | CommonCapabilitiesSpec `json:",inline"` 7 | 8 | // Storage defines the storage for the tracing capability 9 | Storage TracingStorageSpec `json:"storage,omitempty"` 10 | } 11 | 12 | // TracingStorageSpec defines the storage for tracing capability. 13 | type TracingStorageSpec struct { 14 | // ObjectStorageSpec defines the object storage configuration for tracing. 15 | // +optional 16 | // +kubebuilder:validation:Optional 17 | // +operator-sdk:csv:customresourcedefinitions:type=spec,displayName="Object storage config" 18 | ObjectStorageSpec TracingObjectStorageSpec `json:"objectStorage,omitempty"` 19 | } 20 | 21 | // TracingObjectStorageSpec defines the object storage for the tracing capability. 22 | // +kubebuilder:validation:XValidation:rule="[has(self.s3), has(self.s3STS), has(self.s3CCO), has(self.azure), has(self.azureWIF), has(self.gcs), has(self.gcsWIF)].filter(x, x).size() <= 1",message="Only one or zero storage configurations can be specified" 23 | type TracingObjectStorageSpec struct { 24 | // S3 defines the S3 object storage configuration. 25 | S3 *S3Spec `json:"s3,omitempty"` 26 | // S3STS defines the S3 object storage configuration using short-lived credentials. 27 | S3STS *S3STSpec `json:"s3STS,omitempty"` 28 | // S3CCO defines the S3 object storage configuration using CCO. 29 | S3CCO *S3CCOSpec `json:"s3CCO,omitempty"` 30 | 31 | // Azure defines the Azure Blob Storage configuration. 
32 | Azure *AzureSpec `json:"azure,omitempty"` 33 | // AzureWIF defines the Azure Blob Storage configuration using a Workload Identity Federation. 34 | AzureWIF *AzureWIFSpec `json:"azureWIF,omitempty"` 35 | 36 | // GCS defines the Google Cloud Storage configuration. 37 | GCS *GCSSpec `json:"gcs,omitempty"` 38 | // GCSSToken defines the Google Cloud Storage configuration using short-lived tokens. 39 | GCSSTSSpec *GCSWIFSpec `json:"gcsWIF,omitempty"` 40 | 41 | // TLS configuration for reaching the object storage endpoint. 42 | // 43 | // +optional 44 | // +kubebuilder:validation:Optional 45 | // +operator-sdk:csv:customresourcedefinitions:type=spec,displayName="TLS Config" 46 | TLS *TLSSpec `json:"tls,omitempty"` 47 | } 48 | -------------------------------------------------------------------------------- /.github/olm-publish/action.yaml: -------------------------------------------------------------------------------- 1 | name: 'Publish to OLM' 2 | description: 'Publishes the operator to OLM' 3 | inputs: 4 | quay_login: 5 | description: "Quay login" 6 | required: true 7 | quay_token: 8 | description: "Quay token" 9 | required: true 10 | runs: 11 | using: composite 12 | steps: 13 | - name: Check Go version 14 | id: go-version 15 | shell: bash 16 | run: | 17 | go version 18 | - name: Setup Go environment 19 | uses: actions/setup-go@v5 20 | if: steps.go-version.outcome == 'failure' 21 | with: 22 | go-version-file: 'go.mod' 23 | check-latest: true 24 | 25 | - name: Install tools 26 | uses: ./.github/tools-cache 27 | 28 | - name: Registry Login 29 | uses: docker/login-action@v2 30 | with: 31 | registry: quay.io 32 | username: ${{ inputs.quay_login }} 33 | password: ${{ inputs.quay_token }} 34 | 35 | - uses: actions/checkout@v3 36 | with: 37 | fetch-depth: 0 38 | 39 | - name: Create new integration branch 40 | shell: bash 41 | # Creating a branch here in order to avoid commiting to our local main 42 | # branch further down in this action. 
In some make targets we use main's sha 43 | # and by creating a branch here we can just assume local main is the same as 44 | # remote main. 45 | run: git checkout -b olm-publish-action-scratch 46 | 47 | - name: Git merge olm-catalog branch 48 | shell: bash 49 | run: | 50 | git config user.name rhobs-release-bot 51 | git config user.email release-bot@monitoring.rhobs.io 52 | 53 | # NOTE: uses strategy-option=ours as bundle created for a release using 54 | # make initiate-release conflicts with the previous bundle created for 55 | # development branch, so the version in main takes precedence over the 56 | # one in olm-catalog branch 57 | git merge -Xours origin/olm-catalog -m "ci: merge olm-catalog branch" 58 | 59 | - name: Build Operator Image 60 | shell: bash 61 | run: make operator-image 62 | 63 | - name: Publish Operator 64 | shell: bash 65 | run: make operator-push 66 | 67 | - name: Build Bundle Image 68 | shell: bash 69 | run: make bundle-image 70 | 71 | - name: Publish Bundle 72 | shell: bash 73 | run: make bundle-push 74 | 75 | 76 | - name: Build Catalog Image 77 | shell: bash 78 | run: | 79 | make catalog-image 80 | 81 | # NOTE: add a commit only if the catalog can be built successfully 82 | git add bundle/ olm/ 83 | git commit -m "ci(bot): update catalog image" 84 | 85 | - name: Publish catalog image 86 | shell: bash 87 | run: | 88 | make catalog-push 89 | 90 | # update olm-catalog only if catalog could be successfully published 91 | git push origin HEAD:olm-catalog 92 | -------------------------------------------------------------------------------- /hack/dev-deploy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cd .. 
4 | 5 | # Define ANSI color codes 6 | RED='\033[0;31m' 7 | GREEN='\033[0;32m' 8 | ENDCOLOR='\033[0m' 9 | 10 | # Get the current date and time in 'monDD-HHMM' lowercase format 11 | # For example: sep12-1216 12 | TIMESTAMP=$(date +'%b%d-%H%M' | tr '[:upper:]' '[:lower:]') 13 | 14 | # Replace IMG_BASE with your image registry 15 | IMG_BASE="${IMG_BASE:-"quay.io/jezhu/observability-operator"}" 16 | VERSION="${VERSION:-1.0.0-dev-${TIMESTAMP}}" 17 | 18 | print_title() { 19 | echo -e "\n${GREEN} =============================================== ${ENDCOLOR}\n" 20 | echo -e "${GREEN} $1 ${ENDCOLOR}" 21 | echo -e "\n${GREEN} =============================================== ${ENDCOLOR}\n" 22 | } 23 | 24 | # Add cluster-monitoring label 25 | oc label namespace observability-operator openshift.io/cluster-monitoring="true" 26 | 27 | # Enabled UIPlugins >> openshift.enabled=true 28 | perl -pi -e 's/(flag\.BoolVar\(&openShiftEnabled,\s*"openshift\.enabled",\s*)false/$1true/' ./cmd/operator/main.go 29 | 30 | # Build Bundle 31 | print_title "Build Bundle: make operator-image bundle-image operator-push bundle-push" 32 | GOOS=linux GOARCH=amd64 ARCH=amd64 make operator-image bundle-image operator-push bundle-push \ 33 | IMG_BASE="${IMG_BASE}" \ 34 | VERSION="${VERSION}" 35 | 36 | # Build Bundle - if make command above fails to build 37 | if ! make; then 38 | echo -e "\n${RED}Error: 'make operator-image bundle-image operator-push bundle-push...' 
command failed.${ENDCOLOR}\n" >&2 39 | exit 1 40 | fi 41 | 42 | # Delete Previous CatalogSource, Subscription, and ClusterServiceVersion 43 | print_title "Delete Previous ClusterServiceVersion and Subscription" 44 | # oc project openshift-operators 45 | CAT_NAME=$(oc get catalogsource | grep 'observability-operator' | awk '{print $1}') && oc delete catalogsource "${CAT_NAME}" 46 | SUB_NAME=$(oc get subscriptions | grep 'observability-operator' | awk '{print $1}') && oc delete subscriptions "${SUB_NAME}" 47 | CSV_NAME=$(oc get clusterserviceversion | grep 'observability-operator' | awk '{print $1}') && oc delete clusterserviceversion "${CSV_NAME}" 48 | 49 | # delete uiplugin if hanging by unblock finalizer 50 | kubectl patch uiplugin monitoring --type='merge' -p='{"metadata":{"finalizers":null}}' 51 | 52 | # OR Delete the whole operator 53 | operator-sdk cleanup observability-operator -n openshift-operators 54 | 55 | # Run the bundle using the fully qualified image tag. 56 | print_title "Run Bundle: operator-sdk run bundle" 57 | operator-sdk run bundle \ 58 | "${IMG_BASE}-bundle:${VERSION}" \ 59 | --install-mode AllNamespaces \ 60 | --namespace openshift-operators \ 61 | --security-context-config restricted 62 | 63 | # Revert to Original State and Disable UIPlugins >> openshift.enabled=false 64 | perl -pi -e 's/(flag\.BoolVar\(&openShiftEnabled,\s*"openshift\.enabled",\s*)true/$1false/' ./cmd/operator/main.go 65 | -------------------------------------------------------------------------------- /.github/workflows/pr-checks.yaml: -------------------------------------------------------------------------------- 1 | name: Pre-submit tests 2 | on: 3 | pull_request: 4 | 5 | jobs: 6 | commit-lint: 7 | if: (github.actor != 'dependabot[bot]') && ! 
startsWith(github.head_ref, 'dependabot/') 8 | name: Lint the commit messages 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: actions/checkout@v6 12 | with: 13 | fetch-depth: 0 14 | - uses: wagoid/commitlint-github-action@v6 15 | 16 | github-actions-yaml-lint: 17 | name: Lint Github Action 18 | runs-on: ubuntu-latest 19 | steps: 20 | - uses: actions/checkout@v6 21 | - uses: reviewdog/action-actionlint@v1 22 | 23 | lint: 24 | name: Lint code 25 | runs-on: ubuntu-latest 26 | steps: 27 | - uses: actions/checkout@v6 28 | 29 | - uses: actions/setup-go@v6 30 | with: 31 | go-version-file: 'go.mod' 32 | check-latest: true 33 | 34 | - name: Use tools cache 35 | uses: ./.github/tools-cache 36 | 37 | - name: Run unit tests 38 | run: make test-unit 39 | 40 | - name: Lint Go code 41 | run: make lint-golang 42 | 43 | - name: Lint Shell scripts 44 | run: make lint-shell 45 | 46 | - name: Check for linter changes 47 | run: git diff --exit-code 48 | 49 | generate: 50 | name: Verify generated code 51 | runs-on: ubuntu-latest 52 | steps: 53 | - uses: actions/checkout@v6 54 | 55 | - uses: actions/setup-go@v6 56 | with: 57 | go-version-file: 'go.mod' 58 | check-latest: true 59 | 60 | - run: make --always-make generate bundle && git diff --exit-code 61 | 62 | tool-versions: 63 | runs-on: ubuntu-latest 64 | name: Validate tools cache 65 | steps: 66 | - uses: actions/checkout@v6 67 | 68 | - uses: actions/setup-go@v6 69 | with: 70 | go-version-file: 'go.mod' 71 | check-latest: true 72 | 73 | - name: Install tools 74 | uses: ./.github/tools-cache 75 | 76 | - run: make --always-make tools && git diff --exit-code 77 | 78 | build-bundle-image: 79 | name: Build bundle image 80 | runs-on: ubuntu-latest 81 | steps: 82 | - uses: actions/checkout@v6 83 | 84 | - name: Setup Go environment 85 | uses: actions/setup-go@v6 86 | with: 87 | go-version-file: 'go.mod' 88 | check-latest: true 89 | cache: true 90 | 91 | - name: Install tools 92 | uses: ./.github/tools-cache 93 | 94 | - name: Build Bundle 
Image 95 | run: make bundle-image 96 | 97 | e2e-tests-olm: 98 | name: Run end-to-end tests 99 | runs-on: ubuntu-latest 100 | steps: 101 | - uses: actions/checkout@v6 102 | 103 | - name: e2e tests through OLM 104 | uses: ./.github/e2e-tests-olm 105 | -------------------------------------------------------------------------------- /pkg/controllers/uiplugin/dashboards.go: -------------------------------------------------------------------------------- 1 | package uiplugin 2 | 3 | import ( 4 | osv1 "github.com/openshift/api/console/v1" 5 | osv1alpha1 "github.com/openshift/api/console/v1alpha1" 6 | corev1 "k8s.io/api/core/v1" 7 | rbacv1 "k8s.io/api/rbac/v1" 8 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 9 | 10 | uiv1alpha1 "github.com/rhobs/observability-operator/pkg/apis/uiplugin/v1alpha1" 11 | ) 12 | 13 | func createDashboardsPluginInfo(plugin *uiv1alpha1.UIPlugin, namespace, name, image string) (*UIPluginInfo, error) { 14 | pluginName := "observability-ui-" + name 15 | readerRoleName := plugin.Name + "-datasource-reader" 16 | datasourcesNamespace := "openshift-config-managed" 17 | 18 | return &UIPluginInfo{ 19 | Image: image, 20 | Name: pluginName, 21 | ConsoleName: pluginTypeToConsoleName[plugin.Spec.Type], 22 | DisplayName: "Console Enhanced Dashboards", 23 | ResourceNamespace: namespace, 24 | LegacyProxies: []osv1alpha1.ConsolePluginProxy{ 25 | { 26 | Type: osv1alpha1.ProxyTypeService, 27 | Alias: "backend", 28 | Authorize: true, 29 | Service: osv1alpha1.ConsolePluginProxyServiceConfig{ 30 | Name: pluginName, 31 | Namespace: namespace, 32 | Port: port, 33 | }, 34 | }, 35 | }, 36 | Proxies: []osv1.ConsolePluginProxy{ 37 | { 38 | Alias: "backend", 39 | Authorization: "UserToken", 40 | Endpoint: osv1.ConsolePluginProxyEndpoint{ 41 | Type: osv1.ProxyTypeService, 42 | Service: &osv1.ConsolePluginProxyServiceConfig{ 43 | Name: pluginName, 44 | Namespace: namespace, 45 | Port: port, 46 | }, 47 | }, 48 | }, 49 | }, 50 | Role: &rbacv1.Role{ 51 | TypeMeta: 
metav1.TypeMeta{ 52 | APIVersion: rbacv1.SchemeGroupVersion.String(), 53 | Kind: "Role", 54 | }, 55 | ObjectMeta: metav1.ObjectMeta{ 56 | Name: readerRoleName, 57 | Namespace: datasourcesNamespace, 58 | }, 59 | Rules: []rbacv1.PolicyRule{ 60 | { 61 | APIGroups: []string{""}, 62 | Resources: []string{"configmaps"}, 63 | Verbs: []string{"get", "list", "watch"}, 64 | }, 65 | }, 66 | }, 67 | RoleBinding: &rbacv1.RoleBinding{ 68 | TypeMeta: metav1.TypeMeta{ 69 | APIVersion: rbacv1.SchemeGroupVersion.String(), 70 | Kind: "RoleBinding", 71 | }, 72 | ObjectMeta: metav1.ObjectMeta{ 73 | Name: pluginName + "-rolebinding", 74 | Namespace: datasourcesNamespace, 75 | }, 76 | Subjects: []rbacv1.Subject{ 77 | { 78 | APIGroup: corev1.SchemeGroupVersion.Group, 79 | Kind: "ServiceAccount", 80 | Name: pluginName + serviceAccountSuffix, 81 | Namespace: namespace, 82 | }, 83 | }, 84 | RoleRef: rbacv1.RoleRef{ 85 | APIGroup: rbacv1.SchemeGroupVersion.Group, 86 | Kind: "Role", 87 | Name: readerRoleName, 88 | }, 89 | }, 90 | }, nil 91 | } 92 | -------------------------------------------------------------------------------- /docs/user-guides/perses-dashboards/dashboard/02-dashboard.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: perses.dev/v1alpha1 2 | kind: PersesDashboard 3 | metadata: 4 | name: cluster-overview 5 | namespace: perses-example 6 | spec: 7 | display: 8 | name: Cluster Overview 9 | panels: 10 | "0_0": 11 | kind: Panel 12 | spec: 13 | display: 14 | name: CPU 15 | plugin: 16 | kind: StatChart 17 | spec: 18 | calculation: mean 19 | format: 20 | unit: percent-decimal 21 | thresholds: 22 | steps: 23 | - color: green 24 | value: 0 25 | - color: red 26 | value: 80 27 | queries: 28 | - kind: TimeSeriesQuery 29 | spec: 30 | plugin: 31 | kind: PrometheusTimeSeriesQuery 32 | spec: 33 | datasource: 34 | kind: PrometheusDatasource 35 | query: cluster:node_cpu:ratio_rate5m{} 36 | "0_1": 37 | kind: Panel 38 | spec: 39 | display: 40 | 
name: Memory 41 | plugin: 42 | kind: StatChart 43 | spec: 44 | calculation: mean 45 | format: 46 | unit: percent-decimal 47 | thresholds: 48 | steps: 49 | - color: green 50 | value: 0 51 | - color: red 52 | value: 80 53 | queries: 54 | - kind: TimeSeriesQuery 55 | spec: 56 | plugin: 57 | kind: PrometheusTimeSeriesQuery 58 | spec: 59 | datasource: 60 | kind: PrometheusDatasource 61 | query: 1 - sum(:node_memory_MemAvailable_bytes:sum) / sum(node_memory_MemTotal_bytes{job="node-exporter"}) 62 | "0_2": 63 | kind: Panel 64 | spec: 65 | display: 66 | name: Welcome to Perses dashboards 67 | plugin: 68 | kind: Markdown 69 | spec: 70 | text: |- 71 | This is a very simple dashboard which displays the overall 72 | utilization of CPU and memory in the cluster based on the metrics 73 | collected by the in-cluster monitoring stack. 74 | 75 | Check the [Observability Operator](https://github.com/rhobs/observability-operator) repository for more examples. 76 | layouts: 77 | - kind: Grid 78 | spec: 79 | display: 80 | title: Overview 81 | collapse: 82 | open: true 83 | items: 84 | - x: 0 85 | "y": 1 86 | width: 8 87 | height: 6 88 | content: 89 | $ref: "#/spec/panels/0_2" 90 | - x: 8 91 | "y": 1 92 | width: 4 93 | height: 6 94 | content: 95 | $ref: "#/spec/panels/0_0" 96 | - x: 12 97 | "y": 1 98 | width: 4 99 | height: 6 100 | content: 101 | $ref: "#/spec/panels/0_1" 102 | duration: 1h 103 | -------------------------------------------------------------------------------- /docs/user-guides/thanos_querier/install/04-applications.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: apps/v1 3 | kind: Deployment 4 | metadata: 5 | labels: 6 | app.kubernetes.io/name: api-service 7 | app.kubernetes.io/part-of: myapp 8 | name: api 9 | namespace: project-a 10 | spec: 11 | replicas: 2 12 | selector: 13 | matchLabels: 14 | app.kubernetes.io/name: api-service 15 | app.kubernetes.io/part-of: myapp 16 | template: 17 | metadata: 18 | 
labels: 19 | app.kubernetes.io/name: api-service 20 | app.kubernetes.io/part-of: myapp 21 | spec: 22 | containers: 23 | - image: ghcr.io/rhobs/prometheus-example-app:0.5.1 24 | imagePullPolicy: IfNotPresent 25 | name: app 26 | --- 27 | apiVersion: v1 28 | kind: Service 29 | metadata: 30 | labels: 31 | app.kubernetes.io/name: api-service 32 | app.kubernetes.io/part-of: myapp 33 | name: api 34 | namespace: project-a 35 | spec: 36 | ports: 37 | - port: 8080 38 | protocol: TCP 39 | targetPort: 8080 40 | name: web 41 | selector: 42 | app.kubernetes.io/name: api-service 43 | app.kubernetes.io/part-of: myapp 44 | type: ClusterIP 45 | --- 46 | apiVersion: apps/v1 47 | kind: Deployment 48 | metadata: 49 | labels: 50 | app.kubernetes.io/name: api-service 51 | app.kubernetes.io/part-of: myapp 52 | name: api 53 | namespace: project-b 54 | spec: 55 | replicas: 3 56 | selector: 57 | matchLabels: 58 | app.kubernetes.io/name: api-service 59 | app.kubernetes.io/part-of: myapp 60 | template: 61 | metadata: 62 | labels: 63 | app.kubernetes.io/name: api-service 64 | app.kubernetes.io/part-of: myapp 65 | spec: 66 | containers: 67 | - image: ghcr.io/rhobs/prometheus-example-app:0.5.1 68 | imagePullPolicy: IfNotPresent 69 | name: app 70 | --- 71 | apiVersion: v1 72 | kind: Service 73 | metadata: 74 | labels: 75 | app.kubernetes.io/name: api-service 76 | app.kubernetes.io/part-of: myapp 77 | name: api 78 | namespace: project-b 79 | spec: 80 | ports: 81 | - port: 8080 82 | protocol: TCP 83 | targetPort: 8080 84 | name: web 85 | selector: 86 | app.kubernetes.io/name: api-service 87 | app.kubernetes.io/part-of: myapp 88 | type: ClusterIP 89 | --- 90 | apiVersion: apps/v1 91 | kind: Deployment 92 | metadata: 93 | labels: 94 | app.kubernetes.io/name: backend 95 | app.kubernetes.io/part-of: myapp 96 | name: backend 97 | namespace: project-c 98 | spec: 99 | replicas: 3 100 | selector: 101 | matchLabels: 102 | app.kubernetes.io/name: backend 103 | app.kubernetes.io/part-of: myapp 104 | template: 
package monitoringstack

import (
	"testing"

	// NOTE(review): monv1 and v1 alias the exact same import path; a single
	// alias would suffice. Kept as-is in this documentation-only pass.
	monv1 "github.com/rhobs/obo-prometheus-operator/pkg/apis/monitoring/v1"
	v1 "github.com/rhobs/obo-prometheus-operator/pkg/apis/monitoring/v1"
	"gotest.tools/v3/assert"
	"gotest.tools/v3/golden"
	corev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

	stack "github.com/rhobs/observability-operator/pkg/apis/monitoring/v1alpha1"
)

// TestStorageSpec verifies storageForPVC's translation of a core
// PersistentVolumeClaimSpec into a prometheus-operator StorageSpec:
// nil and zero-value PVC specs yield nil, while a populated spec is
// embedded verbatim in the VolumeClaimTemplate.
func TestStorageSpec(t *testing.T) {
	validPVCSpec := &corev1.PersistentVolumeClaimSpec{
		AccessModes: []corev1.PersistentVolumeAccessMode{corev1.ReadWriteOnce},
		Resources: corev1.VolumeResourceRequirements{
			Requests: corev1.ResourceList{
				corev1.ResourceStorage: resource.MustParse("200Mi"),
			},
		},
	}

	tt := []struct {
		pvc      *corev1.PersistentVolumeClaimSpec
		expected *monv1.StorageSpec
	}{
		// No PVC spec: no storage configuration is produced.
		{pvc: nil, expected: nil},
		// An empty (zero-value) PVC spec is treated the same as nil.
		{pvc: &corev1.PersistentVolumeClaimSpec{}, expected: nil},
		{
			pvc: validPVCSpec,
			expected: &monv1.StorageSpec{
				VolumeClaimTemplate: v1.EmbeddedPersistentVolumeClaim{
					Spec: *validPVCSpec,
				},
			},
		},
	}

	for _, tc := range tt {
		actual := storageForPVC(tc.pvc)
		assert.DeepEqual(t, tc.expected, actual)
	}
}

// TestNewAdditionalScrapeConfigsSecret compares the self-scrape config
// secret generated for a MonitoringStack against golden files in testdata/:
// one case without TLS, and one with web TLS configured for both Prometheus
// and Alertmanager (key/cert/CA read from named secrets).
func TestNewAdditionalScrapeConfigsSecret(t *testing.T) {
	for _, tc := range []struct {
		name       string
		spec       stack.MonitoringStackSpec
		goldenFile string
	}{
		{
			name: "no-tls",
			spec: stack.MonitoringStackSpec{
				PrometheusConfig:   &stack.PrometheusConfig{},
				AlertmanagerConfig: stack.AlertmanagerConfig{},
			},
			goldenFile: "no-tls",
		},
		{
			name: "with-tls",
			spec: stack.MonitoringStackSpec{
				PrometheusConfig: &stack.PrometheusConfig{
					WebTLSConfig: &stack.WebTLSConfig{
						PrivateKey: stack.SecretKeySelector{
							Name: "prometheus-tls",
							Key:  "key.pem",
						},
						Certificate: stack.SecretKeySelector{
							Name: "prometheus-tls",
							Key:  "cert.pem",
						},
						CertificateAuthority: stack.SecretKeySelector{
							Name: "prometheus-tls",
							Key:  "ca.pem",
						},
					},
				},
				AlertmanagerConfig: stack.AlertmanagerConfig{
					WebTLSConfig: &stack.WebTLSConfig{
						PrivateKey: stack.SecretKeySelector{
							Name: "alertmanager-tls",
							Key:  "key.pem",
						},
						Certificate: stack.SecretKeySelector{
							Name: "alertmanager-tls",
							Key:  "cert.pem",
						},
						CertificateAuthority: stack.SecretKeySelector{
							Name: "alertmanager-tls",
							Key:  "ca.pem",
						},
					},
				},
			},
			goldenFile: "tls",
		},
	} {
		t.Run(tc.name, func(t *testing.T) {
			ms := stack.MonitoringStack{
				ObjectMeta: metav1.ObjectMeta{
					Name:      "ms-" + tc.name,
					Namespace: "ns-" + tc.name,
				},
				Spec: tc.spec,
			}
			// The secret is named after the second argument; its scrape config
			// payload must match the golden file for this scenario.
			s := newAdditionalScrapeConfigsSecret(&ms, tc.name)
			assert.Equal(t, s.Name, tc.name)
			golden.Assert(t, s.StringData[AdditionalScrapeConfigsSelfScrapeKey], tc.goldenFile)
		})
	}
}
40 | ``` 41 | oc delete -n operators csv \ 42 | -l operators.coreos.com/observability-operator.operators= 43 | 44 | oc delete -n openshift-operators \ 45 | installplan,subscriptions \ 46 | -l operators.coreos.com/observability-operator.operators= 47 | 48 | oc delete -f hack/olm 49 | 50 | oc delete crds "$(oc api-resources --api-group=monitoring.rhobs -o name)" 51 | 52 | ``` 53 | 54 | ### Kubernetes 55 | 56 | As mentioned above, Observability Operator requires Operator Lifecycle Manager 57 | (OLM) to be running in cluster, so installing OLM is the first step to getting 58 | the Observability Operator running on k8s. 59 | 60 | 61 | ``` 62 | operator-sdk olm install 63 | kubectl create -f ./hack/olm/k8s/catalog-src.yaml 64 | kubectl create -f ./hack/olm/k8s/subscription.yaml 65 | 66 | ``` 67 | **NOTE:** To install `operator-sdk`, you can make use of `make tools` which 68 | installs `operator-sdk` (along with other tools needed for development) 69 | to `tmp/bin` 70 | 71 | For more information, about running Observability Operator (ObO) on Kind, 72 | please refer to the [Developer Docs](./docs/developer.md). 73 | 74 | #### Uninstalling 75 | ``` 76 | kubectl delete -n operators csv \ 77 | -l operators.coreos.com/observability-operator.operators= 78 | 79 | kubectl delete -n operators \ 80 | installplan,subscriptions \ 81 | -l operators.coreos.com/observability-operator.operators= 82 | 83 | kubectl delete -f hack/olm/k8s 84 | 85 | kubectl delete crds "$(kubectl api-resources --api-group=monitoring.rhobs -o name)" 86 | 87 | ``` 88 | ## Development 89 | 90 | Please refer to [Developer Docs](./docs/developer.md) 91 | 92 | ## Meetings 93 | ___ 94 | - Weekly meeting: [Thursday at 08:00 CET (Central European Time)](https://meet.google.com/gwy-vssi-hfr) 95 | - [Meeting notes and Agenda](https://docs.google.com/document/d/1Iy3CRIEzsHUhtMuzCVRX-8fbmsivcu2iju1J2vN2knQ/edit?usp=meetingnotes&showmeetingnotespromo=true). 
package operator

import (
	"fmt"

	monv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
	corev1 "k8s.io/api/core/v1"
	rbacv1 "k8s.io/api/rbac/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/utils/ptr"

	"github.com/rhobs/observability-operator/pkg/reconciler"
)

const (
	// name is used both as the resource name and as the value of the
	// app.kubernetes.io/name and part-of labels / selectors below.
	name = "observability-operator"
)

// operatorComponentReconcilers returns the reconcilers for the operator's
// self-monitoring resources (ServiceMonitor, Role, RoleBinding) in the given
// namespace, each owned by the supplied object.
func operatorComponentReconcilers(owner metav1.Object, namespace string) []reconciler.Reconciler {
	return []reconciler.Reconciler{
		reconciler.NewUpdater(newServiceMonitor(namespace), owner),
		reconciler.NewUpdater(newPrometheusRole(namespace), owner),
		reconciler.NewUpdater(newRoleBindingForPrometheusRole(namespace), owner),
	}
}

// newServiceMonitor builds the ServiceMonitor that scrapes the operator's
// own "metrics" port over HTTPS, using the in-cluster serving-CA bundle and
// the metrics client certificate mounted into Prometheus.
func newServiceMonitor(namespace string) *monv1.ServiceMonitor {
	return &monv1.ServiceMonitor{
		TypeMeta: metav1.TypeMeta{
			APIVersion: monv1.SchemeGroupVersion.String(),
			Kind:       "ServiceMonitor",
		},
		ObjectMeta: metav1.ObjectMeta{
			Name:      name,
			Namespace: namespace,
			Labels: map[string]string{
				"app.kubernetes.io/component": "operator",
				"app.kubernetes.io/name":      name,
				"app.kubernetes.io/part-of":   name,
				// Opt out of OpenShift user-workload monitoring; this monitor
				// targets the platform monitoring stack instead.
				"openshift.io/user-monitoring": "false",
			},
		},

		Spec: monv1.ServiceMonitorSpec{
			Endpoints: []monv1.Endpoint{
				{
					Port:   "metrics",
					Scheme: ptr.To(monv1.Scheme("https")),
					TLSConfig: &monv1.TLSConfig{
						CAFile:   "/etc/prometheus/configmaps/serving-certs-ca-bundle/service-ca.crt",
						CertFile: "/etc/prometheus/secrets/metrics-client-certs/tls.crt",
						KeyFile:  "/etc/prometheus/secrets/metrics-client-certs/tls.key",
						SafeTLSConfig: monv1.SafeTLSConfig{
							// Expected serving cert name: <name>.<namespace>.svc.
							ServerName: ptr.To(fmt.Sprintf("%s.%s.svc", name, namespace)),
						},
					},
				},
			},
			Selector: metav1.LabelSelector{
				MatchLabels: map[string]string{
					"app.kubernetes.io/component": "operator",
					"app.kubernetes.io/name":      name,
				},
			},
		},
	}
}

// newPrometheusRole builds the namespaced Role that lets Prometheus discover
// the operator's services, endpoints and pods for scraping.
func newPrometheusRole(namespace string) *rbacv1.Role {
	return &rbacv1.Role{
		TypeMeta: metav1.TypeMeta{
			APIVersion: rbacv1.SchemeGroupVersion.String(),
			Kind:       "Role",
		},
		ObjectMeta: metav1.ObjectMeta{
			Name:      "prometheus-k8s",
			Namespace: namespace,
		},
		Rules: []rbacv1.PolicyRule{{
			APIGroups: []string{""},
			Resources: []string{"services", "endpoints", "pods"},
			Verbs:     []string{"get", "list", "watch"},
		}},
	}
}

// newRoleBindingForPrometheusRole binds the prometheus-k8s Role to the
// prometheus-k8s ServiceAccount living in the OpenShift monitoring namespace.
func newRoleBindingForPrometheusRole(namespace string) *rbacv1.RoleBinding {
	roleBinding := &rbacv1.RoleBinding{
		TypeMeta: metav1.TypeMeta{
			APIVersion: rbacv1.SchemeGroupVersion.String(),
			Kind:       "RoleBinding",
		},
		ObjectMeta: metav1.ObjectMeta{
			Name:      "prometheus-k8s",
			Namespace: namespace,
		},
		Subjects: []rbacv1.Subject{{
			APIGroup:  corev1.SchemeGroupVersion.Group,
			Kind:      "ServiceAccount",
			Name:      "prometheus-k8s",
			Namespace: reconciler.OpenshiftMonitoringNamespace,
		}},
		RoleRef: rbacv1.RoleRef{
			APIGroup: rbacv1.SchemeGroupVersion.Group,
			Kind:     "Role",
			Name:     "prometheus-k8s",
		},
	}
	return roleBinding
}
| package uiplugin 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | 7 | "github.com/go-logr/logr" 8 | osv1 "github.com/openshift/api/console/v1" 9 | osv1alpha1 "github.com/openshift/api/console/v1alpha1" 10 | corev1 "k8s.io/api/core/v1" 11 | rbacv1 "k8s.io/api/rbac/v1" 12 | "k8s.io/client-go/dynamic" 13 | "sigs.k8s.io/controller-runtime/pkg/client" 14 | 15 | uiv1alpha1 "github.com/rhobs/observability-operator/pkg/apis/uiplugin/v1alpha1" 16 | ) 17 | 18 | type UIPluginInfo struct { 19 | Image string 20 | Korrel8rImage string 21 | HealthAnalyzerImage string 22 | LokiServiceNames map[string]string 23 | TempoServiceNames map[string]string 24 | Name string 25 | ConsoleName string 26 | DisplayName string 27 | ExtraArgs []string 28 | LegacyProxies []osv1alpha1.ConsolePluginProxy 29 | Proxies []osv1.ConsolePluginProxy 30 | Role *rbacv1.Role 31 | RoleBinding *rbacv1.RoleBinding 32 | ClusterRoles []*rbacv1.ClusterRole 33 | ClusterRoleBindings []*rbacv1.ClusterRoleBinding 34 | ConfigMap *corev1.ConfigMap 35 | ResourceNamespace string 36 | PersesImage string 37 | AreMonitoringFeatsDisabled bool 38 | } 39 | 40 | var pluginTypeToConsoleName = map[uiv1alpha1.UIPluginType]string{ 41 | uiv1alpha1.TypeDashboards: "console-dashboards-plugin", 42 | uiv1alpha1.TypeTroubleshootingPanel: "troubleshooting-panel-console-plugin", 43 | uiv1alpha1.TypeDistributedTracing: "distributed-tracing-console-plugin", 44 | uiv1alpha1.TypeLogging: "logging-view-plugin", 45 | uiv1alpha1.TypeMonitoring: "monitoring-console-plugin", 46 | } 47 | 48 | func PluginInfoBuilder(ctx context.Context, k client.Client, dk dynamic.Interface, plugin *uiv1alpha1.UIPlugin, pluginConf UIPluginsConfiguration, compatibilityInfo CompatibilityEntry, clusterVersion string, logger logr.Logger) (*UIPluginInfo, error) { 49 | image := pluginConf.Images[compatibilityInfo.ImageKey] 50 | if image == "" { 51 | return nil, fmt.Errorf("no image provided for plugin type %s with key %s", plugin.Spec.Type, compatibilityInfo.ImageKey) 52 | } 53 
| 54 | namespace := pluginConf.ResourcesNamespace 55 | switch plugin.Spec.Type { 56 | case uiv1alpha1.TypeDashboards: 57 | return createDashboardsPluginInfo(plugin, namespace, plugin.Name, image) 58 | 59 | case uiv1alpha1.TypeTroubleshootingPanel: 60 | pluginInfo, err := createTroubleshootingPanelPluginInfo(plugin, namespace, plugin.Name, image, []string{}) 61 | if err != nil { 62 | return nil, err 63 | } 64 | 65 | pluginInfo.Korrel8rImage = pluginConf.Images["korrel8r"] 66 | pluginInfo.LokiServiceNames[OpenshiftLoggingNs], err = getLokiServiceName(ctx, k, OpenshiftLoggingNs) 67 | if err != nil { 68 | return nil, err 69 | } 70 | 71 | pluginInfo.LokiServiceNames[OpenshiftNetobservNs], err = getLokiServiceName(ctx, k, OpenshiftNetobservNs) 72 | if err != nil { 73 | return nil, err 74 | } 75 | 76 | pluginInfo.TempoServiceNames[OpenshiftTracingNs], err = getTempoServiceName(ctx, k, OpenshiftTracingNs) 77 | if err != nil { 78 | return nil, err 79 | } 80 | 81 | return pluginInfo, nil 82 | 83 | case uiv1alpha1.TypeDistributedTracing: 84 | return createDistributedTracingPluginInfo(plugin, namespace, plugin.Name, image, []string{}) 85 | 86 | case uiv1alpha1.TypeLogging: 87 | return createLoggingPluginInfo(plugin, namespace, plugin.Name, image, compatibilityInfo.Features, ctx, dk, logger, pluginConf.Images["korrel8r"]) 88 | 89 | case uiv1alpha1.TypeMonitoring: 90 | return createMonitoringPluginInfo(plugin, namespace, plugin.Name, image, compatibilityInfo.Features, clusterVersion, pluginConf.Images["health-analyzer"], pluginConf.Images["perses"]) 91 | } 92 | 93 | return nil, fmt.Errorf("plugin type not supported: %s", plugin.Spec.Type) 94 | } 95 | -------------------------------------------------------------------------------- /pkg/controllers/operator/controller.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2024. 
3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package operator 18 | 19 | import ( 20 | "context" 21 | "time" 22 | 23 | "github.com/go-logr/logr" 24 | monv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" 25 | corev1 "k8s.io/api/core/v1" 26 | "k8s.io/apimachinery/pkg/api/errors" 27 | "k8s.io/apimachinery/pkg/runtime" 28 | ctrl "sigs.k8s.io/controller-runtime" 29 | "sigs.k8s.io/controller-runtime/pkg/builder" 30 | "sigs.k8s.io/controller-runtime/pkg/client" 31 | "sigs.k8s.io/controller-runtime/pkg/controller" 32 | "sigs.k8s.io/controller-runtime/pkg/predicate" 33 | ) 34 | 35 | type resourceManager struct { 36 | k8sClient client.Client 37 | scheme *runtime.Scheme 38 | logger logr.Logger 39 | controller controller.Controller 40 | namespace string 41 | } 42 | 43 | // RBAC for managing Prometheus Operator CRs 44 | // The controller also needs update permission to the services/finalizers 45 | // subresource to set the owner reference with blockOwnerDeletion=true on the 46 | // ServiceMonitor resource. 
47 | //+kubebuilder:rbac:groups=monitoring.coreos.com,resources=servicemonitors,verbs=list;watch;create;update;delete;patch 48 | //+kubebuilder:rbac:groups=rbac.authorization.k8s.io,resources=roles;rolebindings,verbs=list;create;update;patch 49 | //+kubebuilder:rbac:groups="",resources=services/finalizers,verbs=update;patch 50 | 51 | // RegisterWithManager registers the controller with Manager 52 | func RegisterWithManager(mgr ctrl.Manager, namespace string) error { 53 | 54 | rm := &resourceManager{ 55 | k8sClient: mgr.GetClient(), 56 | scheme: mgr.GetScheme(), 57 | logger: ctrl.Log.WithName(name), 58 | namespace: namespace, 59 | } 60 | // We only want to trigger a reconciliation when the generation 61 | // of a child changes. Until we need to update our the status for our own objects, 62 | // we can save CPU cycles by avoiding reconciliations triggered by 63 | // child status changes. 64 | generationChanged := builder.WithPredicates(predicate.GenerationChangedPredicate{}) 65 | 66 | ctrl, err := ctrl.NewControllerManagedBy(mgr). 67 | For( 68 | &corev1.Service{}, 69 | builder.WithPredicates(predicate.NewPredicateFuncs(func(object client.Object) bool { 70 | return object.GetName() == name 71 | })), 72 | ). 73 | Named(name). 74 | Owns(&monv1.ServiceMonitor{}, generationChanged). 
75 | Build(rm) 76 | 77 | if err != nil { 78 | return err 79 | } 80 | rm.controller = ctrl 81 | return nil 82 | } 83 | 84 | func (rm resourceManager) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { 85 | logger := rm.logger.WithValues("operator", req.NamespacedName) 86 | logger.Info("Reconciling operator resources") 87 | 88 | op := &corev1.Service{} 89 | err := rm.k8sClient.Get(ctx, req.NamespacedName, op) 90 | if errors.IsNotFound(err) { 91 | return ctrl.Result{}, nil 92 | } 93 | if err != nil { 94 | return ctrl.Result{}, err 95 | } 96 | 97 | reconcilers := operatorComponentReconcilers(op, rm.namespace) 98 | for _, reconciler := range reconcilers { 99 | err := reconciler.Reconcile(ctx, rm.k8sClient, rm.scheme) 100 | // handle create / update errors that can happen due to a stale cache by 101 | // retrying after some time. 102 | if errors.IsAlreadyExists(err) || errors.IsConflict(err) { 103 | logger.V(3).Info("skipping reconcile error", "err", err) 104 | return ctrl.Result{RequeueAfter: 2 * time.Second}, nil 105 | } 106 | if err != nil { 107 | return ctrl.Result{}, err 108 | } 109 | } 110 | 111 | return ctrl.Result{}, nil 112 | } 113 | -------------------------------------------------------------------------------- /docs/user-guides/thanos_querier/console/03_dashboard.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: perses.dev/v1alpha1 3 | kind: PersesDashboard 4 | metadata: 5 | name: app-overview 6 | namespace: project-d 7 | labels: 8 | app.kubernetes.io/name: dashboard 9 | app.kubernetes.io/part-of: monitoring 10 | spec: 11 | display: 12 | name: Overview 13 | variables: 14 | - kind: ListVariable 15 | spec: 16 | display: 17 | hidden: false 18 | allowAllValue: true 19 | allowMultiple: true 20 | sort: alphabetical-asc 21 | plugin: 22 | kind: PrometheusLabelValuesVariable 23 | spec: 24 | labelName: service 25 | matchers: 26 | - up{service=~"api|backend"} 27 | name: service 28 | - 
kind: ListVariable 29 | spec: 30 | display: 31 | hidden: false 32 | allowAllValue: true 33 | allowMultiple: true 34 | sort: alphabetical-asc 35 | plugin: 36 | kind: PrometheusLabelValuesVariable 37 | spec: 38 | labelName: namespace 39 | matchers: 40 | - up{service=~"$service"} 41 | name: namespace 42 | panels: 43 | "0_0": 44 | kind: Panel 45 | spec: 46 | display: 47 | name: Requests rate (req/s) 48 | plugin: 49 | kind: TimeSeriesChart 50 | spec: {} 51 | queries: 52 | - kind: TimeSeriesQuery 53 | spec: 54 | plugin: 55 | kind: PrometheusTimeSeriesQuery 56 | spec: 57 | datasource: 58 | kind: PrometheusDatasource 59 | query: sum by(service, namespace, code) (rate(http_requests_total{service=~"$service",namespace=~"$namespace"}[$__rate_interval])) 60 | seriesNameFormat: svc={{service}},code={{code}},namespace={{namespace}} 61 | "0_1": 62 | kind: Panel 63 | spec: 64 | display: 65 | name: Errors 66 | plugin: 67 | kind: TimeSeriesChart 68 | spec: {} 69 | queries: 70 | - kind: TimeSeriesQuery 71 | spec: 72 | plugin: 73 | kind: PrometheusTimeSeriesQuery 74 | spec: 75 | datasource: 76 | kind: PrometheusDatasource 77 | query: |- 78 | sum by(service, namespace) (rate(http_requests_total{service=~"$service",namespace=~"$namespace",code!~"2.."}[$__rate_interval])) 79 | / 80 | sum by(service, namespace) (rate(http_requests_total{service=~"$service",namespace=~"$namespace"}[$__rate_interval])) 81 | seriesNameFormat: svc={{service}},namespace={{namespace}} 82 | "0_2": 83 | kind: Panel 84 | spec: 85 | display: 86 | name: Duration (90th percentile) 87 | plugin: 88 | kind: TimeSeriesChart 89 | spec: {} 90 | queries: 91 | - kind: TimeSeriesQuery 92 | spec: 93 | plugin: 94 | kind: PrometheusTimeSeriesQuery 95 | spec: 96 | datasource: 97 | kind: PrometheusDatasource 98 | query: |- 99 | histogram_quantile(0.9, sum by (le,namespace,service) (rate(http_request_duration_seconds_bucket{service=~"$service",namespace=~"$namespace"}[$__rate_interval]))) 100 | seriesNameFormat: 
svc={{service}},namespace={{namespace}} 101 | layouts: 102 | - kind: Grid 103 | spec: 104 | display: 105 | title: Rate/Errors/Duration 106 | collapse: 107 | open: true 108 | items: 109 | - x: 0 110 | "y": 1 111 | width: 24 112 | height: 7 113 | content: 114 | $ref: "#/spec/panels/0_0" 115 | - x: 0 116 | "y": 2 117 | width: 24 118 | height: 7 119 | content: 120 | $ref: "#/spec/panels/0_1" 121 | - x: 0 122 | "y": 3 123 | width: 24 124 | height: 7 125 | content: 126 | $ref: "#/spec/panels/0_2" 127 | duration: 30m 128 | -------------------------------------------------------------------------------- /hack/README.md: -------------------------------------------------------------------------------- 1 | # E2E Test Environment Setup 2 | 3 | This directory contains scripts for setting up the end-to-end (e2e) test environment. 4 | 5 | ## Unified Setup Script 6 | 7 | **`setup-e2e-env.sh`** - The main script for setting up e2e test environments. This script unifies the setup process used by both local development and CI environments to prevent config drift. 8 | 9 | **Prerequisites**: Run `make tools` first to install project-specific tools (operator-sdk, oc, etc.). 10 | 11 | ### Key Features 12 | 13 | - **Unified Setup**: Same script used locally and in GitHub Actions 14 | - **Flexible Configuration**: Options to control what gets installed/configured 15 | - **Tool Management**: Can install kind, kubectl, and any system packages via package managers 16 | - **Environment Validation**: Checks prerequisites before proceeding 17 | - **CI-Friendly**: Special options for CI environments (skip /etc/hosts checks, etc.) 
18 | 19 | ### Usage Examples 20 | 21 | ```bash 22 | # First, install project tools 23 | make tools 24 | 25 | # Full setup with defaults (local development) 26 | ./hack/setup-e2e-env.sh 27 | 28 | # CI-friendly setup (skip host checks, use specific versions) 29 | ./hack/setup-e2e-env.sh --skip-host-check --kind-version v0.23.0 30 | 31 | # Only validate prerequisites, don't install anything 32 | ./hack/setup-e2e-env.sh --validate-only 33 | 34 | # Install tools but don't create cluster (useful for rebuilding) 35 | ./hack/setup-e2e-env.sh --no-cluster 36 | 37 | # Install additional packages (any system packages) 38 | ./hack/setup-e2e-env.sh curl jq tree htop 39 | 40 | # Custom cluster configuration 41 | ./hack/setup-e2e-env.sh --cluster-name my-test --kind-image kindest/node:v1.25.0 42 | ``` 43 | 44 | ### Options 45 | 46 | | Option | Description | Default | 47 | |--------|-------------|---------| 48 | | `--help` | Show usage information | | 49 | | `--validate-only` | Only validate prerequisites | | 50 | | `--no-kind` | Skip kind installation | | 51 | | `--no-kubectl` | Skip kubectl installation | | 52 | | `--no-cluster` | Skip cluster setup | | 53 | | `--no-olm` | Skip OLM installation | | 54 | | `--no-registry` | Skip local registry setup | | 55 | | `--skip-host-check` | Skip /etc/hosts validation (for CI) | | 56 | | `--cluster-name NAME` | Kind cluster name | `obs-operator` | 57 | | `--kind-version VERSION` | Kind version to install | `v0.23.0` | 58 | | `--kind-image IMAGE` | Kind node image | `kindest/node:v1.24.0` | 59 | | `--kubeconfig PATH` | Kubeconfig file path | `~/.kube/kind/obs-operator` | 60 | 61 | 62 | ### What Gets Set Up 63 | 64 | The script sets up a complete e2e test environment including: 65 | 66 | 1. 
**Tool Installation** (if needed): 67 | - kind (Kubernetes in Docker) 68 | - kubectl (Kubernetes CLI) 69 | - Any additional system packages via package managers (apt-get, dnf, yum, zypper, pacman, brew, apk) 70 | - **Note**: Project tools (operator-sdk, oc, etc.) must be installed via `make tools` 71 | 72 | 2. **Kind Cluster**: 73 | - Creates cluster with configuration from `hack/kind/config.yaml` 74 | - Labels control-plane as infra node 75 | - Waits for cluster to be ready 76 | 77 | 3. **Cluster Components**: 78 | - OLM (Operator Lifecycle Manager) v0.28.0 79 | - Local Docker registry for testing 80 | - Monitoring CRDs 81 | 82 | 4. **Validation**: 83 | - Prerequisite checks (go, git, curl) 84 | - Host configuration validation 85 | - Cluster health verification 86 | 87 | ## Backward Compatibility 88 | 89 | The old `hack/kind/setup.sh` script is now deprecated but still works - it forwards to the new unified script with appropriate options. 90 | 91 | ## CI Integration 92 | 93 | GitHub Actions use this same script via `.github/e2e-tests-olm/action.yaml`. Note that CI environments install project tools via the tools-cache action before running the setup script: 94 | 95 | ```yaml 96 | - name: Install required tools using unified setup 97 | uses: ./.github/tools-cache 98 | 99 | - name: Set up e2e environment 100 | shell: bash 101 | run: | 102 | ./hack/setup-e2e-env.sh \ 103 | --skip-host-check \ 104 | --kind-version ${{ inputs.kind-version }} \ 105 | --kind-image ${{ inputs.kind-image }} 106 | ``` 107 | 108 | This ensures both local development and CI use identical setup procedures, preventing config drift and test result differences. 
-------------------------------------------------------------------------------- /deploy/crds/kubernetes/monitoring.coreos.com_prometheusrules.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: apiextensions.k8s.io/v1 3 | kind: CustomResourceDefinition 4 | metadata: 5 | annotations: 6 | controller-gen.kubebuilder.io/version: v0.9.2 7 | creationTimestamp: null 8 | name: prometheusrules.monitoring.coreos.com 9 | spec: 10 | group: monitoring.coreos.com 11 | names: 12 | categories: 13 | - prometheus-operator 14 | kind: PrometheusRule 15 | listKind: PrometheusRuleList 16 | plural: prometheusrules 17 | shortNames: 18 | - promrule 19 | singular: prometheusrule 20 | scope: Namespaced 21 | versions: 22 | - name: v1 23 | schema: 24 | openAPIV3Schema: 25 | description: PrometheusRule defines recording and alerting rules for a Prometheus 26 | instance 27 | properties: 28 | apiVersion: 29 | description: 'APIVersion defines the versioned schema of this representation 30 | of an object. Servers should convert recognized schemas to the latest 31 | internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' 32 | type: string 33 | kind: 34 | description: 'Kind is a string value representing the REST resource this 35 | object represents. Servers may infer this from the endpoint the client 36 | submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' 37 | type: string 38 | metadata: 39 | type: object 40 | spec: 41 | description: Specification of desired alerting rule definitions for Prometheus. 42 | properties: 43 | groups: 44 | description: Content of Prometheus rule file 45 | items: 46 | description: 'RuleGroup is a list of sequentially evaluated recording 47 | and alerting rules. 
Note: PartialResponseStrategy is only used 48 | by ThanosRuler and will be ignored by Prometheus instances. Valid 49 | values for this field are ''warn'' or ''abort''. More info: https://github.com/thanos-io/thanos/blob/main/docs/components/rule.md#partial-response' 50 | properties: 51 | interval: 52 | type: string 53 | name: 54 | type: string 55 | partial_response_strategy: 56 | type: string 57 | rules: 58 | items: 59 | description: 'Rule describes an alerting or recording rule 60 | See Prometheus documentation: [alerting](https://www.prometheus.io/docs/prometheus/latest/configuration/alerting_rules/) 61 | or [recording](https://www.prometheus.io/docs/prometheus/latest/configuration/recording_rules/#recording-rules) 62 | rule' 63 | properties: 64 | alert: 65 | type: string 66 | annotations: 67 | additionalProperties: 68 | type: string 69 | type: object 70 | expr: 71 | anyOf: 72 | - type: integer 73 | - type: string 74 | x-kubernetes-int-or-string: true 75 | for: 76 | type: string 77 | labels: 78 | additionalProperties: 79 | type: string 80 | type: object 81 | record: 82 | type: string 83 | required: 84 | - expr 85 | type: object 86 | type: array 87 | required: 88 | - name 89 | - rules 90 | type: object 91 | type: array 92 | type: object 93 | required: 94 | - spec 95 | type: object 96 | served: true 97 | storage: true 98 | -------------------------------------------------------------------------------- /test/e2e/main_test.go: -------------------------------------------------------------------------------- 1 | package e2e 2 | 3 | import ( 4 | "context" 5 | "flag" 6 | "fmt" 7 | "log" 8 | "os" 9 | "testing" 10 | 11 | configv1 "github.com/openshift/api/config/v1" 12 | olmv1alpha1 "github.com/operator-framework/api/pkg/operators/v1alpha1" 13 | "go.uber.org/zap/zapcore" 14 | v1 "k8s.io/api/core/v1" 15 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 16 | ctrl "sigs.k8s.io/controller-runtime" 17 | "sigs.k8s.io/controller-runtime/pkg/client" 18 | 
var (
	// f is the shared test framework instance used by every e2e test in this
	// package; it is initialized once in setupFramework.
	f *framework.Framework
)

// e2eTestNamespace is the namespace created before the test run and (unless
// -retain is set) deleted afterwards.
const e2eTestNamespace = "e2e-tests"

var (
	retain = flag.Bool("retain", false, "When set, the namespace in which tests are run will not be cleaned up")
	// NOTE(review): this default ("openshift-operator") differs from the
	// "openshift-operators" namespace the install scripts use — confirm which
	// one is intended.
	operatorInstallNS = flag.String("operatorInstallNS", "openshift-operator", "The namespace where the operator is installed")
)

// TestMain is the package entry point: it parses flags, configures the
// controller-runtime logger and then delegates to main so that deferred
// cleanups run before os.Exit.
func TestMain(m *testing.M) {
	flag.Parse()

	// Setup controller-runtime logger to avoid warning messages
	opts := zap.Options{
		Development: true,
		TimeEncoder: zapcore.RFC3339TimeEncoder,
	}
	ctrl.SetLogger(zap.New(zap.UseFlagOptions(&opts)))

	// Deferred calls are not executed on os.Exit from TestMain.
	// As a workaround, we call another function in which we can add deferred calls.
	// http://blog.englund.nu/golang,/testing/2017/03/12/using-defer-in-testmain.html
	code := main(m)
	os.Exit(code)
}

// main sets up the framework and test namespace, runs the test suite, and
// finally runs a set of post-suite validations via testing.RunTests.
// It returns the process exit code.
func main(m *testing.M) int {
	if err := setupFramework(); err != nil {
		log.Println(err)
		return 1
	}

	cleanup, err := createNamespace(e2eTestNamespace)
	if err != nil {
		log.Println(err)
		return 1
	}
	// Keep the namespace around when -retain was passed (useful for
	// debugging a failed run).
	if !*retain {
		defer cleanup()
	}

	exitCode := m.Run()

	// Post-suite validations: these run AFTER the regular tests so they can
	// observe the side effects (events, reconcile errors) of the whole run.
	tests := []testing.InternalTest{
		{
			Name: "NoReconcilationErrors",
			F: func(t *testing.T) {
				// see: https://github.com/rhobs/observability-operator/issues/200
				t.Skip("skipping reconciliation error test until #200 is fixed")
				f.AssertNoReconcileErrors(t)
			},
		},
		{
			// Kubernetes will emit events with reason=OwnerRefInvalidNamespace
			// if the operator defines invalid owner references.
			// See:
			// - https://kubernetes.io/docs/concepts/architecture/garbage-collection/#owners-dependents
			// - https://issues.redhat.com/browse/COO-117
			Name: "NoOwnerRefInvalidNamespaceReasonEvent",
			F: func(t *testing.T) {
				f.AssertNoEventWithReason(t, "OwnerRefInvalidNamespace")
			},
		},
	}

	log.Println("=== Running post e2e test validations ===")
	if !testing.RunTests(func(_, _ string) (bool, error) { return true, nil }, tests) {
		return 1
	}

	return exitCode
}

// setupFramework builds the shared framework.Framework: a client whose scheme
// knows the operator types plus the OLM and OpenShift config APIs.
func setupFramework() error {
	cfg := config.GetConfigOrDie()
	scheme := operator.NewScheme(&operator.OperatorConfiguration{})
	err := olmv1alpha1.AddToScheme(scheme)
	if err != nil {
		return fmt.Errorf("failed to register olmv1alpha1 to scheme %w", err)
	}
	err = configv1.Install(scheme)
	if err != nil {
		return fmt.Errorf("failed to register configv1 to scheme %w", err)
	}
	k8sClient, err := client.New(cfg, client.Options{
		Scheme: scheme,
	})
	if err != nil {
		return err
	}

	f = &framework.Framework{
		K8sClient:         k8sClient,
		Config:            cfg,
		Retain:            *retain,
		OperatorNamespace: *operatorInstallNS,
	}

	return f.Setup()
}

// createNamespace creates the given namespace and returns a cleanup function
// that deletes it again. The cleanup ignores deletion errors (best-effort).
func createNamespace(name string) (func(), error) {
	ns := &v1.Namespace{
		TypeMeta: metav1.TypeMeta{
			APIVersion: v1.SchemeGroupVersion.String(),
			Kind:       "Namespace",
		},
		ObjectMeta: metav1.ObjectMeta{
			Name: name,
		},
	}
	if err := f.K8sClient.Create(context.Background(), ns); err != nil {
		return nil, err
	}

	cleanup := func() {
		f.K8sClient.Delete(context.Background(), ns)
	}

	return cleanup, nil
}
-------------------------------------------------------------------------------- 1 | package uiplugin 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "strings" 7 | 8 | osv1 "github.com/openshift/api/console/v1" 9 | osv1alpha1 "github.com/openshift/api/console/v1alpha1" 10 | "gopkg.in/yaml.v3" 11 | corev1 "k8s.io/api/core/v1" 12 | rbacv1 "k8s.io/api/rbac/v1" 13 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 14 | 15 | uiv1alpha1 "github.com/rhobs/observability-operator/pkg/apis/uiplugin/v1alpha1" 16 | ) 17 | 18 | func createDistributedTracingPluginInfo(plugin *uiv1alpha1.UIPlugin, namespace, name, image string, features []string) (*UIPluginInfo, error) { 19 | distributedTracingConfig := plugin.Spec.DistributedTracing 20 | 21 | configYaml, err := marshalDistributedTracingPluginConfig(distributedTracingConfig) 22 | if err != nil { 23 | return nil, fmt.Errorf("error creating plugin configuration file: %w", err) 24 | } 25 | 26 | extraArgs := []string{ 27 | "-plugin-config-path=/etc/plugin/config/config.yaml", 28 | } 29 | 30 | if len(features) > 0 { 31 | extraArgs = append(extraArgs, fmt.Sprintf("-features=%s", strings.Join(features, ","))) 32 | } 33 | 34 | pluginInfo := &UIPluginInfo{ 35 | Image: image, 36 | Name: plugin.Name, 37 | ConsoleName: pluginTypeToConsoleName[plugin.Spec.Type], 38 | DisplayName: "Distributed Tracing Console Plugin", 39 | ResourceNamespace: namespace, 40 | ExtraArgs: extraArgs, 41 | LegacyProxies: []osv1alpha1.ConsolePluginProxy{ 42 | { 43 | Type: osv1alpha1.ProxyTypeService, 44 | Alias: "backend", 45 | Authorize: true, 46 | Service: osv1alpha1.ConsolePluginProxyServiceConfig{ 47 | Name: name, 48 | Namespace: namespace, 49 | Port: port, 50 | }, 51 | }, 52 | }, 53 | Proxies: []osv1.ConsolePluginProxy{ 54 | { 55 | Alias: "backend", 56 | Authorization: "UserToken", 57 | Endpoint: osv1.ConsolePluginProxyEndpoint{ 58 | Type: osv1.ProxyTypeService, 59 | Service: &osv1.ConsolePluginProxyServiceConfig{ 60 | Name: name, 61 | Namespace: namespace, 62 | Port: 
port, 63 | }, 64 | }, 65 | }, 66 | }, 67 | ConfigMap: &corev1.ConfigMap{ 68 | TypeMeta: metav1.TypeMeta{ 69 | APIVersion: corev1.SchemeGroupVersion.String(), 70 | Kind: "ConfigMap", 71 | }, 72 | ObjectMeta: metav1.ObjectMeta{ 73 | Name: name, 74 | Namespace: namespace, 75 | }, 76 | Data: map[string]string{ 77 | "config.yaml": configYaml, 78 | }, 79 | }, 80 | ClusterRoles: []*rbacv1.ClusterRole{ 81 | { 82 | TypeMeta: metav1.TypeMeta{ 83 | APIVersion: rbacv1.SchemeGroupVersion.String(), 84 | Kind: "ClusterRole", 85 | }, 86 | ObjectMeta: metav1.ObjectMeta{ 87 | Name: plugin.Name + "-cr", 88 | Namespace: namespace, 89 | }, 90 | Rules: []rbacv1.PolicyRule{ 91 | { 92 | APIGroups: []string{"tempo.grafana.com"}, 93 | Resources: []string{"tempostacks", "tempomonolithics"}, 94 | Verbs: []string{"list"}, 95 | }, 96 | }, 97 | }, 98 | }, 99 | ClusterRoleBindings: []*rbacv1.ClusterRoleBinding{ 100 | { 101 | TypeMeta: metav1.TypeMeta{ 102 | APIVersion: rbacv1.SchemeGroupVersion.String(), 103 | Kind: "ClusterRoleBinding", 104 | }, 105 | ObjectMeta: metav1.ObjectMeta{ 106 | Name: plugin.Name + "-crb", 107 | Namespace: namespace, 108 | }, 109 | Subjects: []rbacv1.Subject{{ 110 | APIGroup: corev1.SchemeGroupVersion.Group, 111 | Kind: "ServiceAccount", 112 | Name: plugin.Name + "-sa", 113 | Namespace: namespace, 114 | }}, 115 | RoleRef: rbacv1.RoleRef{ 116 | APIGroup: rbacv1.SchemeGroupVersion.Group, 117 | Kind: "ClusterRole", 118 | Name: plugin.Name + "-cr", 119 | }, 120 | }, 121 | }, 122 | } 123 | 124 | return pluginInfo, nil 125 | } 126 | 127 | func marshalDistributedTracingPluginConfig(cfg *uiv1alpha1.DistributedTracingConfig) (string, error) { 128 | if cfg == nil { 129 | return "", nil 130 | } 131 | 132 | if cfg.Timeout == "" { 133 | return "", nil 134 | } 135 | 136 | pluginCfg := struct { 137 | Timeout string `yaml:"timeout"` 138 | }{ 139 | Timeout: cfg.Timeout, 140 | } 141 | 142 | buf := &bytes.Buffer{} 143 | if err := yaml.NewEncoder(buf).Encode(pluginCfg); err != nil { 144 | 
return "", err 145 | } 146 | 147 | return buf.String(), nil 148 | } 149 | -------------------------------------------------------------------------------- /must-gather/collection-scripts/gather: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # safeguards 4 | set -o nounset 5 | set -o errexit 6 | set -o pipefail 7 | 8 | # global readonly constants 9 | declare -r BASE_COLLECTION_PATH="../must-gather" 10 | declare -r COLLECTION_PATH="${BASE_COLLECTION_PATH}/monitoring/observability-operator" 11 | 12 | source "$(dirname "$0")"/common.sh 13 | 14 | # init initializes global variables that need to be computed. 15 | # E.g. get token of the default ServiceAccount 16 | init() { 17 | mkdir -p "${COLLECTION_PATH}" 18 | 19 | readarray -t MON_STACK_NSS < <( 20 | oc get monitoringstacks --all-namespaces --no-headers -o custom-columns=":metadata.namespace" 21 | ) || true 22 | } 23 | 24 | operants_get() { 25 | oc get pods --all-namespaces -l app.kubernetes.io/managed-by=observability-operator -o yaml > "$COLLECTION_PATH"/operants.yaml 26 | oc get pods --all-namespaces -l app.kubernetes.io/part-of=observability-operator -o yaml >> "$COLLECTION_PATH"/operants.yaml 27 | oc get pods --all-namespaces -l app.kubernetes.io/name=observability-operator > "$COLLECTION_PATH"/operator.yaml 28 | } 29 | 30 | # prom_get makes http GET requests to prometheus /api/v1/$object and stores 31 | # the stdout and stderr results 32 | prom_get() { 33 | local object="$1"; shift 34 | local ns=$1; shift 35 | local name="$1"; shift 36 | local pod 37 | pod=$(get_first_ready_prom_pod "$ns" "$name") 38 | 39 | local result_path="$COLLECTION_PATH/$ns/$name/prometheus/$object" 40 | mkdir -p "$(dirname "$result_path")" 41 | 42 | echo "INFO: Getting ${object} from ${pod}" 43 | oc exec "${pod}" \ 44 | -c prometheus \ 45 | -n "$ns" \ 46 | -- /bin/bash -c "curl -sG http://localhost:9090/api/v1/${object}" \ 47 | > "${result_path}.json" \ 48 | 2> 
# prom_get_from_replica makes an HTTP GET request to the prometheus
# /api/v1/<object> endpoint of one specific replica pod and stores the
# stdout / stderr results under <ns>/<name>/prometheus/<path>.
prom_get_from_replica() {
    local replica="$1"; shift
    local object="$1"; shift
    local ns="$1"; shift
    local name="$1"; shift
    # optional result sub-path; defaults to the queried object
    local path="${1:-$object}"; shift || true

    local result_path="$COLLECTION_PATH/$ns/$name/prometheus/$path"
    mkdir -p "$(dirname "${result_path}")"

    echo "INFO: Getting ${object} from ${replica}"
    oc exec "${replica}" \
        -c prometheus \
        -n "$ns" \
        -- /bin/bash -c "curl -sG http://localhost:9090/api/v1/${object}" \
        > "${result_path}.json" \
        2> "${result_path}.stderr"
}

# prom_get_from_replicas runs prom_get_from_replica against every prometheus
# replica of monitoring stack <name> in namespace <ns>. Replicas are resolved
# via the stack's StatefulSet uid -> pod ownerReferences. Results are stored
# per pod because per-replica state (e.g. tsdb status) differs.
prom_get_from_replicas() {
    local object="$1"; shift
    local ns="$1"; shift
    local name="$1"; shift
    local path="${1:-$object}"; shift || true

    readarray -t stss < <(
        oc get sts -n "$ns" -l app.kubernetes.io/part-of="$name" --no-headers -o custom-columns=":metadata.uid"
    ) || true
    for sts in "${stss[@]}"; do
        readarray -t pods < <(
            oc get pods -n "$ns" -l app.kubernetes.io/component=prometheus -o json | jq -r '.items[] | select(.metadata.ownerReferences[].uid | test("'"$sts"'")).metadata.name'
        ) || true
        for pod in "${pods[@]}"; do
            prom_get_from_replica "${pod}" "${object}" "$ns" "$name" "${pod}/${path}" || true
        done
    done
}

# alertmanager_get makes an HTTP GET request to the alertmanager
# /api/v2/<object> endpoint of the stack's first ready alertmanager pod and
# stores the stdout / stderr results.
alertmanager_get() {
    local object="$1"; shift
    local ns="$1"; shift
    local name="$1"; shift
    local pod
    pod=$(get_first_ready_alertmanager_pod "$ns" "$name")

    local result_path="$COLLECTION_PATH/$ns/$name/alertmanager/$object"
    mkdir -p "$(dirname "$result_path")"

    echo "INFO: Getting ${object} from ${pod}"
    # BUG FIX: the pod is looked up in "$ns" above, but exec used to be
    # hard-coded to -n openshift-monitoring, which fails for monitoring
    # stacks in any other namespace.
    oc exec "${pod}" \
        -c alertmanager \
        -n "$ns" \
        -- /bin/bash -c "curl -sG http://localhost:9093/api/v2/${object}" \
        > "${result_path}.json" \
        2> "${result_path}.stderr"
}
# NOTE || true ignores failures 114 | 115 | operants_get || true 116 | 117 | for ns in "${MON_STACK_NSS[@]}"; do 118 | readarray -t MON_STACK_NAMES < <( 119 | oc get monitoringstacks -n "$ns" --no-headers -o custom-columns=":metadata.name" 120 | ) || true 121 | for name in "${MON_STACK_NAMES[@]}"; do 122 | prom_get alertmanagers "$ns" "$name" || true 123 | prom_get rules "$ns" "$name" || true 124 | prom_get status/config "$ns" "$name" || true 125 | prom_get status/flags "$ns" "$name" || true 126 | 127 | # using prom_get_from_replica as the state differs for each replica 128 | prom_get_from_replicas status/runtimeinfo "$ns" "$name" || true 129 | prom_get_from_replicas 'targets?state=active' "$ns" "$name" targets-active || true 130 | prom_get_from_replicas status/tsdb "$ns" "$name"|| true 131 | 132 | alertmanager_get status "$ns" "$name" || true 133 | done 134 | done 135 | 136 | sync 137 | } 138 | 139 | monitoring_gather 140 | -------------------------------------------------------------------------------- /pkg/reconciler/reconciler.go: -------------------------------------------------------------------------------- 1 | package reconciler 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | 7 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 8 | "k8s.io/apimachinery/pkg/runtime" 9 | "sigs.k8s.io/controller-runtime/pkg/client" 10 | "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" 11 | 12 | "github.com/rhobs/observability-operator/pkg/controllers/util" 13 | ) 14 | 15 | const ( 16 | // OpenshiftMonitoringNamespace is the namespace in which the OpenShift 17 | // monitoring components are deployed. 18 | OpenshiftMonitoringNamespace = "openshift-monitoring" 19 | ) 20 | 21 | // This interface is used by the resourceManagers to reconicle the resources they 22 | // watch. If any component needs special treatment in the reconcile loop, create 23 | // a new type that implements this interface. 
// Reconciler is implemented by the resourceManagers to reconcile the
// resources they watch. If any component needs special treatment in the
// reconcile loop, create a new type that implements this interface.
type Reconciler interface {
	Reconcile(ctx context.Context, c client.Client, scheme *runtime.Scheme) error
}

// Updater simply updates a resource by setting a controller reference
// for resourceOwner and calling Patch on it.
type Updater struct {
	// resourceOwner is recorded as the controller owner of resource
	// (unless shouldBypassSetCtrlRef is true).
	resourceOwner metav1.Object
	// resource is the desired state, applied with server-side apply.
	resource client.Object
	// shouldBypassSetCtrlRef skips SetControllerReference so that another
	// operator may own the resource.
	shouldBypassSetCtrlRef bool
}

// Reconcile applies r.resource with server-side apply (field owner
// "observability-operator", taking ownership of conflicting fields), setting
// a controller reference first when allowed and valid.
func (r Updater) Reconcile(ctx context.Context, c client.Client, scheme *runtime.Scheme) error {
	// Only set the controller reference if the bypass flag is false.
	// Bypassing allows other operators to own the resource
	// (e.g. Observability-operator creates the perses instance. But Perses-operator manages the perses instance)
	if !r.shouldBypassSetCtrlRef {
		// If the resource owner is in the same namespace as the resource, or if the resource owner is cluster scoped set the owner reference.
		if r.resourceOwner.GetNamespace() == r.resource.GetNamespace() || r.resourceOwner.GetNamespace() == "" {
			if err := controllerutil.SetControllerReference(r.resourceOwner, r.resource, scheme); err != nil {
				return fmt.Errorf("%s/%s (%s): updater failed to set owner reference: %w",
					r.resource.GetNamespace(), r.resource.GetName(),
					r.resource.GetObjectKind().GroupVersionKind().String(), err)
			}
		}
	}

	// Server-side apply: ForceOwnership resolves field conflicts in our favor.
	if err := c.Patch(ctx, r.resource, client.Apply, client.ForceOwnership, client.FieldOwner("observability-operator")); err != nil {
		return fmt.Errorf("%s/%s (%s): updater failed to patch: %w",
			r.resource.GetNamespace(), r.resource.GetName(),
			r.resource.GetObjectKind().GroupVersionKind().String(), err)
	}

	return nil
}

// NewUpdater creates an Updater that sets a controller reference on resource.
func NewUpdater(resource client.Object, owner metav1.Object) Updater {
	return newUpdater(resource, owner, false)
}

// NewUnmanagedUpdater creates an Updater that does not set a controller reference.
func NewUnmanagedUpdater(resource client.Object, owner metav1.Object) Updater {
	return newUpdater(resource, owner, true)
}

// newUpdater is the common constructor; it stamps the common labels
// (derived from the owner's name) onto the resource.
func newUpdater(resource client.Object, owner metav1.Object, bypassOwnerRef bool) Updater {
	return Updater{
		resourceOwner:          owner,
		resource:               util.AddCommonLabels(resource, owner.GetName()),
		shouldBypassSetCtrlRef: bypassOwnerRef,
	}
}

// Deleter deletes a resource and ignores NotFound errors.
type Deleter struct {
	resource client.Object
}

// Reconcile deletes r.resource; an already-absent resource is not an error.
func (r Deleter) Reconcile(ctx context.Context, c client.Client, scheme *runtime.Scheme) error {
	if err := c.Delete(ctx, r.resource); client.IgnoreNotFound(err) != nil {
		return fmt.Errorf("%s/%s (%s): deleter failed to delete: %w",
			r.resource.GetNamespace(), r.resource.GetName(),
			r.resource.GetObjectKind().GroupVersionKind().String(), err)
	}
	return nil
}

// NewDeleter creates a Deleter for the given resource.
func NewDeleter(r client.Object) Deleter {
	return Deleter{resource: r}
}

// Merger patches a resource with a strategic/JSON merge patch instead of
// server-side apply, and does not set any owner reference.
type Merger struct {
	resource client.Object
}

// NewMerger creates a Merger, stamping the common labels for the named owner.
func NewMerger(r client.Object, owner string) Merger {
	return Merger{resource: util.AddCommonLabels(r, owner)}
}

// Reconcile merge-patches r.resource into the cluster.
func (r Merger) Reconcile(ctx context.Context, c client.Client, scheme *runtime.Scheme) error {
	if err := c.Patch(ctx, r.resource, client.Merge); err != nil {
		return fmt.Errorf("%s/%s (%s): merger failed to patch: %w",
			r.resource.GetNamespace(), r.resource.GetName(),
			r.resource.GetObjectKind().GroupVersionKind().String(), err)
	}
	return nil
}
113 | func NewOptionalUpdater(r client.Object, c metav1.Object, cond bool) Reconciler { 114 | if cond { 115 | return NewUpdater(r, c) 116 | } 117 | return NewDeleter(r) 118 | } 119 | 120 | func NewOptionalUnmanagedUpdater(r client.Object, c metav1.Object, cond bool) Reconciler { 121 | if cond { 122 | return NewUnmanagedUpdater(r, c) 123 | } 124 | return NewDeleter(r) 125 | } 126 | -------------------------------------------------------------------------------- /test/run-e2e-ocp.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -e -u -o pipefail 3 | 4 | trap cleanup EXIT 5 | 6 | # NOTE: install ObO and run e2e against the installation 7 | 8 | PROJECT_ROOT="$(git rev-parse --show-toplevel)" 9 | declare -r PROJECT_ROOT 10 | 11 | # shellcheck source=/dev/null 12 | source "$PROJECT_ROOT/test/lib/utils.bash" 13 | 14 | # NOTE: openshift-operators is the namespace used in subscription.yaml to install 15 | # obo, so this is harded coded for the test as well. 
# NOTE: openshift-operators is the namespace used in subscription.yaml to
# install obo, so it is hard-coded for the test as well.
declare -r OPERATORS_NS="openshift-operators"

### Configuration
declare NO_INSTALL=false
declare NO_UNINSTALL=false
declare SHOW_USAGE=false

# cleanup runs on EXIT and best-effort uninstalls ObO.
cleanup() {
    # skip cleanup if user requested help
    $SHOW_USAGE && return 0

    delete_obo || true
    return 0
}

# install_obo installs the operator via OLM (catalog source + subscription)
# and then switches it into OCP mode.
install_obo() {
    header "Install ObO"

    $NO_INSTALL && {
        skip "installation of obo "
        return 0
    }

    # NOTE: catalog-src is added to "openshift-marketplace" namespace
    oc apply -f ./hack/olm/catalog-src.yaml

    # NOTE: obo gets installed to "openshift-operators" namespace
    oc apply -f ./hack/olm/subscription.yaml

    oc -n "$OPERATORS_NS" wait --for=condition=CatalogSourcesUnhealthy=False \
        subscription.operators.coreos.com observability-operator --timeout=60s

    ok "ObO subscription is ready"
    wait_for_operators_ready "$OPERATORS_NS"

    enable_ocp
}

# enable_ocp patches the installed CSV so the operator container runs with
# --openshift.enabled=true, labels the namespace for platform monitoring and
# waits for the uiplugins CRD to be established.
enable_ocp() {
    # Get ObO CSV json file
    CSV_NAME=$(oc -n "$OPERATORS_NS" get sub observability-operator -o jsonpath='{.status.installedCSV}')
    CSV_JSON_FILE=$(mktemp /tmp/"$CSV_NAME"XXXXXX.json)
    # mktemp creates the file; remove it so the first `oc get` below starts
    # from a clean slate.
    if [ -e "$CSV_JSON_FILE" ]; then
        rm -f "$CSV_JSON_FILE"
    fi

    # Retry logic: re-fetch and re-apply the CSV to survive transient
    # conflicts with OLM updating it concurrently.
    max_retries=3
    retry_count=0
    while [ "$retry_count" -lt "$max_retries" ]; do
        oc -n "$OPERATORS_NS" get csv "${CSV_NAME}" -o json > "$CSV_JSON_FILE"
        # Update CSV json file to enable OCP mode
        ARGS_JSON=$(printf '%s\n' "--openshift.enabled=true" | jq -R . | jq -s .)
        jq --arg container_name operator --argjson args "$ARGS_JSON" '
            (.spec.install.spec.deployments[].spec.template.spec.containers[] | select(.name == $container_name) | .args) += $args
        ' "$CSV_JSON_FILE" > /tmp/tmp.$$.json && mv /tmp/tmp.$$.json "$CSV_JSON_FILE"
        ok "Added arguments to container operator in '$CSV_JSON_FILE'."
        if oc -n "$OPERATORS_NS" apply -f "$CSV_JSON_FILE"; then
            ok "Successfully updated CSV ${CSV_NAME}"
            break
        else
            echo "oc apply failed (attempt $((retry_count+1))/$max_retries), retrying..."
        fi
        sleep 10
        ((retry_count++))
        if [ "$retry_count" -eq "$max_retries" ]; then
            err "Failed to update CSV ${CSV_NAME} after $max_retries attempts"
            exit 1
        fi
    done

    rm -f "$CSV_JSON_FILE"

    # enable platform monitoring
    oc label ns "$OPERATORS_NS" openshift.io/cluster-monitoring=true

    oc wait --for=condition=Established crd/uiplugins.observability.openshift.io --timeout=60s
    ok "Enable OCP mode successfully"
}

# delete_obo removes the subscription, CSV, install plans and the operator's
# CRDs (all best-effort).
delete_obo() {
    header "Deleting ObO subscription"

    $NO_UNINSTALL && {
        skip "uninstallation of obo"
        return 0
    }

    oc delete -n "$OPERATORS_NS" csv \
        -l operators.coreos.com/observability-operator."$OPERATORS_NS"= || true

    oc delete -n "$OPERATORS_NS" installplan,subscriptions \
        -l operators.coreos.com/observability-operator."$OPERATORS_NS"= || true

    oc delete -f hack/olm/subscription.yaml || true
    oc delete -f hack/olm/catalog-src.yaml || true
    # BUG FIX: the names were previously passed as a single quoted,
    # newline-joined argument ("$(oc api-resources ...)"), which breaks as
    # soon as more than one CRD exists. xargs splits them into separate
    # arguments; -r (GNU) skips the delete when no CRD is found.
    oc api-resources --api-group=monitoring.rhobs -o name |
        xargs -r oc delete crds
    ok "uninstalled ObO"
}

# parse_args processes CLI options; returns non-zero on an unknown option so
# the caller can show usage.
parse_args() {
    ### while there are args parse them
    while [[ -n "${1+xxx}" ]]; do
        case $1 in
        -h | --help)
            SHOW_USAGE=true
            break
            ;; # exit the loop
        --no-install)
            NO_INSTALL=true
            shift
            ;;
        --no-uninstall)
            NO_UNINSTALL=true
            shift
            ;;
        *) return 1 ;; # show usage on everything else
        esac
    done
    return 0
}

# print_usage prints the help text for this script.
print_usage() {
    local scr
    scr="$(basename "$0")"

    read -r -d '' help <<-EOF_HELP || true
	Usage:
	  $scr
	  $scr --no-install
	  $scr --no-uninstall
	  $scr -h|--help

	Options:
	  -h|--help        show this help
	  --no-install     do not install OBO, useful for rerunning tests
	  --no-uninstall   do not uninstall OBO after test
	EOF_HELP

    echo -e "$help"
    return 0
}

# main: install ObO, run the e2e suite against it and propagate the suite's
# exit code. delete_obo runs via the EXIT trap.
main() {
    parse_args "$@" || die "parse args failed"
    $SHOW_USAGE && {
        print_usage
        exit 0
    }

    cd "$PROJECT_ROOT"
    install_obo

    local -i ret=0
    ./test/run-e2e.sh --no-deploy --ns "$OPERATORS_NS" --ci || ret=$?

    # NOTE: delete_obo will be automatically called when script exits
    return $ret
}

main "$@"