├── .jujuignore
├── CODEOWNERS
├── tests
│   ├── unit
│   │   ├── test_config
│   │   │   ├── alertmanager_empty.yml
│   │   │   ├── test_templates.tmpl
│   │   │   ├── alertmanager_invalid.yml
│   │   │   ├── alertmanager.yml
│   │   │   └── alertmanager_with_templates.yml
│   │   ├── conftest.py
│   │   ├── test_brute_isolated.py
│   │   ├── helpers.py
│   │   ├── test_self_scrape_jobs.py
│   │   ├── test_server_scheme.py
│   │   ├── test_external_url.py
│   │   ├── test_alertmanager_client.py
│   │   ├── test_remote_configuration_requirer.py
│   │   ├── test_push_config_to_workload_on_startup.py
│   │   ├── test_consumer.py
│   │   ├── test_remote_configuration_provider.py
│   │   └── test_charm.py
│   ├── integration
│   │   ├── remote_configuration_tester
│   │   │   ├── lib
│   │   │   │   └── charms
│   │   │   │       └── alertmanager_k8s
│   │   │   │           └── v0
│   │   │   │               └── .gitkeep
│   │   │   ├── charmcraft.yaml
│   │   │   ├── pyproject.toml
│   │   │   └── src
│   │   │       └── charm.py
│   │   ├── am_config.yaml
│   │   ├── test_kubectl_delete.py
│   │   ├── test_grafana_source.py
│   │   ├── test_persistence.py
│   │   ├── conftest.py
│   │   ├── test_rescale_charm.py
│   │   ├── test_upgrade_charm.py
│   │   ├── test_remote_configuration.py
│   │   ├── test_tls_web.py
│   │   ├── test_templates.py
│   │   └── helpers.py
│   └── manual
│       └── bundle_1_e2e_tls.yaml
├── .wokeignore
├── terraform
│   ├── versions.tf
│   ├── main.tf
│   ├── outputs.tf
│   ├── variables.tf
│   └── README.md
├── .github
│   ├── renovate.json5
│   ├── workflows
│   │   ├── pull-request.yaml
│   │   ├── release.yaml
│   │   ├── tiobe-scan.yaml
│   │   ├── update-libs.yaml
│   │   ├── quality-gates.yaml
│   │   └── promote.yaml
│   ├── .jira_sync_config.yaml
│   ├── pull_request_template.md
│   └── ISSUE_TEMPLATE
│       ├── enhancement_proposal.yml
│       └── bug_report.yml
├── src
│   ├── prometheus_alert_rules
│   │   ├── heartbeat.rule
│   │   ├── alertmanager_notifications_failed.rule
│   │   └── alertmanager_configuration_reload_failure.rule
│   ├── config_builder.py
│   └── alertmanager.py
├── SECURITY.md
├── .gitignore
├── RELEASE.md
├── tox.ini
├── pyproject.toml
├── icon.svg
├── INTEGRATING.md
├── CONTRIBUTING.md
├── README.md
├── lib
│   └── charms
│       └── catalogue_k8s
│           └── v1
│               └── catalogue.py
├── charmcraft.yaml
└── LICENSE
/.jujuignore:
--------------------------------------------------------------------------------
1 | /venv
2 | **/__pycache__
3 |
--------------------------------------------------------------------------------
/CODEOWNERS:
--------------------------------------------------------------------------------
1 | * @canonical/Observability
2 |
--------------------------------------------------------------------------------
/tests/unit/test_config/alertmanager_empty.yml:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/.wokeignore:
--------------------------------------------------------------------------------
1 | tests/integration/remote_configuration_tester/lib
2 |
--------------------------------------------------------------------------------
/tests/integration/remote_configuration_tester/lib/charms/alertmanager_k8s/v0/.gitkeep:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tests/unit/test_config/test_templates.tmpl:
--------------------------------------------------------------------------------
1 | {{define "myTemplate"}}do something else{{end}}
--------------------------------------------------------------------------------
/tests/unit/test_config/alertmanager_invalid.yml:
--------------------------------------------------------------------------------
1 | just:
2 | some:
3 | placeholder: config
4 | which:
5 | - is
6 | - not
7 | - valid
8 |
--------------------------------------------------------------------------------
/terraform/versions.tf:
--------------------------------------------------------------------------------
1 | terraform {
2 | required_version = ">= 1.5"
3 | required_providers {
4 | juju = {
5 | source = "juju/juju"
6 | version = "~> 1.0"
7 | }
8 | }
9 | }
--------------------------------------------------------------------------------
/.github/renovate.json5:
--------------------------------------------------------------------------------
1 | {
2 | "$schema": "https://docs.renovatebot.com/renovate-schema.json",
3 | "extends": [
4 | "github>canonical/observability//.github/renovate/charms.json5",
5 | ],
6 | }
7 |
--------------------------------------------------------------------------------
/tests/integration/am_config.yaml:
--------------------------------------------------------------------------------
1 | route:
2 | receiver: test_receiver
3 | group_by:
4 | - alertname
5 | group_wait: 1234s
6 | group_interval: 4321s
7 | repeat_interval: 1111h
8 | receivers:
9 | - name: test_receiver
10 |
--------------------------------------------------------------------------------
/.github/workflows/pull-request.yaml:
--------------------------------------------------------------------------------
1 | name: Pull Requests
2 |
3 | on:
4 | pull_request:
5 | branches:
6 | - main
7 | - track/**
8 |
9 | jobs:
10 | pull-request:
11 | name: PR
12 | uses: canonical/observability/.github/workflows/charm-pull-request.yaml@v1
13 | secrets: inherit
14 |
--------------------------------------------------------------------------------
/.github/workflows/release.yaml:
--------------------------------------------------------------------------------
1 | name: Release Charm
2 |
3 | on:
4 | push:
5 | branches:
6 | - main
7 | - track/**
8 |
9 | jobs:
10 | release:
11 | uses: canonical/observability/.github/workflows/charm-release.yaml@v1
12 | secrets: inherit
13 | with:
14 | default-track: dev
15 |
--------------------------------------------------------------------------------
/.github/workflows/tiobe-scan.yaml:
--------------------------------------------------------------------------------
1 | name: Tiobe TiCS Analysis
2 |
3 | on:
4 | workflow_dispatch:
5 | schedule:
6 | - cron: "0 0 * * 1" # Runs at midnight UTC every Monday
7 |
8 | jobs:
9 | tics:
10 | name: TiCS
11 | uses: canonical/observability/.github/workflows/charm-tiobe-scan.yaml@v1
12 | secrets: inherit
13 |
--------------------------------------------------------------------------------
/src/prometheus_alert_rules/heartbeat.rule:
--------------------------------------------------------------------------------
1 | # Based on https://awesome-prometheus-alerts.grep.to/rules.html#prometheus-self-monitoring-1
2 | groups:
3 | - name: Watchdog
4 | rules:
5 | - alert: Watchdog
6 | expr: vector(1)
7 | labels:
8 | severity: none
9 | annotations:
10 | summary: Continuously firing alert to ensure Alertmanager is working
11 |
--------------------------------------------------------------------------------
/.github/.jira_sync_config.yaml:
--------------------------------------------------------------------------------
1 | settings:
2 | jira_project_key: "OBC"
3 | status_mapping:
4 | opened: Untriaged
5 | closed: done
6 | not_planned: rejected
7 |
8 | components:
9 | - alertmanager
10 |
11 | add_gh_comment: false
12 | sync_description: false
13 | sync_comments: false
14 |
15 | label_mapping:
16 | "Type: Enhancement": Story
17 |
--------------------------------------------------------------------------------
/.github/workflows/update-libs.yaml:
--------------------------------------------------------------------------------
1 | name: Auto-update Charm Libraries
2 | on:
3 | # Manual trigger
4 | workflow_dispatch:
5 | # Regularly check the upstream, every four hours
6 | schedule:
7 | - cron: "0 0,4,8,12,16,20 * * *"
8 |
9 | jobs:
10 | update-lib:
11 | name: Check libraries
12 | uses: canonical/observability/.github/workflows/charm-update-libs.yaml@v1
13 | secrets: inherit
14 |
15 |
--------------------------------------------------------------------------------
/tests/unit/test_config/alertmanager.yml:
--------------------------------------------------------------------------------
1 | global:
2 | http_config:
3 | tls_config:
4 | insecure_skip_verify: true
5 | receivers:
6 | - name: placeholder
7 | webhook_configs:
8 | - url: http://127.0.0.1:5001/
9 | route:
10 | group_by:
11 | - juju_application
12 | - juju_model
13 | - juju_model_uuid
14 | group_interval: 5m
15 | group_wait: 30s
16 | receiver: placeholder
17 | repeat_interval: 1h
18 |
--------------------------------------------------------------------------------
/terraform/main.tf:
--------------------------------------------------------------------------------
1 | resource "juju_application" "alertmanager" {
2 | name = var.app_name
3 | config = var.config
4 | constraints = var.constraints
5 | model_uuid = var.model_uuid
6 | storage_directives = var.storage_directives
7 | trust = true
8 | units = var.units
9 |
10 | charm {
11 | name = "alertmanager-k8s"
12 | channel = var.channel
13 | revision = var.revision
14 | }
15 | }
--------------------------------------------------------------------------------
/.github/workflows/quality-gates.yaml:
--------------------------------------------------------------------------------
1 | name: Quality Gates
2 |
3 | on:
4 | # Manual trigger
5 | workflow_dispatch:
6 | # Run the quality checks periodically
7 | # FIXME: adjust the frequency as needed once we have actual gates in place
8 | # schedule:
9 | # - cron: "0 0 * * Tue"
10 |
11 |
12 | jobs:
13 | quality-gates:
14 | name: Run quality gates
15 | uses: canonical/observability/.github/workflows/charm-quality-gates.yaml@v1
16 | secrets: inherit
17 |
--------------------------------------------------------------------------------
/.github/pull_request_template.md:
--------------------------------------------------------------------------------
1 | ## Issue
2 |
3 |
4 |
5 | ## Solution
6 |
7 |
8 |
9 | ## Context
10 |
11 |
12 |
13 | ## Testing Instructions
14 |
15 |
16 |
17 | ## Upgrade Notes
18 |
19 |
--------------------------------------------------------------------------------
/tests/unit/test_config/alertmanager_with_templates.yml:
--------------------------------------------------------------------------------
1 | global:
2 | http_config:
3 | tls_config:
4 | insecure_skip_verify: true
5 | receivers:
6 | - name: placeholder
7 | webhook_configs:
8 | - url: http://127.0.0.1:5001/
9 | route:
10 | group_by:
11 | - juju_application
12 | - juju_model
13 | - juju_model_uuid
14 | group_interval: 5m
15 | group_wait: 30s
16 | receiver: placeholder
17 | repeat_interval: 1h
18 | templates:
19 | - ./tests/unit/test_config/test_templates.tmpl
20 |
--------------------------------------------------------------------------------
/.github/workflows/promote.yaml:
--------------------------------------------------------------------------------
1 | name: Promote Charm
2 |
3 | on:
4 | workflow_dispatch:
5 | inputs:
6 | promotion:
7 | type: choice
8 | description: Channel to promote from
9 | options:
10 | - edge -> beta
11 | - beta -> candidate
12 | - candidate -> stable
13 |
14 | jobs:
15 | promote:
16 | name: Promote
17 | uses: canonical/observability/.github/workflows/charm-promote.yaml@v1
18 | with:
19 | promotion: ${{ github.event.inputs.promotion }}
20 | secrets: inherit
21 |
--------------------------------------------------------------------------------
/src/prometheus_alert_rules/alertmanager_notifications_failed.rule:
--------------------------------------------------------------------------------
1 | groups:
2 | - name: AlertmanagerNotificationsFailed
3 | rules:
4 | - alert: AlertmanagerNotificationsFailed
5 | expr: rate(alertmanager_notifications_failed_total{integration=~".*"}[5m]) > 0
6 | for: 0m
7 | labels:
8 | severity: warning
9 | annotations:
10 | summary: Alertmanager notifications failure (application {{ $labels.juju_application }} in model {{ $labels.juju_model }})
11 | description: |
12 | Alertmanager notifications failure
13 | VALUE = {{ $value }}
14 | LABELS = {{ $labels }}
15 |
--------------------------------------------------------------------------------
/terraform/outputs.tf:
--------------------------------------------------------------------------------
1 | output "app_name" {
2 | value = juju_application.alertmanager.name
3 | }
4 |
5 | output "endpoints" {
6 | value = {
7 | # Requires
8 | catalogue = "catalogue",
9 | certificates = "certificates",
10 | ingress = "ingress",
11 | tracing = "tracing",
12 | remote_configuration = "remote-configuration"
13 |
14 | # Provides
15 | alerting = "alerting"
16 | karma_dashboard = "karma-dashboard"
17 | self_metrics_endpoint = "self-metrics-endpoint"
18 | grafana_dashboard = "grafana-dashboard"
19 | grafana_source = "grafana-source"
20 | }
21 | }
22 |
--------------------------------------------------------------------------------
/src/prometheus_alert_rules/alertmanager_configuration_reload_failure.rule:
--------------------------------------------------------------------------------
1 | # Based on https://awesome-prometheus-alerts.grep.to/rules.html#prometheus-self-monitoring-1
2 | groups:
3 | - name: AlertmanagerConfigurationReloadFailure
4 | rules:
5 | - alert: AlertmanagerConfigurationReloadFailure
6 | expr: alertmanager_config_last_reload_successful{} != 1
7 | for: 0m
8 | labels:
9 | severity: warning
10 | annotations:
11 | summary: Alertmanager configuration reload failure (application {{ $labels.juju_application }} in model {{ $labels.juju_model }})
12 | description: |
13 | Alertmanager configuration reload error
14 | VALUE = {{ $value }}
15 | LABELS = {{ $labels }}
16 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/enhancement_proposal.yml:
--------------------------------------------------------------------------------
1 | name: Enhancement Proposal
2 | description: File an enhancement proposal
3 | labels: ["Type: Enhancement", "Status: Triage"]
4 | body:
5 | - type: markdown
6 | attributes:
7 | value: >
8 | Thanks for taking the time to fill out this enhancement proposal! Before submitting your issue, please make
9 | sure there isn't already a prior issue concerning this. If there is, please join that discussion instead.
10 | - type: textarea
11 | id: enhancement-proposal
12 | attributes:
13 | label: Enhancement Proposal
14 | description: >
15 | Describe the enhancement you would like to see in as much detail as needed.
16 | validations:
17 | required: true
18 |
--------------------------------------------------------------------------------
/SECURITY.md:
--------------------------------------------------------------------------------
1 | The easiest way to report a security issue is through a [GitHub Private Security Report](https://github.com/canonical/alertmanager-k8s-operator/security/advisories/new)
2 | with a description of the issue, the steps you took to create the issue, affected versions, and, if known, mitigations for the issue.
3 |
4 | Alternatively, to report a security issue via email, please email [security@ubuntu.com](mailto:security@ubuntu.com) with a description of the issue,
5 | the steps you took to create the issue, affected versions, and, if known, mitigations for the issue.
6 |
7 | The [Ubuntu Security disclosure and embargo policy](https://ubuntu.com/security/disclosure-policy) contains more information about what you can expect
8 | when you contact us and what we expect from you.
9 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | venv/
2 | build/
3 | *.charm
4 | *.orig
5 | .coverage
6 | **/__pycache__/
7 | *.py[cod]
8 | .hypothesis/
9 | .idea/
10 | .tox/
11 | .mypy_cache
12 | **/*.egg-info/
13 |
14 | # Exclude all .tfvars files, which are likely to contain sensitive data, such as
15 | # passwords, private keys, and other secrets. These should not be part of version
16 | # control as they are data points which are potentially sensitive and subject
17 | # to change depending on the environment.
18 | *.tfvars
19 | *.tfvars.json
20 |
21 | # Ignore override files as they are usually used to override resources locally and so
22 | # are not checked in
23 | .terraform
24 | override.tf
25 | override.tf.json
26 | *_override.tf
27 | *_override.tf.json
28 |
29 | # Include override files you do wish to add to version control using negated pattern
30 | # !example_override.tf
31 |
32 | # Include tfplan files to ignore the plan output of command: terraform plan -out=tfplan
33 | # example: *tfplan*
34 |
35 | # Ignore CLI configuration files
36 | .terraformrc
37 | terraform.rc
38 | .terraform.lock.hcl
39 |
40 | *.tfstate
41 | *.tfstate.backup
--------------------------------------------------------------------------------
/tests/integration/remote_configuration_tester/charmcraft.yaml:
--------------------------------------------------------------------------------
1 | # Copyright 2022 Canonical Ltd.
2 | # See LICENSE file for licensing details.
3 | name: remote-configuration-tester
4 | type: charm
5 | summary: A charm to test the Alertmanager Remote Configuration library
6 | description: A charm to test the Alertmanager Remote Configuration library
7 |
8 | platforms:
9 | ubuntu@24.04:amd64:
10 |
11 | parts:
12 | charm:
13 | source: .
14 | plugin: uv
15 | build-packages: [git]
16 | build-snaps: [astral-uv]
17 |
18 | containers:
19 | remote-configuration-tester:
20 | resource: remote-configuration-tester-image
21 | mounts:
22 | - storage: config
23 | location: /etc/alertmanager
24 |
25 | storage:
26 | config:
27 | type: filesystem
28 | location: /etc/alertmanager
29 |
30 | resources:
31 | remote-configuration-tester-image:
32 | type: oci-image
33 | description: upstream docker image for remote-configuration-tester
34 | upstream-source: python:slim
35 |
36 | provides:
37 | remote-configuration:
38 | interface: alertmanager_remote_configuration
39 |
40 | config:
41 | options:
42 | config_file:
43 | type: string
44 | default: ""
45 | description: |
46 | Alertmanager configuration file (yaml).
47 |
--------------------------------------------------------------------------------
/tests/unit/conftest.py:
--------------------------------------------------------------------------------
1 | from unittest.mock import patch
2 |
3 | import pytest
4 | from charms.tempo_coordinator_k8s.v0.charm_tracing import charm_tracing_disabled
5 | from ops.testing import Context
6 |
7 | from src.alertmanager import WorkloadManager
8 | from src.charm import AlertmanagerCharm
9 |
10 |
11 | @pytest.fixture(autouse=True)
12 | def patch_buffer_file_for_charm_tracing(tmp_path):
13 | with patch(
14 | "charms.tempo_coordinator_k8s.v0.charm_tracing.BUFFER_DEFAULT_CACHE_FILE_NAME",
15 | str(tmp_path / "foo.json"),
16 | ):
17 | yield
18 |
19 |
20 | @pytest.fixture(autouse=True)
21 | def silence_tracing():
22 | with charm_tracing_disabled():
23 | yield
24 |
25 |
26 | def tautology(*_, **__) -> bool:
27 | return True
28 |
29 |
30 | @pytest.fixture(autouse=True)
31 | def alertmanager_charm():
32 | with patch("lightkube.core.client.GenericSyncClient"), patch.multiple(
33 | "charm.KubernetesComputeResourcesPatch",
34 | _namespace="test-namespace",
35 | _patch=tautology,
36 | is_ready=tautology,
37 | ), patch.object(WorkloadManager, "check_config", lambda *a, **kw: ("ok", "")), patch.object(
38 | WorkloadManager, "_alertmanager_version", property(lambda *_: "0.0.0")
39 | ), patch("subprocess.run"):
40 | yield AlertmanagerCharm
41 |
42 |
43 | @pytest.fixture(scope="function")
44 | def context(alertmanager_charm):
45 | return Context(charm_type=alertmanager_charm)
46 |
--------------------------------------------------------------------------------
/terraform/variables.tf:
--------------------------------------------------------------------------------
1 | variable "app_name" {
2 | description = "Name to give the deployed application"
3 | type = string
4 | default = "alertmanager"
5 | }
6 |
7 | variable "channel" {
8 | description = "Channel that the charm is deployed from"
9 | type = string
10 | }
11 |
12 | variable "config" {
13 | description = "Map of the charm configuration options"
14 | type = map(string)
15 | default = {}
16 | }
17 |
18 | # We use constraints to set AntiAffinity in K8s
19 | # https://discourse.charmhub.io/t/pod-priority-and-affinity-in-juju-charms/4091/13?u=jose
20 | variable "constraints" {
21 | description = "String listing constraints for this application"
22 | type = string
23 | # FIXME: Passing an empty constraints value to the Juju Terraform provider currently
24 | # causes the operation to fail due to https://github.com/juju/terraform-provider-juju/issues/344
25 | default = "arch=amd64"
26 | }
27 |
28 | variable "model_uuid" {
29 | description = "Reference to an existing model resource or data source for the model to deploy to"
30 | type = string
31 | }
32 |
33 | variable "revision" {
34 | description = "Revision number of the charm"
35 | type = number
36 | default = null
37 | }
38 |
39 | variable "storage_directives" {
40 | description = "Map of storage used by the application, which defaults to 1 GB, allocated by Juju"
41 | type = map(string)
42 | default = {}
43 | }
44 |
45 | variable "units" {
46 | description = "Unit count/scale"
47 | type = number
48 | default = 1
49 | }
50 |
--------------------------------------------------------------------------------
/tests/manual/bundle_1_e2e_tls.yaml:
--------------------------------------------------------------------------------
1 | bundle: kubernetes
2 | applications:
3 | alertmanager:
4 | charm: ../../alertmanager-k8s_ubuntu-20.04-amd64.charm
5 | series: focal
6 | resources:
7 | alertmanager-image: ghcr.io/canonical/alertmanager:0.25.0
8 | scale: 1
9 | trust: true
10 | prometheus:
11 | charm: prometheus-k8s
12 | channel: edge
13 | scale: 1
14 | trust: true
15 | avalanche:
16 | # The avalanche charm has always-firing alerts that can be used to verify prometheus is able to
17 | # post alerts to alertmanager.
18 | charm: avalanche-k8s
19 | channel: edge
20 | scale: 1
21 | trust: true
22 | options:
23 | metric_count: 10
24 | series_count: 2
25 | local-ca:
26 | charm: self-signed-certificates
27 | channel: edge
28 | scale: 1
29 | external-ca:
30 | charm: self-signed-certificates
31 | channel: edge
32 | scale: 1
33 | traefik:
34 | charm: traefik-k8s
35 | channel: edge
36 | series: focal
37 | scale: 1
38 | trust: true
39 | relations:
40 | - - traefik:ingress
41 | - alertmanager:ingress
42 | - - local-ca:send-ca-cert
43 | - traefik:receive-ca-cert
44 | - - local-ca:certificates
45 | - alertmanager:certificates
46 | - - local-ca:certificates
47 | - prometheus:certificates
48 | - - traefik:certificates
49 | - external-ca:certificates
50 | - - alertmanager:alerting
51 | - prometheus:alertmanager
52 | - - traefik:ingress-per-unit
53 | - prometheus:ingress
54 | - - alertmanager:self-metrics-endpoint
55 | - prometheus:metrics-endpoint
56 | - - avalanche:metrics-endpoint
57 | - prometheus:metrics-endpoint
58 |
--------------------------------------------------------------------------------
/tests/integration/remote_configuration_tester/pyproject.toml:
--------------------------------------------------------------------------------
1 | # Copyright 2021 Canonical Ltd.
2 | # See LICENSE file for licensing details.
3 | [project]
4 | name = "remote-configuration-tester"
5 | version = "0.0"
6 | requires-python = "~=3.8"
7 |
8 | dependencies = [
9 | "ops",
10 | "pyyaml",
11 | "jsonschema",
12 | "requests",
13 | ]
14 |
15 | # Testing tools configuration
16 | [tool.coverage.run]
17 | branch = true
18 |
19 | [tool.coverage.report]
20 | show_missing = true
21 |
22 | # Formatting tools configuration
23 | [tool.black]
24 | line-length = 99
25 | target-version = ["py38"]
26 |
27 | # Linting tools configuration
28 | [tool.ruff]
29 | line-length = 99
30 | extend-exclude = ["__pycache__", "*.egg_info"]
31 |
32 | [tool.ruff.lint]
33 | select = ["E", "W", "F", "C", "N", "R", "D", "I001"]
34 | # Ignore E501 because using black creates errors with this
35 | # Ignore D107 Missing docstring in __init__
36 | ignore = ["E501", "D107", "N818", "RET504"]
37 | # D100, D101, D102, D103: Ignore missing docstrings in tests
38 | per-file-ignores = {"tests/*" = ["D100","D101","D102","D103"]}
39 |
40 | [tool.ruff.lint.pydocstyle]
41 | convention = "google"
42 |
43 | # Static analysis tools configuration
44 | [tool.pyright]
45 | extraPaths = ["src", "lib"]
46 | pythonVersion = "3.8"
47 | pythonPlatform = "All"
48 | exclude = [
49 | "tests/integration/remote_configuration_tester/**",
50 | ]
51 |
52 | [tool.pytest.ini_options]
53 | minversion = "6.0"
54 | log_cli_level = "INFO"
55 | asyncio_mode = "auto"
56 | addopts = "--tb=native --verbose --capture=no --log-cli-level=INFO"
57 |
58 | [tool.codespell]
59 | skip = ".git,.tox,build,venv*"
60 | ignore-words-list = "assertIn"
61 |
--------------------------------------------------------------------------------
/tests/integration/test_kubectl_delete.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # Copyright 2022 Canonical Ltd.
3 | # See LICENSE file for licensing details.
4 |
5 |
6 | import logging
7 | from pathlib import Path
8 |
9 | import pytest
10 | import sh
11 | import yaml
12 | from helpers import is_alertmanager_up
13 | from pytest_operator.plugin import OpsTest
14 |
15 | logger = logging.getLogger(__name__)
16 |
17 | METADATA = yaml.safe_load(Path("./charmcraft.yaml").read_text())
18 | app_name = METADATA["name"]
19 | resources = {"alertmanager-image": METADATA["resources"]["alertmanager-image"]["upstream-source"]}
20 |
21 |
22 | @pytest.mark.abort_on_fail
23 | async def test_deploy_from_local_path(ops_test: OpsTest, charm_under_test):
24 | """Deploy the charm-under-test."""
25 | assert ops_test.model
26 | logger.debug("deploy local charm")
27 |
28 | await ops_test.model.deploy(
29 | charm_under_test, application_name=app_name, resources=resources, trust=True
30 | )
31 | await ops_test.model.wait_for_idle(apps=[app_name], status="active", timeout=1000)
32 | await is_alertmanager_up(ops_test, app_name)
33 |
34 |
35 | @pytest.mark.abort_on_fail
36 | async def test_kubectl_delete_pod(ops_test: OpsTest):
37 | assert ops_test.model
38 | assert ops_test.model_name
39 | pod_name = f"{app_name}-0"
40 |
41 | sh.kubectl.delete.pod(pod_name, namespace=ops_test.model_name) # pyright: ignore
42 |
43 | application = ops_test.model.applications[app_name]
44 | assert application
45 | await ops_test.model.block_until(lambda: len(application.units) > 0)
46 | await ops_test.model.wait_for_idle(apps=[app_name], status="active", timeout=1000)
47 | assert await is_alertmanager_up(ops_test, app_name)
48 |
--------------------------------------------------------------------------------
/tests/integration/remote_configuration_tester/src/charm.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # Copyright 2022 Canonical Ltd.
3 | # See LICENSE file for licensing details.
4 |
5 | """A Charm to functionally test the Alertmanager Operator."""
6 |
7 | import logging
8 | import typing
9 |
10 | from charms.alertmanager_k8s.v0.alertmanager_remote_configuration import (
11 | ConfigReadError,
12 | RemoteConfigurationProvider,
13 | )
14 | from ops.charm import CharmBase, PebbleReadyEvent
15 | from ops.main import main
16 | from ops.model import ActiveStatus, WaitingStatus
17 |
18 | logger = logging.getLogger(__name__)
19 |
20 |
21 | class AlertmanagerTesterCharm(CharmBase):
22 | """A Charm to functionally test the Alertmanager Operator."""
23 |
24 | ALERTMANAGER_CONFIG_FILE = "/etc/alertmanager/alertmanager.yml"
25 |
26 | def __init__(self, *args):
27 | super().__init__(*args)
28 | self.container = self.unit.get_container("remote-configuration-tester")
29 |
30 | try:
31 | self.remote_configuration_consumer = RemoteConfigurationProvider.with_config_file(
32 | charm=self, config_file=self.ALERTMANAGER_CONFIG_FILE
33 | )
34 | except ConfigReadError:
35 | pass
36 |
37 | self.framework.observe(self.on.remote_configuration_tester_pebble_ready, self._on_ready)
38 |
39 | def _on_ready(self, event: PebbleReadyEvent) -> None:
40 | if not self.container.can_connect():
41 | self.unit.status = WaitingStatus("Waiting for the container to be ready")
42 | event.defer()
43 | return
44 | self.container.push(
45 | self.ALERTMANAGER_CONFIG_FILE, typing.cast(str, self.config["config_file"])
46 | )
47 | self.unit.status = ActiveStatus()
48 |
49 |
50 | if __name__ == "__main__":
51 | main(AlertmanagerTesterCharm)
52 |
--------------------------------------------------------------------------------
/RELEASE.md:
--------------------------------------------------------------------------------
1 | # Release Process
2 |
3 | ## Overview
4 |
5 | At any given time there are three revisions of the Alertmanager charm [available on CharmHub.io](https://charmhub.io/alertmanager-k8s), one for each of the following channels:
6 |
7 | 1. `latest/stable` is a well-tested, production-ready version of the charm.
8 | 2. `latest/candidate` is a feature-ready preview of the next stable release, currently in testing.
9 | 3. `latest/edge` is the bleeding-edge development version of the charm. While we try hard to avoid it, it may break or introduce regressions.
10 |
11 | Currently, the Alertmanager charm does not make use of the `latest/beta` channel.
12 | For more information about CharmHub channels, refer to the [Juju charm store](https://discourse.charmhub.io/t/the-juju-charm-store) documentation.
13 |
14 | ## When to create which revisions
15 |
16 | * **Stable revisions** are released in consultation with the product manager and engineering manager, once the `candidate` revision has been well tested and is deemed ready for production.
17 | * **Candidate revisions** are released when the charm reaches feature completion with respect to the next planned `stable` release.
18 | * **Edge revisions** are released at the developer's discretion, potentially every time something is merged into `main` and the unit tests pass.
19 |
20 | ## How to publish revisions
21 |
22 | Refer to the [Publish your operator in Charmhub](https://discourse.charmhub.io/t/publish-your-operator-in-charmhub) documentation.
23 | Immediately after a `latest/stable` release, the charm revision in `latest/stable` is expected to match the one in `latest/candidate`; the two channels diverge again as we ramp up through `latest/candidate` releases toward the next `latest/stable` release.
24 |
25 | ## A note on granularity of revisions
26 |
27 | We believe in shipping often and with confidence.
28 | It is perfectly acceptable to have a new `latest/stable` release containing just one bug fix or a small new feature with respect to the last one.
29 |
--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
1 | # Copyright 2021 Canonical Ltd.
2 | # See LICENSE file for licensing details.
3 |
4 | [tox]
5 | skipsdist=True
6 | skip_missing_interpreters = True
7 | envlist = lint, static, unit
8 |
9 | [vars]
10 | src_path = {toxinidir}/src
11 | tst_path = {toxinidir}/tests
12 | lib_path = {toxinidir}/lib/charms/alertmanager_k8s
13 | all_path = {[vars]src_path} {[vars]tst_path} {[vars]lib_path}
14 | uv_flags = --frozen --isolated --extra=dev
15 |
16 | [testenv]
17 | allowlist_externals = uv
18 | basepython = python3
19 | setenv =
20 | PYTHONPATH = {toxinidir}:{toxinidir}/lib:{[vars]src_path}
21 | PYTHONBREAKPOINT=ipdb.set_trace
22 | PY_COLORS=1
23 | passenv =
24 | PYTHONPATH
25 | CHARM_PATH
26 |
27 | [testenv:lock]
28 | description = Update uv.lock with the latest deps
29 | commands =
30 | uv lock --upgrade --no-cache
31 |
32 | [testenv:lint]
33 | description = Lint the code
34 | commands =
35 | uv run {[vars]uv_flags} ruff check {[vars]all_path}
36 |
37 | [testenv:static]
38 | description = Run static checks
39 | allowlist_externals =
40 | {[testenv]allowlist_externals}
41 | /usr/bin/env
42 | commands =
43 | uv run {[vars]uv_flags} pyright {[vars]all_path}
44 | /usr/bin/env sh -c 'for m in $(git diff main --name-only {[vars]lib_path}); do if ! git diff main $m | grep -q "+LIBPATCH\|+LIBAPI"; then echo "You forgot to bump the version on $m!"; exit 1; fi; done'
45 |
46 | [testenv:fmt]
47 | description = "Format the code"
48 | commands =
49 | uv run {[vars]uv_flags} ruff check --fix-only {[vars]all_path}
50 |
51 | [testenv:unit]
52 | description = Run unit tests
53 | setenv =
54 | {[testenv]setenv}
55 | JUJU_VERSION=3.0.3
56 | passenv =
57 | PYTHONPATH
58 | allowlist_externals =
59 | {[testenv]allowlist_externals}
60 | /usr/bin/env
61 | commands =
62 | uv run {[vars]uv_flags} coverage run --source={[vars]src_path},{[vars]lib_path} -m pytest \
63 | {[vars]tst_path}/unit {posargs}
64 | uv run {[vars]uv_flags} coverage report
65 |
66 | [testenv:integration]
67 | description = Run integration tests
68 | commands =
69 | uv run {[vars]uv_flags} pytest --exitfirst {[vars]tst_path}/integration {posargs}
70 |
--------------------------------------------------------------------------------
/tests/unit/test_brute_isolated.py:
--------------------------------------------------------------------------------
1 | # Copyright 2023 Canonical Ltd.
2 | # See LICENSE file for licensing details.
3 |
4 | from unittest.mock import patch
5 |
6 | import pytest
7 | from helpers import add_relation_sequence, begin_with_initial_hooks_isolated
8 | from ops.testing import Context, Relation, State
9 |
10 | """Some brute-force tests, so that other tests can remain focused."""
11 |
12 |
13 | def test_startup_shutdown_sequence(context: Context):
14 | state = begin_with_initial_hooks_isolated(context)
15 | state = context.run(context.on.update_status(), state)
16 |
17 | for peer_rel in state.get_relations("replicas"):
18 | state = context.run(context.on.relation_departed(peer_rel, remote_unit=2), state)
19 |
20 | state = context.run(context.on.stop(), state)
21 | context.run(context.on.remove(), state)
22 |
23 |
24 | @pytest.mark.parametrize("fqdn", ["localhost", "am-0.endpoints.cluster.local"])
25 | @pytest.mark.parametrize("leader", [True, False])
26 | class TestAlertingRelationDataUniformity:
27 | """Scenario: The charm is related to several different prometheus apps."""
28 |
29 | @pytest.fixture
30 | def post_startup(self, context, fqdn, leader) -> State:
31 | with patch("socket.getfqdn", new=lambda *args: fqdn):
32 | state = begin_with_initial_hooks_isolated(context, leader=leader)
33 |
34 | # Add several relations TODO: how to obtain the next rel_id automatically?
35 | prom_rels = [Relation("alerting", id=rel_id) for rel_id in (10, 11, 12)]
36 | for prom_rel in prom_rels:
37 | state = add_relation_sequence(context, state, prom_rel)
38 | return state
39 |
40 | def test_relation_data_is_the_same_for_all_related_apps(self, post_startup, fqdn):
41 | # GIVEN an isolated alertmanager charm after the startup sequence is complete
42 | state = post_startup
43 |
44 | # THEN the "alerting" relation data has the same contents for all related apps
45 | relations = state.get_relations("alerting")
46 | for i in range(1, len(relations)):
47 | assert relations[0].local_unit_data == relations[i].local_unit_data
48 | assert relations[0].local_app_data == relations[i].local_app_data
49 |
--------------------------------------------------------------------------------
/terraform/README.md:
--------------------------------------------------------------------------------
1 | # Terraform module for alertmanager-k8s
2 |
3 | This is a Terraform module facilitating the deployment of alertmanager-k8s, using the [Terraform juju provider](https://github.com/juju/terraform-provider-juju/). For more information, refer to the provider [documentation](https://registry.terraform.io/providers/juju/juju/latest/docs).
4 |
5 |
6 | ## Requirements
7 |
8 | | Name | Version |
9 | |------|---------|
10 | | [terraform](#requirement\_terraform) | >= 1.5 |
11 | | [juju](#requirement\_juju) | ~> 1.0 |
12 |
13 | ## Providers
14 |
15 | | Name | Version |
16 | |------|---------|
17 | | [juju](#provider\_juju) | ~> 1.0 |
18 |
19 | ## Modules
20 |
21 | No modules.
22 |
23 | ## Inputs
24 |
25 | | Name | Description | Type | Default | Required |
26 | |------|-------------|------|---------|:--------:|
27 | | [app\_name](#input\_app\_name) | Name to give the deployed application | `string` | `"alertmanager"` | no |
28 | | [channel](#input\_channel) | Channel that the charm is deployed from | `string` | n/a | yes |
29 | | [config](#input\_config) | Map of the charm configuration options | `map(string)` | `{}` | no |
30 | | [constraints](#input\_constraints) | String listing constraints for this application | `string` | `"arch=amd64"` | no |
31 | | [model\_uuid](#input\_model\_uuid) | Reference to an existing model resource or data source for the model to deploy to | `string` | n/a | yes |
32 | | [revision](#input\_revision) | Revision number of the charm | `number` | `null` | no |
33 | | [storage\_directives](#input\_storage\_directives) | Map of storage used by the application, which defaults to 1 GB, allocated by Juju | `map(string)` | `{}` | no |
34 | | [units](#input\_units) | Unit count/scale | `number` | `1` | no |
35 |
36 | ## Outputs
37 |
38 | | Name | Description |
39 | |------|-------------|
40 | | [app\_name](#output\_app\_name) | n/a |
41 | | [endpoints](#output\_endpoints) | n/a |
42 |
43 |
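44 | ## Usage
45 | 
46 | A minimal sketch of consuming this module. The module `source`, the `var.model_uuid` variable, and the channel shown below are illustrative assumptions; adjust them to your own setup.
47 | 
48 | ```hcl
49 | module "alertmanager" {
50 |   # Assumed path/URL to this module; point it at wherever you actually reference it from.
51 |   source = "git::https://github.com/canonical/alertmanager-k8s-operator//terraform"
52 | 
53 |   # Required inputs
54 |   model_uuid = var.model_uuid # UUID of an existing Juju model (hypothetical variable in your root module)
55 |   channel    = "latest/stable"
56 | 
57 |   # Optional inputs (defaults shown)
58 |   app_name = "alertmanager"
59 |   units    = 1
60 | }
61 | 
62 | # The "endpoints" output maps integration names to endpoint strings, for example:
63 | #   module.alertmanager.endpoints.alerting               # "alerting"
64 | #   module.alertmanager.endpoints.self_metrics_endpoint  # "self-metrics-endpoint"
65 | ```
66 | 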
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.yml:
--------------------------------------------------------------------------------
1 | name: Bug Report
2 | description: File a bug report
3 | labels: ["Type: Bug", "Status: Triage"]
4 | body:
5 | - type: markdown
6 | attributes:
7 | value: >
8 | Thanks for taking the time to fill out this bug report! Before submitting your issue, please make
9 | sure you are using the latest version of the charm. If not, please try upgrading to the latest edge release prior to
10 | posting your report to make sure it's not already solved.
11 | - type: textarea
12 | id: bug-description
13 | attributes:
14 | label: Bug Description
15 | description: >
16 | If applicable, add screenshots to
17 | help explain the problem you are facing.
18 | validations:
19 | required: true
20 | - type: textarea
21 | id: reproduction
22 | attributes:
23 | label: To Reproduce
24 | description: >
25 | Please provide the output of `juju export-bundle` and step-by-step instructions for how to reproduce the behavior.
26 | A deployment diagram could be handy too. See https://discourse.charmhub.io/t/9269 for examples.
27 | placeholder: |
28 | 1. `juju deploy ...`
29 | 2. `juju relate ...`
30 | 3. `juju status --relations`
31 | validations:
32 | required: true
33 | - type: textarea
34 | id: environment
35 | attributes:
36 | label: Environment
37 | description: >
38 | We need to know a bit more about the context in which you run the charm.
39 | - Are you running Juju locally, on lxd, in multipass or on some other platform?
40 | - Which track and channel you deployed the charm from (e.g. `latest/edge` or similar).
41 | - Versions of any applicable components, like the juju snap, the model controller, lxd, microk8s, and/or multipass.
42 | validations:
43 | required: true
44 | - type: textarea
45 | id: logs
46 | attributes:
47 | label: Relevant log output
48 | description: >
49 | Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks.
50 | Fetch the logs using `juju debug-log --replay` and `kubectl logs ...`. Additional details available in the juju docs
51 | at https://juju.is/docs/olm/juju-logs
52 | render: shell
53 | validations:
54 | required: true
55 | - type: textarea
56 | id: additional-context
57 | attributes:
58 | label: Additional context
59 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | # Copyright 2021 Canonical Ltd.
2 | # See LICENSE file for licensing details.
3 | [project]
4 | name = "alertmanager-k8s"
5 | version = "0.0"
6 | requires-python = "~=3.8"
7 |
8 | dependencies = [
9 | "ops",
10 | "pyyaml",
11 | "lightkube>=0.11", # observability_libs
12 | "lightkube-models", # observability_libs
13 | "jsonschema", # traefik_k8s, tls_certificates
14 | "cryptography", # tls_certificates
15 | "pydantic>=2", # traefik_k8s.v2.ingress
16 | "opentelemetry-exporter-otlp-proto-http>=1.21.0", # tracing
17 | "tenacity",
18 | "cosl",
19 | "charmed-service-mesh-helpers>=0.2.0",
20 | "lightkube-extensions@git+https://github.com/canonical/lightkube-extensions.git@main",
21 | ]
22 |
23 | [project.optional-dependencies]
24 | dev = [
25 | # Linting
26 | "ruff",
27 | "codespell",
28 | # Static
29 | "pyright<1.1.399", # 1.1.399 vendors typeshed that dropped Python 3.8 support
30 | # Unit
31 | "pytest",
32 | "coverage[toml]",
33 | "deepdiff",
34 | "hypothesis",
35 | "validators>=0.21.2",
36 | "ops[testing]",
37 | "pytest-interface-tester>0.3",
38 | # Integration
39 | "juju<=3.3.0,>=3.0",
40 | "websockets<14.0",
41 | "pytest-operator",
42 | "pytest-httpserver",
43 | "sh",
44 | ]
45 |
46 | # Testing tools configuration
47 | [tool.coverage.run]
48 | branch = true
49 |
50 | [tool.coverage.report]
51 | show_missing = true
52 |
53 | # Formatting tools configuration
54 | [tool.black]
55 | line-length = 99
56 | target-version = ["py38"]
57 |
58 | # Linting tools configuration
59 | [tool.ruff]
60 | line-length = 99
61 | extend-exclude = ["__pycache__", "*.egg_info"]
62 |
63 | [tool.ruff.lint]
64 | select = ["E", "W", "F", "C", "N", "R", "D", "I001"]
65 | # Ignore E501 because using black creates errors with this
66 | # Ignore D107 Missing docstring in __init__
67 | ignore = ["E501", "D107", "N818", "RET504"]
68 | # D100, D101, D102, D103: Ignore missing docstrings in tests
69 | per-file-ignores = {"tests/*" = ["D100","D101","D102","D103"]}
70 |
71 | [tool.ruff.lint.pydocstyle]
72 | convention = "google"
73 |
74 | # Static analysis tools configuration
75 | [tool.pyright]
76 | extraPaths = ["src", "lib"]
77 | pythonVersion = "3.8"
78 | pythonPlatform = "All"
79 | exclude = [
80 | "tests/integration/remote_configuration_tester/**",
81 | ]
82 |
83 | [tool.pytest.ini_options]
84 | minversion = "6.0"
85 | log_cli_level = "INFO"
86 | asyncio_mode = "auto"
87 | addopts = "--tb=native --verbose --capture=no --log-cli-level=INFO"
88 |
89 | [tool.codespell]
90 | skip = ".git,.tox,build,venv*"
91 | ignore-words-list = "assertIn"
92 |
--------------------------------------------------------------------------------
/icon.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/tests/integration/test_grafana_source.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | import logging
3 | from pathlib import Path
4 |
5 | import pytest
6 | import yaml
7 | from helpers import grafana_datasources
8 | from pytest_operator.plugin import OpsTest
9 | from tenacity import retry, stop_after_attempt, wait_fixed
10 |
11 | # pyright: reportAttributeAccessIssue = false
12 | # pyright: reportOptionalMemberAccess = false
13 |
14 | logger = logging.getLogger(__name__)
15 |
16 | METADATA = yaml.safe_load(Path("./charmcraft.yaml").read_text())
17 | app_name = METADATA["name"]
18 | resources = {"alertmanager-image": METADATA["resources"]["alertmanager-image"]["upstream-source"]}
19 |
20 | """We need to ensure that, even if there are multiple units of Alertmanager, only one is shown as a datasource in Grafana.
21 | We use this test to simulate multiple units of Alertmanager, and then check that only the leader has the key `grafana_source_host` written to relation data with Grafana.
22 | """
23 |
24 | @pytest.mark.abort_on_fail
25 | async def test_build_and_deploy(ops_test: OpsTest, charm_under_test):
26 | """Deploy the locally built alertmanager charm together with grafana, and relate them."""
27 | await asyncio.gather(
28 | ops_test.model.deploy(charm_under_test, "am", resources=resources, trust=True, num_units=2),
29 | ops_test.model.deploy("grafana-k8s", "grafana", channel="2/edge", trust=True),
30 | )
31 |
32 | await ops_test.model.add_relation("grafana:grafana-source", "am")
33 | await ops_test.model.wait_for_idle(apps=["am", "grafana"], status="active")
34 |
35 | @retry(wait=wait_fixed(10), stop=stop_after_attempt(6))
36 | async def test_grafana_datasources(ops_test: OpsTest):
37 | # We have 2 units of Alertmanager, but only one datasource should be shown as a Grafana source.
38 | datasources = await grafana_datasources(ops_test, "grafana")
39 | assert len(datasources) == 1
40 |
41 | # The datasource URL should point to the service, not to a specific pod unit.
42 | # This check is safe, because we name the application `am` and we're not using TLS, so the service will always start with `http://am-endpoints`.
43 | assert datasources[0]["url"].startswith("http://am-endpoints")
44 |
45 | @pytest.mark.abort_on_fail
46 | async def test_deploy_and_integrate_traefik(ops_test: OpsTest):
47 | """Deploy traefik from charmhub and integrate it with alertmanager over ingress."""
48 | await ops_test.model.deploy("traefik-k8s", "traefik", channel="edge", trust=True)
49 |
50 | await ops_test.model.add_relation("traefik:ingress", "am")
51 | await ops_test.model.wait_for_idle(apps=["am", "grafana", "traefik"], status="active")
52 |
53 | async def test_grafana_datasources_when_ingress_available(ops_test: OpsTest):
54 | # We have 2 units of Alertmanager, but only one datasource should be shown as a Grafana source.
55 | datasources = await grafana_datasources(ops_test, "grafana")
56 | assert len(datasources) == 1
57 |
58 | assert "am-endpoints" not in datasources[0]["url"]
59 |
--------------------------------------------------------------------------------
/tests/integration/test_persistence.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # Copyright 2022 Canonical Ltd.
3 | # See LICENSE file for licensing details.
4 |
5 | import logging
6 | from datetime import datetime, timedelta, timezone
7 | from pathlib import Path
8 |
9 | import pytest
10 | import sh
11 | import yaml
12 | from helpers import get_unit_address, is_alertmanager_up
13 | from pytest_operator.plugin import OpsTest
14 |
15 | from src.alertmanager_client import Alertmanager
16 |
17 | # pyright: reportAttributeAccessIssue = false
18 |
19 | logger = logging.getLogger(__name__)
20 |
21 | METADATA = yaml.safe_load(Path("./charmcraft.yaml").read_text())
22 | app_name = METADATA["name"]
23 | resources = {"alertmanager-image": METADATA["resources"]["alertmanager-image"]["upstream-source"]}
24 |
25 |
26 | @pytest.mark.abort_on_fail
27 | async def test_silences_persist_across_upgrades(ops_test: OpsTest, charm_under_test, httpserver):
28 | assert ops_test.model
29 | # deploy alertmanager charm from charmhub
30 | logger.info("deploy charm from charmhub")
31 | sh.juju.deploy("alertmanager-k8s", model=ops_test.model.name, channel="2/edge", trust=True)
32 | await ops_test.model.wait_for_idle(
33 | apps=[app_name], status="active", timeout=1000, raise_on_error=False
34 | )
35 | await ops_test.model.wait_for_idle(apps=[app_name], status="active", timeout=30)
36 |
37 | # set a silence for an alert and check it is set
38 | unit_address = await get_unit_address(ops_test, app_name, 0)
39 | alertmanager = Alertmanager(f"http://{unit_address}:9093")
40 |
41 | silence_start = datetime.now(timezone.utc)
42 | silence_end = silence_start + timedelta(minutes=30)
43 | matchers = [
44 | {
45 | "name": "alertname",
46 | "value": "fake-alert",
47 | "isRegex": False,
48 | }
49 | ]
50 | alertmanager.set_silences(matchers, silence_start, silence_end)
51 | silences_before = alertmanager.get_silences()
52 | assert len(silences_before)
53 |
54 | application = ops_test.model.applications[app_name]
55 | assert application
56 | await ops_test.model.block_until(lambda: len(application.units) > 0)
57 | await ops_test.model.wait_for_idle(apps=[app_name], status="active", timeout=1000)
58 | assert await is_alertmanager_up(ops_test, app_name)
59 |
60 | # upgrade alertmanager using the locally built charm
61 | logger.info("upgrade deployed charm with local charm %s", charm_under_test)
62 | sh.juju.refresh(app_name, model=ops_test.model.name, path=charm_under_test)
63 | await ops_test.model.wait_for_idle(
64 | apps=[app_name], status="active", timeout=1000, raise_on_error=False
65 | )
66 | await ops_test.model.wait_for_idle(apps=[app_name], status="active", timeout=30)
67 | assert await is_alertmanager_up(ops_test, app_name)
68 |
69 | # check the silence is still set
70 | unit_address = await get_unit_address(ops_test, app_name, 0)
71 | alertmanager = Alertmanager(f"http://{unit_address}:9093")
72 | silences_after = alertmanager.get_silences()
73 | assert len(silences_after)
74 |
75 | assert silences_before == silences_after
76 |
--------------------------------------------------------------------------------
/tests/unit/helpers.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # Copyright 2021 Canonical Ltd.
3 | # See LICENSE file for licensing details.
4 |
5 | """Helper functions for writing tests."""
6 |
7 | import dataclasses
8 | from unittest.mock import patch
9 |
10 | from ops.testing import Container, Context, Exec, PeerRelation, Relation, State
11 |
12 |
13 | def no_op(*_, **__) -> None:
14 | pass
15 |
16 |
17 | def tautology(*_, **__) -> bool:
18 | return True
19 |
20 |
21 | def cli_arg(plan, cli_opt):
22 | plan_dict = plan.to_dict()
23 | args = plan_dict["services"]["alertmanager"]["command"].split()
24 | for arg in args:
25 | opt_list = arg.split("=")
26 | if len(opt_list) == 2 and opt_list[0] == cli_opt:
27 | return opt_list[1]
28 | if len(opt_list) == 1 and opt_list[0] == cli_opt:
29 | return opt_list[0]
30 | return None
31 |
32 |
33 | k8s_resource_multipatch = patch.multiple(
34 | "charm.KubernetesComputeResourcesPatch",
35 | _namespace="test-namespace",
36 | _patch=tautology,
37 | is_ready=tautology,
38 | )
39 |
40 |
41 | def begin_with_initial_hooks_isolated(context: Context, *, leader: bool = True) -> State:
42 | container = Container(
43 | "alertmanager",
44 | can_connect=False,
45 | execs={
46 | Exec(["update-ca-certificates", "--fresh"]),
47 | Exec(
48 | ["alertmanager", "--version"],
49 | stdout="alertmanager, version 0.23.0 (branch: HEAD, ...",
50 | ),
51 | Exec(["/usr/bin/amtool", "check-config", "/etc/alertmanager/alertmanager.yml"]),
52 | },
53 | )
54 | state = State(config={"config_file": ""}, containers=[container])
55 | peer_rel = PeerRelation("replicas")
56 |
57 | state = context.run(context.on.install(), state)
58 |
59 | state = dataclasses.replace(state, relations=[peer_rel])
60 | state = context.run(context.on.relation_created(peer_rel), state)
61 |
62 | if leader:
63 | state = dataclasses.replace(state, leader=True)
64 | state = context.run(context.on.leader_elected(), state)
65 | else:
66 | state = dataclasses.replace(state, leader=False)
67 |
68 | state = context.run(context.on.config_changed(), state)
69 |
70 | # state = state.with_can_connect("alertmanger")
71 | container = dataclasses.replace(container, can_connect=True)
72 | state = dataclasses.replace(state, containers=[container])
73 | state = context.run(context.on.pebble_ready(container), state)
74 |
75 | state = context.run(context.on.start(), state)
76 |
77 | return state
78 |
79 |
80 | def add_relation_sequence(context: Context, state: State, relation: Relation):
81 | """Helper to simulate a relation-added sequence."""
82 | # TODO consider adding to scenario.sequences
83 | state_with_relation = dataclasses.replace(state, relations={*state.relations, relation})
84 | state_after_relation_created = context.run(context.on.relation_created(relation), state_with_relation)
85 | state_after_relation_joined = context.run(context.on.relation_joined(relation), state_after_relation_created)
86 | state_after_relation_changed = context.run(
87 | context.on.relation_changed(state_after_relation_joined.get_relation(relation.id)),
88 | state_after_relation_joined,
89 | )
90 | return state_after_relation_changed
91 |
--------------------------------------------------------------------------------
/tests/integration/conftest.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # Copyright 2021 Canonical Ltd.
3 | # See LICENSE file for licensing details.
4 |
5 | import functools
6 | import logging
7 | import os
8 | import socket
9 | from collections import defaultdict
10 | from datetime import datetime
11 | from pathlib import Path
12 |
13 | import juju.utils
14 | import pytest
15 | from pytest_operator.plugin import OpsTest
16 |
17 | PYTEST_HTTP_SERVER_PORT = 8000
18 | logger = logging.getLogger(__name__)
19 |
20 |
21 | class Store(defaultdict):
22 | def __init__(self):
23 | super(Store, self).__init__(Store)
24 |
25 | def __getattr__(self, key):
26 | """Override __getattr__ so dot syntax works on keys."""
27 | try:
28 | return self[key]
29 | except KeyError:
30 | raise AttributeError(key)
31 |
32 | def __setattr__(self, key, value):
33 | """Override __setattr__ so dot syntax works on keys."""
34 | self[key] = value
35 |
36 |
37 | store = Store()
38 |
39 |
40 | def timed_memoizer(func):
41 | @functools.wraps(func)
42 | async def wrapper(*args, **kwargs):
43 | fname = func.__qualname__
44 | logger.info("Started: %s" % fname)
45 | start_time = datetime.now()
46 | if fname in store.keys():
47 | ret = store[fname]
48 | else:
49 | logger.info("Return for {} not cached".format(fname))
50 | ret = await func(*args, **kwargs)
51 | store[fname] = ret
52 | logger.info("Finished: {} in: {} seconds".format(fname, datetime.now() - start_time))
53 | return ret
54 |
55 | return wrapper
56 |
57 |
58 | @pytest.fixture(scope="module")
59 | @timed_memoizer
60 | async def charm_under_test(ops_test: OpsTest) -> Path:
61 | """Charm used for integration testing."""
62 | if charm_file := os.environ.get("CHARM_PATH"):
63 | return Path(charm_file)
64 |
65 | path_to_built_charm = await ops_test.build_charm(".", verbosity="debug")
66 | return path_to_built_charm
67 |
68 |
69 | @pytest.fixture(scope="session")
70 | def httpserver_listen_address():
71 | s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
72 | s.settimeout(0)
73 | try:
74 | # ip address does not need to be reachable
75 | s.connect(("8.8.8.8", 1))
76 | local_ip_address = s.getsockname()[0]
77 | except Exception:
78 | local_ip_address = "127.0.0.1"
79 | finally:
80 | s.close()
81 | return local_ip_address, PYTEST_HTTP_SERVER_PORT
82 |
83 |
84 | @pytest.fixture(autouse=True, scope="module")
85 | async def setup_env(ops_test: OpsTest):
86 | assert ops_test.model
87 | # Prevent "update-status" from interfering with the test:
88 | # - if fired "too quickly", traefik will flip between active/idle and maintenance;
89 | # - make sure charm code does not rely on update-status for correct operation.
90 | await ops_test.model.set_config(
91 | {"update-status-hook-interval": "60m", "logging-config": "=WARNING; unit=DEBUG"}
92 | )
93 |
94 |
95 | @pytest.fixture(scope="module")
96 | def temp_dir(tmp_path_factory):
97 | return tmp_path_factory.mktemp("data")
98 |
99 | @pytest.fixture(scope="module", autouse=True)
100 | def patch_pylibjuju_series_2404():
101 | juju.utils.ALL_SERIES_VERSIONS["noble"] = "24.04"
102 | juju.utils.UBUNTU_SERIES["noble"] = "24.04"
103 |
104 | yield
105 |
106 | del juju.utils.ALL_SERIES_VERSIONS["noble"]
107 | del juju.utils.UBUNTU_SERIES["noble"]
108 |
--------------------------------------------------------------------------------
/tests/unit/test_self_scrape_jobs.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # Copyright 2021 Canonical Ltd.
3 | # See LICENSE file for licensing details.
4 | import unittest
5 | from unittest.mock import PropertyMock, patch
6 |
7 | from helpers import k8s_resource_multipatch
8 | from ops.testing import Harness
9 |
10 | from alertmanager import WorkloadManager
11 | from charm import AlertmanagerCharm
12 |
13 |
14 | class TestWithInitialHooks(unittest.TestCase):
15 | container_name: str = "alertmanager"
16 |
17 | @patch("lightkube.core.client.GenericSyncClient")
18 | @patch.object(WorkloadManager, "check_config", lambda *a, **kw: ("ok", ""))
19 | @k8s_resource_multipatch
20 | @patch.object(WorkloadManager, "_alertmanager_version", property(lambda *_: "0.0.0"))
21 | def setUp(self, *unused):
22 | self.harness = Harness(AlertmanagerCharm)
23 | self.addCleanup(self.harness.cleanup)
24 |
25 | self.harness.set_leader(True)
26 | self.app_name = "am"
27 | # Create the peer relation before running harness.begin_with_initial_hooks(), because
28 | # otherwise it will create it for you and we don't know the rel_id
29 | self.peer_rel_id = self.harness.add_relation("replicas", self.app_name)
30 |
31 | self.harness.begin_with_initial_hooks()
32 |
33 | @patch.object(AlertmanagerCharm, "_internal_url", new_callable=PropertyMock)
34 | @patch.object(AlertmanagerCharm, "_scheme", new_callable=PropertyMock)
35 | def test_self_scraping_job_with_no_peers(self, _mock_scheme, _mock_internal_url):
36 | scheme = "https"
37 | _mock_scheme.return_value = scheme
38 | url_no_scheme = f"test-internal.url:{self.harness.charm._ports.api}"
39 | _mock_internal_url.return_value = f"{scheme}://{url_no_scheme}"
40 | jobs_expected = [
41 | {
42 | "metrics_path": "/metrics",
43 | "scheme": scheme,
44 | "static_configs": [{"targets": [url_no_scheme]}],
45 | }
46 | ]
47 |
48 | jobs = self.harness.charm.self_scraping_job
49 | self.assertEqual(jobs, jobs_expected)
50 |
51 | @patch.object(WorkloadManager, "check_config")
52 | @patch.object(AlertmanagerCharm, "_internal_url", new_callable=PropertyMock)
53 | @patch.object(AlertmanagerCharm, "_scheme", new_callable=PropertyMock)
54 | def test_self_scraping_job_with_peers(
55 | self, _mock_scheme, _mock_internal_url, _mock_check_config
56 | ):
57 | scheme = "https"
58 | _mock_scheme.return_value = scheme
59 |
60 | targets = [
61 | f"test-internal-0.url:{self.harness.charm._ports.api}",
62 | f"test-internal-1.url:{self.harness.charm._ports.api}",
63 | f"test-internal-2.url:{self.harness.charm._ports.api}",
64 | ]
65 | metrics_path = "/metrics"
66 | _mock_internal_url.return_value = f"{scheme}://{targets[0]}"
67 |
68 | jobs_expected = [
69 | {
70 | "metrics_path": metrics_path,
71 | "scheme": scheme,
72 | "static_configs": [{"targets": targets}],
73 | }
74 | ]
75 |
76 | # Add peers
77 | for i, target in enumerate(targets[1:], 1):
78 | unit_name = f"{self.app_name}/{i}"
79 | self.harness.add_relation_unit(self.peer_rel_id, unit_name)
80 | self.harness.update_relation_data(
81 | self.peer_rel_id, unit_name, {"private_address": f"{scheme}://{target}"}
82 | )
83 |
84 | jobs = self.harness.charm.self_scraping_job
85 | self.assertEqual(jobs_expected, jobs)
86 |
--------------------------------------------------------------------------------
/tests/integration/test_rescale_charm.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # Copyright 2021 Canonical Ltd.
3 | # See LICENSE file for licensing details.
4 |
5 | """This test module tests rescaling.
6 |
7 | 1. Deploys multiple units of the charm under test and waits for them to become active
8 | 2. Resets and repeats the above until the leader unit is not the zero unit
9 | 3. Scales up the application by a few units and waits for them to become active
10 | 4. Scales down the application to below the leader unit, to trigger a leadership change event
11 | """
12 |
13 | import logging
14 | from pathlib import Path
15 |
16 | import pytest
17 | import yaml
18 | from helpers import block_until_leader_elected, get_leader_unit_num, is_alertmanager_up
19 | from pytest_operator.plugin import OpsTest
20 |
21 | logger = logging.getLogger(__name__)
22 |
23 | METADATA = yaml.safe_load(Path("./charmcraft.yaml").read_text())
24 | app_name = METADATA["name"]
25 | resources = {"alertmanager-image": METADATA["resources"]["alertmanager-image"]["upstream-source"]}
26 |
27 |
28 | # @pytest.mark.abort_on_fail
29 | @pytest.mark.xfail
30 | async def test_deploy_multiple_units(ops_test: OpsTest, charm_under_test):
31 | """Deploy the charm-under-test."""
32 | assert ops_test.model
33 | logger.info("build charm from local source folder")
34 |
35 | logger.info("deploy charm")
36 | await ops_test.model.deploy(
37 | charm_under_test, application_name=app_name, resources=resources, num_units=10, trust=True
38 | )
39 | await block_until_leader_elected(ops_test, app_name)
40 |
41 | if await get_leader_unit_num(ops_test, app_name) == 0:
42 | # We're unlucky this time: unit/0 is the leader, which means no scale down could trigger a
43 | # leadership change event.
44 | # Fail the test instead of model.reset() and repeat, because this hangs on github actions.
45 | logger.info("Elected leader is unit/0 - resetting and repeating")
46 | assert 0, "No luck in electing a leader that is not the zero unit. Try re-running?"
47 |
48 | await ops_test.model.wait_for_idle(apps=[app_name], status="active", timeout=1000)
49 |
50 |
51 | # @pytest.mark.abort_on_fail
52 | @pytest.mark.xfail
53 | async def test_scale_down_to_single_unit_with_leadership_change(ops_test: OpsTest):
54 | """Scale down below current leader to trigger a leadership change event."""
55 | assert ops_test.model
56 | application = ops_test.model.applications[app_name]
57 | assert application
58 | await application.scale(scale=1)
59 | await ops_test.model.wait_for_idle(
60 | apps=[app_name], status="active", timeout=1000, wait_for_exact_units=1
61 | )
62 | assert await is_alertmanager_up(ops_test, app_name)
63 |
64 |
65 | # @pytest.mark.abort_on_fail
66 | @pytest.mark.xfail
67 | async def test_scale_up_from_single_unit(ops_test: OpsTest):
68 | """Add a few more units."""
69 | assert ops_test.model
70 | application = ops_test.model.applications[app_name]
71 | assert application
72 | await application.scale(scale_change=2)
73 | await ops_test.model.wait_for_idle(
74 | apps=[app_name], status="active", timeout=1000, wait_for_exact_units=3
75 | )
76 | assert await is_alertmanager_up(ops_test, app_name)
77 |
78 |
79 | # @pytest.mark.abort_on_fail
80 | @pytest.mark.xfail
81 | async def test_scale_down_to_single_unit_without_leadership_change(ops_test):
82 | """Remove a few units."""
83 | await ops_test.model.applications[app_name].scale(scale_change=-2)
84 | await ops_test.model.wait_for_idle(
85 | apps=[app_name], status="active", timeout=1000, wait_for_exact_units=1
86 | )
87 | assert await is_alertmanager_up(ops_test, app_name)
88 |
--------------------------------------------------------------------------------
/tests/integration/test_upgrade_charm.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # Copyright 2021 Canonical Ltd.
3 | # See LICENSE file for licensing details.
4 |
5 | """This test module tests alertmanager upgrade with and without relations present.
6 |
7 | 1. Deploy the charm under test _from charmhub_.
8 | 2. Refresh with locally built charm.
9 | 3. Add all supported relations.
10 | 4. Refresh with locally built charm.
11 | 5. Add unit and refresh again (test multi unit upgrade with relations).
12 | """
13 |
14 | import logging
15 | from pathlib import Path
16 |
17 | import pytest
18 | import sh
19 | import yaml
20 | from helpers import is_alertmanager_up
21 | from pytest_operator.plugin import OpsTest
22 |
23 | # pyright: reportAttributeAccessIssue = false
24 |
25 | logger = logging.getLogger(__name__)
26 |
27 | METADATA = yaml.safe_load(Path("./charmcraft.yaml").read_text())
28 | app_name = METADATA["name"]
29 | resources = {"alertmanager-image": METADATA["resources"]["alertmanager-image"]["upstream-source"]}
30 |
31 |
32 | @pytest.mark.abort_on_fail
33 | async def test_setup_env(ops_test: OpsTest):
34 | assert ops_test.model
35 | await ops_test.model.set_config(
36 | {"update-status-hook-interval": "60m", "logging-config": "=WARNING; unit=DEBUG"}
37 | )
38 |
39 |
40 | @pytest.mark.abort_on_fail
41 | async def test_upgrade_edge_with_local_in_isolation(ops_test: OpsTest, charm_under_test):
42 | """Build the charm-under-test, deploy the charm from charmhub, and upgrade from path."""
43 | logger.info("deploy charm from charmhub")
44 | assert ops_test.model
45 | sh.juju.deploy(app_name, model=ops_test.model.name, channel="2/edge", trust=True)
46 | await ops_test.model.wait_for_idle(apps=[app_name], status="active", timeout=1000)
47 |
48 | logger.info("upgrade deployed charm with local charm %s", charm_under_test)
49 | application = ops_test.model.applications[app_name]
50 | assert application
51 | sh.juju.refresh(app_name, model=ops_test.model.name, path=charm_under_test)
52 | await ops_test.model.wait_for_idle(
53 | apps=[app_name], status="active", timeout=1000, raise_on_error=False
54 | )
55 | await ops_test.model.wait_for_idle(apps=[app_name], status="active", timeout=30)
56 | assert await is_alertmanager_up(ops_test, app_name)
57 |
58 |
59 | @pytest.mark.abort_on_fail
60 | async def test_upgrade_local_with_local_with_relations(ops_test: OpsTest, charm_under_test):
61 | # Deploy related apps
62 | assert ops_test.model
63 | sh.juju.deploy(
64 | "prometheus-k8s", "prom", model=ops_test.model.name, channel="2/edge", trust=True
65 | )
66 | sh.juju.deploy("karma-k8s", "karma", model=ops_test.model.name, channel="2/edge", trust=True)
67 |
68 | # Relate apps
69 | sh.juju.relate(app_name, "prom:alertmanager", model=ops_test.model.name)
70 | sh.juju.relate(app_name, "karma", model=ops_test.model.name)
71 |
72 | # Refresh from path
73 | application = ops_test.model.applications[app_name]
74 | assert application
75 | sh.juju.refresh(app_name, model=ops_test.model.name, path=charm_under_test)
76 | await ops_test.model.wait_for_idle(
77 | apps=[app_name, "prom", "karma"],
78 | status="active",
79 | timeout=2500,
80 | raise_on_error=False,
81 | )
82 | assert await is_alertmanager_up(ops_test, app_name)
83 |
84 |
85 | @pytest.mark.abort_on_fail
86 | async def test_upgrade_with_multiple_units(ops_test: OpsTest, charm_under_test):
87 | assert ops_test.model
88 | # Add unit
89 | application = ops_test.model.applications[app_name]
90 | assert application
91 | await application.scale(scale_change=1)
92 | await ops_test.model.wait_for_idle(
93 | apps=[app_name, "prom", "karma"], status="active", timeout=1000
94 | )
95 |
96 | # Refresh from path
97 | sh.juju.refresh(app_name, model=ops_test.model.name, path=charm_under_test)
98 | await ops_test.model.wait_for_idle(
99 | apps=[app_name, "prom", "karma"], status="active", timeout=2500
100 | )
101 | assert await is_alertmanager_up(ops_test, app_name)
102 |
--------------------------------------------------------------------------------
/tests/unit/test_server_scheme.py:
--------------------------------------------------------------------------------
1 | # Copyright 2023 Canonical Ltd.
2 | # See LICENSE file for licensing details.
3 |
4 | """Feature: The workload's scheme is reflected in the pebble command and in relation data.
5 |
6 | This feature spans:
7 | - manifest generation (pebble layer)
8 | - schema generation (alertmanager_dispatch provider)
9 |
10 | The alertmanager server can serve over HTTP or HTTPS. The requirer side of the relation may be
11 | designed to take URL parts rather than a full URL. Prometheus takes URL parts and would need to
12 | generate its "alertmanagers" config section differently depending on the scheme.
13 | """
14 |
15 | import json
16 | from unittest.mock import patch
17 |
18 | import pytest
19 | from helpers import add_relation_sequence, begin_with_initial_hooks_isolated
20 | from ops.testing import Relation, State
21 |
22 |
23 | @pytest.mark.parametrize("fqdn", ["localhost", "am-0.endpoints.cluster.local"])
24 | @pytest.mark.parametrize("leader", [True, False])
25 | class TestServerScheme:
26 | """Scenario: The workload is deployed to operate in HTTP mode, then switched to HTTPS."""
27 |
28 | @pytest.fixture
29 | def initial_state(self, context, fqdn, leader) -> State: # pyright: ignore
30 | """This is the initial state for this test class."""
31 | # GIVEN an isolated alertmanager charm after the startup sequence is complete
32 |
33 | # No "tls-certificates" relation, no config options
34 | with patch("socket.getfqdn", new=lambda *args: fqdn):
35 | state = begin_with_initial_hooks_isolated(context, leader=leader)
36 |
37 | # Add relation
38 | prom_rel = Relation("alerting", id=10)
39 | state = add_relation_sequence(context, state, prom_rel)
40 | yield state # keep the patch active for so long as this fixture is needed # pyright:ignore
41 |
42 | def test_initial_state_has_http_scheme_in_pebble_layer(self, context, initial_state, fqdn):
43 | # THEN the pebble command has 'http' and the correct hostname in the 'web.external-url' arg
44 | container = initial_state.get_container("alertmanager")
45 | command = container.layers["alertmanager"].services["alertmanager"].command
46 | assert f"--web.external-url=http://{fqdn}:9093" in command
47 |
48 | @pytest.mark.xfail
49 | def test_pebble_layer_scheme_becomes_https_if_tls_relation_added(
50 | self, context, initial_state, fqdn
51 | ):
52 | # WHEN a tls_certificates relation joins
53 | ca = Relation(
54 | "certificates",
55 | id=100,
56 | remote_app_data={
57 | "certificates": json.dumps(
58 | [
59 | {
60 | # fixme: the problem is: instead of "placeholder" here we need a forward ref to the
61 | # CSR that AM will generate on certificates_relation_joined.
62 | # Otherwise, as it stands, charms/tls_certificates_interface/v2/tls_certificates.py:1336 will not find
63 | # this csr and ignore it. Hence no handlers are triggered.
64 | "certificate": "placeholder",
65 | "certificate_signing_request": "placeholder",
66 | "ca": "placeholder",
67 | "chain": ["first", "second"],
68 | }
69 | ]
70 | )
71 | },
72 | ) # TODO figure out how to easily figure out structure of remote data
73 | state = add_relation_sequence(context, initial_state, ca)
74 | # TODO figure out why relation-changed observer in tls_certificates is not being called
75 |
76 | # THEN the pebble command has 'https' in the 'web.external-url' arg
77 | container = state.get_container("alertmanager")
78 | command = container.layers["alertmanager"].services["alertmanager"].command
79 | assert f"--web.external-url=https://{fqdn}:9093" in command
80 |
81 | def test_alerting_relation_data_scheme(self, initial_state, fqdn):
82 | # FIXME: should rely on interface tests for this kind of test.
83 |
84 | # THEN the "alerting" relation data has 'http' and the correct hostname
85 | relation = initial_state.get_relations("alerting")[0]
86 | assert relation.local_unit_data["public_address"] == f"{fqdn}:9093"
87 | assert relation.local_unit_data["scheme"] == "http"
88 |
89 | # WHEN a tls_certificates relation joins
90 | # TODO
91 |
92 | # THEN the "alerting" relation data has 'http' and the correct hostname
93 | # TODO
94 |
95 | def test_self_monitoring_scrape_job_scheme(self, fqdn, leader):
96 | # TODO
97 | pass
98 |
--------------------------------------------------------------------------------
/INTEGRATING.md:
--------------------------------------------------------------------------------
1 | # Integrating alertmanager-k8s
2 |
3 | Alertmanager can handle different types of relations on the `provides` side and on the `requires` side.
4 |
5 | ## Provides
6 |
7 | ### Alerting
8 |
9 | ```yaml
10 | alerting:
11 | interface: alertmanager_dispatch
12 | ```
13 |
14 | Over the
15 | [`alertmanager_dispatch`](https://charmhub.io/alertmanager-k8s/libraries/alertmanager_dispatch)
16 | relation interface, Alertmanager can be related to charms that forward alerts to it,
17 | for example: [Prometheus][Prometheus operator], [Loki][Loki operator].
18 |
19 | ```bash
20 | juju relate alertmanager-k8s:alerting prometheus-k8s:alerting
21 | ```
22 |
23 | ### Karma dashboard
24 |
25 | ```yaml
26 | karma-dashboard:
27 | interface: karma_dashboard
28 | ```
29 |
30 | The [`karma_dashboard`](https://charmhub.io/karma-k8s/libraries/karma_dashboard)
31 | relation interface links an entire Alertmanager cluster to a
32 | [Karma](https://charmhub.io/karma-k8s) dashboard.
33 | Scaling Alertmanager automatically causes Karma to group alerts by
34 | cluster.
35 |
36 | ```bash
37 | juju relate alertmanager-k8s:karma-dashboard karma-k8s
38 | ```
39 |
40 | ### Self metrics endpoint
41 |
42 |
43 | ```yaml
44 | self-metrics-endpoint:
45 | interface: prometheus_scrape
46 | ```
47 | This Alertmanager charm may forward information about its metrics endpoint and associated alert rules to a Prometheus charm over the `self-metrics-endpoint` relation using the [`prometheus_scrape`](https://charmhub.io/prometheus-k8s/libraries/prometheus_scrape) interface. For these metrics to be aggregated by the remote Prometheus charm, all that is required is to relate the two charms:
48 |
49 | ```bash
50 | juju relate alertmanager-k8s:self-metrics-endpoint prometheus:metrics-endpoint
51 | ```
52 |
53 |
54 | ### Grafana dashboard
55 |
56 | ```yaml
57 | grafana-dashboard:
58 | interface: grafana_dashboard
59 | ```
60 |
61 | Over the `grafana-dashboard` relation using the [`grafana_dashboard`](https://charmhub.io/grafana-k8s/libraries/grafana_dashboard) interface, this Alertmanager charm also provides dashboards for its own metrics, which can be displayed in a [Grafana charm](https://charmhub.io/grafana-k8s).
62 |
63 | To add these dashboards to Grafana, relate the two charms:
64 |
65 | ```bash
66 | juju relate alertmanager-k8s:grafana-dashboard grafana-k8s:grafana-dashboard
67 | ```
68 |
69 | ### Grafana source
70 |
71 | ```yaml
72 | grafana-source:
73 | interface: grafana_datasource
74 | ```
75 |
76 | This charm may provide a data source to Grafana through the `grafana-source` relation using the [`grafana_datasource`](https://charmhub.io/grafana-k8s/libraries/grafana_source) interface.
77 |
78 | ```bash
79 | juju relate alertmanager-k8s:grafana-source grafana-k8s:grafana-source
80 | ```
81 |
82 | ## Requires
83 |
84 |
85 | ### Ingress
86 |
87 | ```yaml
88 | ingress:
89 | interface: ingress
90 | limit: 1
91 | ```
92 |
93 | Interactions with the Alertmanager charm cannot be assumed to originate within the same Juju model, let alone the same Kubernetes cluster or even the same Juju cloud. Hence the charm also supports an ingress relation.
94 |
95 | Since Alertmanager units automatically form a cluster, the charm only needs "per app" ingress. The ingress relation is provided by the [traefik-k8s](https://charmhub.io/traefik-k8s) charm, and this Alertmanager charm supports it over the [`ingress`](https://charmhub.io/traefik-k8s/libraries/ingress) interface.
96 |
97 |
98 | ```bash
99 | juju relate alertmanager-k8s:ingress traefik-k8s:ingress
100 | ```
101 |
102 | ### Remote Configuration
103 |
104 | ```yaml
105 | remote-configuration:
106 | interface: alertmanager_remote_configuration
107 | limit: 1
108 | ```
109 |
110 | The Remote Configuration relation offers the option of configuring Alertmanager via relation data.
111 | This method assumes that another charm provides the configuration
112 | (e.g. [alertmanager-configurer-k8s]); an illustrative sketch of the relation data appears at the end of this document.
113 |
114 | Remote configuration and local configuration (using the charm's config options) are mutually
115 | exclusive. If configuration is provided through both channels simultaneously, the charm will go
116 | into the `Blocked` state until the user resolves the conflict.
117 |
118 | ```bash
119 | juju relate alertmanager-k8s:remote-configuration SOME_PROVIDER_CHARM:remote-configuration
120 | ```
121 |
122 | ```mermaid
123 | graph LR
124 |
125 | subgraph observability["Observability"]
126 | alertmanager[Alertmanager]
127 | end
128 |
129 | subgraph alertmanager-configurer["Remote Configuration Provider"]
130 | am_config[alertmanager-configurer-k8s]
131 | end
132 |
133 | am_config --->|remote_configuration| alertmanager
134 | user{User} -.-> |REST API CALLS| am_config
135 | ```
136 |
137 | [Loki operator]: https://charmhub.io/loki-k8s
138 | [Prometheus operator]: https://charmhub.io/prometheus-k8s
139 | [Karma operator]: https://charmhub.io/karma-k8s/
140 | [alertmanager-configurer-k8s]: https://github.com/canonical/alertmanager-configurer-k8s-operator
141 |
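142 | For illustration, a remote configuration provider publishes the Alertmanager configuration as
143 | JSON-encoded YAML under the `alertmanager_config` key of its application data bag (and, optionally,
144 | a JSON-encoded list of templates under `alertmanager_templates`), as exercised in
145 | `tests/unit/test_remote_configuration_requirer.py`. Below is a minimal provider-side sketch; the
146 | `publish_remote_config` helper and its signature are illustrative and not part of the library:
147 |
148 | ```python
149 | import json
150 |
151 | import yaml
152 |
153 |
154 | def publish_remote_config(relation, app, config_yaml: str, templates: list) -> None:
155 |     """Publish an Alertmanager config and templates into the relation's application data bag."""
156 |     relation.data[app]["alertmanager_config"] = json.dumps(yaml.safe_load(config_yaml))
157 |     relation.data[app]["alertmanager_templates"] = json.dumps(templates)
158 | ```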
--------------------------------------------------------------------------------
/tests/integration/test_remote_configuration.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # Copyright 2022 Canonical Ltd.
3 | # See LICENSE file for licensing details.
4 |
5 | """This test module tests remote configuration support in Alertmanager.
6 |
7 | 0. Deploy `alertmanager-k8s` and `remote-configuration-tester`.
8 | 1. Create `remote-configuration` relation.
9 | 2. Verify that the configuration provided by `remote-configuration-tester` has been applied in
10 | `alertmanager-k8s`.
11 | """
12 |
13 | import os
14 | import shutil
15 | from pathlib import Path
16 |
17 | import helpers
18 | import pytest
19 | import sh
20 | import yaml
21 | from deepdiff import DeepDiff # type: ignore[import]
22 | from pytest_operator.plugin import OpsTest
23 |
24 | METADATA = yaml.safe_load(Path("./charmcraft.yaml").read_text())
25 | APP_NAME = METADATA["name"]
26 | RESOURCES = {"alertmanager-image": METADATA["resources"]["alertmanager-image"]["upstream-source"]}
27 |
28 | TESTER_CHARM_PATH = "./tests/integration/remote_configuration_tester"
29 | TESTER_APP_METADATA = yaml.safe_load(
30 | Path(os.path.join(TESTER_CHARM_PATH, "charmcraft.yaml")).read_text()
31 | )
32 | TESTER_APP_NAME = TESTER_APP_METADATA["name"]
33 | TESTER_APP_RESOURCES = {
34 | f"{TESTER_APP_NAME}-image": TESTER_APP_METADATA["resources"][f"{TESTER_APP_NAME}-image"][
35 | "upstream-source"
36 | ]
37 | }
38 |
39 | TESTER_CHARM_CONFIG = """route:
40 | receiver: test_receiver
41 | group_by:
42 | - alertname
43 | group_wait: 1234s
44 | group_interval: 4321s
45 | repeat_interval: 1111h
46 | receivers:
47 | - name: test_receiver
48 | """
49 |
50 |
51 | @pytest.fixture(scope="module")
52 | async def tester_charm(ops_test: OpsTest):
53 | assert ops_test.model
54 | _copy_alertmanager_remote_configuration_library_into_tester_charm()
55 | tester_charm = await ops_test.build_charm(TESTER_CHARM_PATH)
56 | await ops_test.model.deploy(
57 | tester_charm,
58 | resources=TESTER_APP_RESOURCES,
59 | application_name=TESTER_APP_NAME,
60 | config={"config_file": TESTER_CHARM_CONFIG},
61 | trust=True,
62 | )
63 | await ops_test.model.wait_for_idle(apps=[TESTER_APP_NAME], status="active", timeout=1000)
64 |
65 |
66 | @pytest.fixture(scope="module")
67 | @pytest.mark.abort_on_fail
68 | async def setup(ops_test: OpsTest, charm_under_test, tester_charm):
69 | assert ops_test.model
70 | await ops_test.model.deploy(
71 | charm_under_test,
72 | resources=RESOURCES,
73 | application_name=APP_NAME,
74 | trust=True,
75 | )
76 | await ops_test.model.wait_for_idle(
77 | apps=[APP_NAME, TESTER_APP_NAME], status="active", timeout=1000
78 | )
79 |
80 |
81 | @pytest.mark.abort_on_fail
82 | async def test_remote_configuration_applied_on_relation_created(ops_test: OpsTest, setup):
83 | assert ops_test.model
84 | await ops_test.model.add_relation(
85 | relation1=f"{APP_NAME}:remote-configuration", relation2=TESTER_APP_NAME
86 | )
87 | expected_config = _add_juju_details_to_alertmanager_config(TESTER_CHARM_CONFIG)
88 | await ops_test.model.wait_for_idle(
89 | apps=[APP_NAME],
90 | status="active",
91 | timeout=1000,
92 | idle_period=5,
93 | )
94 |
95 | _, actual_config, _ = await helpers.get_alertmanager_config_from_file(
96 | ops_test=ops_test,
97 | app_name=APP_NAME,
98 | container_name="alertmanager",
99 | config_file_path="/etc/alertmanager/alertmanager.yml",
100 | )
101 |
102 | assert (
103 | DeepDiff(
104 | yaml.safe_load(actual_config),
105 | yaml.safe_load(expected_config),
106 | ignore_order=True,
107 | )
108 | == {}
109 | )
110 |
111 |
112 | @pytest.mark.abort_on_fail
113 | async def test_remote_configuration_file_wrongly_applied(ops_test: OpsTest, setup):
114 | assert ops_test.model
115 | sh.juju( # pyright: ignore
116 | [
117 | "config",
118 | f"{APP_NAME}",
119 | "-m",
120 | ops_test.model_name,
121 | "config_file=tests/integration/am_config.yaml",
122 | ]
123 | )
124 |
125 | await ops_test.model.wait_for_idle(
126 | apps=[APP_NAME],
127 | status="blocked",
128 | timeout=1000,
129 | idle_period=5,
130 | )
131 |
132 |
133 | def _copy_alertmanager_remote_configuration_library_into_tester_charm():
134 | """Ensure that the tester charm uses the current Alertmanager Remote Configuration library."""
135 | library_path = "lib/charms/alertmanager_k8s/v0/alertmanager_remote_configuration.py"
136 | install_path = "tests/integration/remote_configuration_tester/" + library_path
137 | shutil.copyfile(library_path, install_path)
138 |
139 |
140 | def _add_juju_details_to_alertmanager_config(config: str) -> str:
141 | juju_details = ["juju_application", "juju_model", "juju_model_uuid"]
142 | config_dict = yaml.safe_load(config)
143 | group_by = config_dict["route"]["group_by"]
144 | group_by.extend(juju_details)
145 | config_dict["route"]["group_by"] = group_by
146 | return yaml.safe_dump(config_dict)
147 |
--------------------------------------------------------------------------------
/src/config_builder.py:
--------------------------------------------------------------------------------
1 | # Copyright 2023 Canonical Ltd.
2 | # See LICENSE file for licensing details.
3 |
4 | """Config builder for charmed alertmanager."""
5 |
6 | import logging
7 | from dataclasses import dataclass
8 | from typing import Optional
9 |
10 | import yaml
11 |
12 | logger = logging.getLogger(__name__)
13 |
14 |
15 | class ConfigError(Exception):
16 | """Custom exception for failed config updates."""
17 |
18 |
19 | default_config = {
20 | "global": {"http_config": {"tls_config": {"insecure_skip_verify": False}}},
21 | "route": {
22 | "group_wait": "30s",
23 | "group_interval": "5m",
24 | "repeat_interval": "1h",
25 | "receiver": "placeholder",
26 | },
27 | "receivers": [{"name": "placeholder"}],
28 | }
29 |
30 |
31 | @dataclass(frozen=True)
32 | class ConfigSuite:
33 | """Represents all the configuration files managed by this module, and their contents."""
34 |
35 | alertmanager: str
36 | web: Optional[str]
37 | templates: Optional[str]
38 | amtool: str
39 |
40 |
41 | class ConfigBuilder:
42 | """A 'config builder' for alertmanager."""
43 |
44 | def __init__(
45 | self,
46 | *,
47 | api_port: int = 9093,
48 | web_route_prefix: Optional[str] = None,
49 | ):
50 | self._api_port = api_port
51 |
52 | # Sanitize `web_route_prefix` so it has a leading `/` and no trailing `/`
53 | web_route_prefix = web_route_prefix.strip("/") if web_route_prefix else ""
54 | self._web_route_prefix = "/" + web_route_prefix
55 |
56 | self._config = default_config.copy()
57 | self._templates = None
58 | self._templates_path = "/etc/alertmanager/templates.tmpl"
59 |
60 | self._cert_file_path = None
61 | self._key_file_path = None
62 |
63 | def set_config(self, config: Optional[dict]):
64 | """Set the main config file contents."""
65 | if config is not None:
66 | self._config = config
67 | return self
68 |
69 | def set_templates(self, templates: Optional[str], path: Optional[str] = None):
70 | """Set templates."""
71 | if templates is not None:
72 | self._templates = templates
73 | if path:
74 | self._templates_path = path
75 | return self
76 |
77 | def set_tls_server_config(self, *, cert_file_path: str, key_file_path: str):
78 | """Set TLS server config."""
79 | self._cert_file_path = cert_file_path
80 | self._key_file_path = key_file_path
81 | return self
82 |
83 | @property
84 | def _alertmanager_config(self) -> str:
85 | config = self._config.copy()
86 |
87 | # On disk, alertmanager rewrites the config and automatically adds an empty placeholder,
88 | # `templates: []`, so `get` is more robust than `if "templates" in config`.
89 | if config.get("templates"):
90 | logger.error(
91 | "alertmanager config file must not have a 'templates' section; "
92 | "use the 'templates' config option instead."
93 | )
94 | raise ConfigError("Invalid config file: use charm's 'templates' config option instead")
95 |
96 | if self._templates:
97 | config["templates"] = [self._templates_path]
98 |
99 | # add juju topology to "group_by"
100 | # `route` is a mandatory field so don't need to be too careful
101 | route = config.get("route", {})
102 | group_by = set(route.get("group_by", []))
103 |
104 | # The special value '...' disables aggregation entirely. Do not add topology in that case.
105 | # Ref: https://prometheus.io/docs/alerting/latest/configuration/#route
106 | if group_by != {"..."}:
107 | group_by = list(group_by.union(["juju_application", "juju_model", "juju_model_uuid"]))
108 | route["group_by"] = list(group_by)
109 | config["route"] = route
110 | return yaml.safe_dump(config)
111 |
112 | @property
113 | def _amtool_config(self) -> str:
114 | # When amtool is run, it is always in the same container as alertmanager so we can use
115 | # `localhost` in the url.
116 | url = f"http://localhost:{self._api_port}" + self._web_route_prefix
117 | # Make sure url ends with `/`
118 | url = url.rstrip("/") + "/"
119 | return yaml.safe_dump({"alertmanager.url": url})
120 |
121 | @property
122 | def _web_config(self) -> Optional[str]:
123 | if self._cert_file_path and self._key_file_path:
124 | web_config = {
125 | # https://prometheus.io/docs/prometheus/latest/configuration/https/
126 | "tls_server_config": {
127 | # Certificate and key files for server to use to authenticate to client.
128 | "cert_file": self._cert_file_path,
129 | "key_file": self._key_file_path,
130 | },
131 | }
132 | return yaml.safe_dump(web_config)
133 | if self._cert_file_path or self._key_file_path:
134 | raise ConfigError("Must provide both cert and key files")
135 | return None
136 |
137 | def build(self) -> ConfigSuite:
138 | """Return the entire config suite rendered."""
139 | return ConfigSuite(
140 | alertmanager=self._alertmanager_config,
141 | web=self._web_config,
142 | templates=self._templates,
143 | amtool=self._amtool_config,
144 | )
145 |
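146 | # Illustrative usage sketch (not executed by the charm): `user_config_yaml` and the cert/key
147 | # paths below are hypothetical stand-ins for values the charm obtains elsewhere.
148 | #
149 | #     suite = (
150 | #         ConfigBuilder(api_port=9093, web_route_prefix="model-alertmanager-k8s")
151 | #         .set_config(yaml.safe_load(user_config_yaml) if user_config_yaml else None)
152 | #         .set_templates('{{ define "myTemplate" }}do something{{ end }}')
153 | #         .set_tls_server_config(
154 | #             cert_file_path="/etc/alertmanager/server.cert",
155 | #             key_file_path="/etc/alertmanager/server.key",
156 | #         )
157 | #         .build()
158 | #     )
159 | #     # suite.alertmanager, suite.web, suite.templates and suite.amtool hold the rendered
160 | #     # file contents, ready to be pushed into the workload container.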
--------------------------------------------------------------------------------
/tests/integration/test_tls_web.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # Copyright 2023 Ubuntu
3 | # See LICENSE file for licensing details.
4 |
5 | import logging
6 | from pathlib import Path
7 | from types import SimpleNamespace
8 |
9 | import pytest
10 | import sh
11 | import yaml
12 | from helpers import curl, get_unit_address
13 | from pytest_operator.plugin import OpsTest
14 |
15 | # pyright: reportAttributeAccessIssue = false
16 |
17 | logger = logging.getLogger(__name__)
18 |
19 | METADATA = yaml.safe_load(Path("./charmcraft.yaml").read_text())
20 | alertmanager_image_rev = METADATA["resources"]["alertmanager-image"]["upstream-source"]
21 | am = SimpleNamespace(name="alertmanager", scale=1)
22 | ca = SimpleNamespace(name="ca")
23 |
24 | # FIXME change scale to 2 once the tls_certificate lib issue is fixed
25 | # https://github.com/canonical/tls-certificates-interface/issues/57
26 |
27 |
28 | @pytest.mark.abort_on_fail
29 | async def test_build_and_deploy(ops_test: OpsTest, charm_under_test):
30 | """Deploy 2 alertmanager units, related to a local CA."""
31 | assert ops_test.model
32 | # Deploy the charm and wait for active/idle status
33 | sh.juju.deploy(
34 | charm_under_test,
35 | "alertmanager",
36 | f"--num-units={am.scale}",
37 | model=ops_test.model.name,
38 | resource=f"alertmanager-image={alertmanager_image_rev}",
39 | trust=True,
40 | )
41 | sh.juju.deploy("self-signed-certificates", "ca", model=ops_test.model.name, channel="edge")
42 | sh.juju.relate("alertmanager:certificates", "ca", model=ops_test.model.name)
43 |
44 | await ops_test.model.wait_for_idle(
45 | apps=["alertmanager", "ca"],
46 | status="active",
47 | raise_on_error=False,
48 | timeout=600,
49 | idle_period=30,
50 | )
51 |
52 |
53 | @pytest.mark.abort_on_fail
54 | async def test_tls_files_created(ops_test: OpsTest):
55 | """Make sure charm code created web-config, cert and key files."""
56 | # juju ssh --container alertmanager am/0 ls /etc/alertmanager/
57 | config_path = "/etc/alertmanager/"
58 | for i in range(am.scale):
59 | unit_name = f"{am.name}/{i}"
60 | rc, stdout, stderr = await ops_test.juju(
61 | "ssh", "--container", "alertmanager", unit_name, "ls", f"{config_path}"
62 | )
63 | logger.info("%s: contents of %s: %s", unit_name, config_path, stdout or stderr)
64 |
65 |
66 | @pytest.mark.abort_on_fail
67 | async def test_server_cert(ops_test: OpsTest):
68 | """Inspect server cert and confirm `X509v3 Subject Alternative Name` field is as expected."""
69 | # echo \
70 | # | openssl s_client -showcerts -servername $IPADDR:9093 -connect $IPADDR:9093 2>/dev/null \
71 | # | openssl x509 -inform pem -noout -text
72 | am_ip_addrs = [await get_unit_address(ops_test, am.name, i) for i in range(am.scale)]
73 | for am_ip in am_ip_addrs:
74 | cmd = [
75 | "sh",
76 | "-c",
77 | f"echo | openssl s_client -showcerts -servername {am_ip}:9093 -connect {am_ip}:9093 2>/dev/null | openssl x509 -inform pem -noout -text",
78 | ]
79 | retcode, stdout, stderr = await ops_test.run(*cmd)
80 | fqdn = f"{am.name}-0.{am.name}-endpoints.{ops_test.model_name}.svc.cluster.local"
81 | assert fqdn in stdout
82 |
83 |
84 | @pytest.mark.abort_on_fail
85 | async def test_https_reachable(ops_test: OpsTest, temp_dir):
86 | """Make sure alertmanager's https endpoint is reachable using curl and ca cert."""
87 | for i in range(am.scale):
88 | # Save CA cert locally
89 | # juju show-unit am/0 --format yaml | yq '.am/0."relation-info"[0]."local-unit".data.ca' > /tmp/cacert.pem
90 | # juju run ca/0 get-ca-certificate --format json | jq -r '."ca/0".results."ca-certificate"' > internal.cert
91 | cmd = [
92 | "sh",
93 | "-c",
94 | f'juju run {ca.name}/0 get-ca-certificate --format json | jq -r \'."{ca.name}/0".results."ca-certificate"\'',
95 | ]
96 | logger.info("Obtaining CA cert with command: %s", " ".join(cmd))
97 | retcode, stdout, stderr = await ops_test.run(*cmd)
98 | cert = stdout
99 | cert_path = temp_dir / "local.cert"
100 | with open(cert_path, "wt") as f:
101 | f.writelines(cert)
102 |
103 | # Confirm alertmanager TLS endpoint reachable
104 | # curl --fail-with-body --capath /tmp --cacert /tmp/cacert.pem https://alertmanager.local:9093/-/ready
105 | ip_addr = await get_unit_address(ops_test, am.name, i)
106 | fqdn = f"{am.name}-0.{am.name}-endpoints.{ops_test.model_name}.svc.cluster.local"
107 | response = await curl(
108 | ops_test,
109 | cert_dir=temp_dir,
110 | cert_path=cert_path,
111 | ip_addr=ip_addr,
112 | mock_url=f"https://{fqdn}:9093/-/ready",
113 | )
114 | assert "OK" in response
115 |
116 |
117 | @pytest.mark.abort_on_fail
118 | async def test_https_still_reachable_after_refresh(ops_test: OpsTest, charm_under_test, temp_dir):
119 | """Make sure alertmanager's https endpoint is still reachable after an upgrade."""
120 | assert ops_test.model
121 | sh.juju.refresh("alertmanager", model=ops_test.model.name, path=charm_under_test)
122 | await ops_test.model.wait_for_idle(
123 | apps=["alertmanager", "ca"],
124 | status="active",
125 | raise_on_error=False,
126 | timeout=600,
127 | idle_period=30,
128 | )
129 | await test_https_reachable(ops_test, temp_dir)
130 |
--------------------------------------------------------------------------------
/tests/integration/test_templates.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # Copyright 2022 Canonical Ltd.
3 | # See LICENSE file for licensing details.
4 |
5 | import json
6 | import logging
7 | import time
8 | from pathlib import Path
9 |
10 | import pytest
11 | import sh
12 | import yaml
13 | from helpers import is_alertmanager_up
14 | from pytest_operator.plugin import OpsTest
15 | from werkzeug.wrappers import Request, Response
16 |
17 | logger = logging.getLogger(__name__)
18 |
19 | METADATA = yaml.safe_load(Path("./charmcraft.yaml").read_text())
20 | app_name = METADATA["name"]
21 | resources = {"alertmanager-image": METADATA["resources"]["alertmanager-image"]["upstream-source"]}
22 | receiver_name = "fake-receiver"
23 |
24 | # Define the template to use for testing the charm correctly passes it to the workload.
25 | callback_id = str(int(time.time())) # The slack callback id
26 | template = r'{{ define "slack.default.callbackid" }}' + callback_id + "{{ end }}"
27 |
28 |
29 | @pytest.mark.abort_on_fail
30 | async def test_build_and_deploy(ops_test: OpsTest, charm_under_test):
31 | # deploy charm from local source folder
32 | assert ops_test.model
33 | await ops_test.model.deploy(
34 | charm_under_test, resources=resources, application_name=app_name, trust=True
35 | )
36 | await ops_test.model.wait_for_idle(apps=[app_name], status="active", timeout=1000)
37 | application = ops_test.model.applications[app_name]
38 | assert application
39 | assert application.units[0].workload_status == "active"
40 | assert await is_alertmanager_up(ops_test, app_name)
41 |
42 |
43 | @pytest.mark.abort_on_fail
44 | async def test_configure_alertmanager_with_templates(ops_test: OpsTest, httpserver):
45 | # define the alertmanager configuration
46 | assert ops_test.model
47 | aconfig = {
48 | "global": {"http_config": {"tls_config": {"insecure_skip_verify": True}}},
49 | "route": {
50 | "group_by": ["alertname"],
51 | "group_wait": "3s",
52 | "group_interval": "5m",
53 | "repeat_interval": "1h",
54 | "receiver": receiver_name,
55 | },
56 | "receivers": [
57 | {
58 | "name": receiver_name,
59 | "slack_configs": [
60 | {
61 | "api_url": httpserver.url_for("/"),
62 | "channel": "test",
63 | "text": r"https://localhost/alerts/{{ .GroupLabels.alertname }}",
64 | }
65 | ],
66 | }
67 | ],
68 | }
69 |
70 | # set alertmanager configuration and template file
71 | application = ops_test.model.applications[app_name]
72 | assert application
73 | await application.set_config(
74 | {"config_file": yaml.safe_dump(aconfig), "templates_file": template}
75 | )
76 | await ops_test.model.wait_for_idle(apps=[app_name], status="active", timeout=60)
77 |
78 |
79 | @pytest.mark.abort_on_fail
80 | async def test_receiver_gets_alert(ops_test: OpsTest, httpserver):
81 | request_from_alertmanager = None
82 |
83 | def request_handler(request: Request):
84 | """A request handler.
85 |
86 | Alertmanager's POST request to a slack server looks like this:
87 |
88 | {'attachments': [{'callback_id': '2',
89 | 'color': 'danger',
90 | 'fallback': '[FIRING:1] fake-alert alertmanager-k8s '
91 | 'test-templates-klzm 1234 | '
92 | 'http://alertmanager-k8s-0.fqdn:9093/#/alerts?receiver=name',
93 | 'footer': '',
94 | 'mrkdwn_in': ['fallback', 'pretext', 'text'],
95 | 'text': 'https://localhost/alerts/fake-alert',
96 | 'title': '[FIRING:1] fake-alert alertmanager-k8s '
97 | 'test-templates-klzm 1234 ',
98 | 'title_link': 'http://alertmanager-k8s-0.fqdn:9093/#/alerts?receiver=name'}],
99 | 'channel': 'test',
100 | 'username': 'Alertmanager'}
101 | """
102 | nonlocal request_from_alertmanager
103 | response = Response("OK", status=200, content_type="text/plain")
104 | request_from_alertmanager = json.loads(request.data.decode("utf-8"))
105 | logger.info("Got Request Data : %s", request_from_alertmanager)
106 | return response
107 |
108 | # set the alert
109 | with httpserver.wait(timeout=120) as waiting:
110 | # expect an alert to be forwarded to the receiver
111 | httpserver.expect_oneshot_request("/", method="POST").respond_with_handler(request_handler)
112 |
113 | # Use amtool to fire a stand-in alert
114 | sh.juju( # pyright: ignore
115 | [
116 | "ssh",
117 | "-m",
118 | ops_test.model_name,
119 | "--container",
120 | "alertmanager",
121 | f"{app_name}/0",
122 | "amtool",
123 | "alert",
124 | "add",
125 | "foo",
126 | "node=bar",
127 | "status=firing",
128 | "juju_model_uuid=1234",
129 | f"juju_application={app_name}",
130 | "juju_model=model_name",
131 | "--annotation=summary=summary",
132 | ]
133 | )
134 |
135 | # check receiver got an alert
136 | assert waiting.result
137 | assert request_from_alertmanager["attachments"][0]["callback_id"] == callback_id # type: ignore
138 |
--------------------------------------------------------------------------------
/tests/unit/test_external_url.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # Copyright 2021 Canonical Ltd.
3 | # See LICENSE file for licensing details.
4 |
5 | import logging
6 | import unittest
7 | from typing import Optional
8 | from unittest.mock import patch
9 |
10 | import ops
11 | import yaml
12 | from helpers import cli_arg, k8s_resource_multipatch
13 | from ops.testing import Harness
14 |
15 | from alertmanager import WorkloadManager
16 | from charm import AlertmanagerCharm
17 |
18 | logger = logging.getLogger(__name__)
19 |
20 | ops.testing.SIMULATE_CAN_CONNECT = True # pyright: ignore
21 | CONTAINER_NAME = "alertmanager"
22 | SERVICE_NAME = AlertmanagerCharm._service_name
23 |
24 |
25 | class TestExternalUrl(unittest.TestCase):
26 | @patch.object(WorkloadManager, "check_config", lambda *a, **kw: ("ok", ""))
27 | @patch("socket.getfqdn", new=lambda *args: "fqdn")
28 | @k8s_resource_multipatch
29 | @patch("lightkube.core.client.GenericSyncClient")
30 | @patch.object(WorkloadManager, "_alertmanager_version", property(lambda *_: "0.0.0"))
31 | def setUp(self, *unused):
32 | self.harness = Harness(AlertmanagerCharm)
33 | self.harness.set_model_name(self.__class__.__name__)
34 | self.addCleanup(self.harness.cleanup)
35 | self.harness.set_leader(True)
36 |
37 | # Peer relation
38 | self.app_name = "alertmanager-k8s"
39 | self.peer_rel_id = self.harness.add_relation("replicas", self.app_name)
40 |
41 | # Regular relation
42 | self.rel_id = self.harness.add_relation("alerting", "otherapp")
43 | self.harness.add_relation_unit(self.rel_id, "otherapp/0")
44 |
45 | self.harness.begin_with_initial_hooks()
46 | self.fqdn_url = f"http://fqdn:{self.harness.charm.api_port}"
47 |
48 | def get_url_cli_arg(self) -> Optional[str]:
49 | plan = self.harness.get_container_pebble_plan(CONTAINER_NAME)
50 | return cli_arg(plan, "--web.external-url")
51 |
52 | def get_cluster_args(self):
53 | plan = self.harness.get_container_pebble_plan(CONTAINER_NAME).to_dict()
54 | args = plan.get("services", {}).get(SERVICE_NAME, {}).get("command", "").split()
55 | cluster_args = filter(lambda s: s.startswith("--cluster.peer="), args)
56 | cluster_args = sorted((s.split("=")[1] for s in cluster_args))
57 | return cluster_args
58 |
59 | def is_service_running(self) -> bool:
60 | # service = plan.services.get(self.harness.charm._service_name)
61 | service = self.harness.model.unit.get_container(CONTAINER_NAME).get_service(SERVICE_NAME)
62 | return service.is_running()
63 |
64 | @unittest.skip("https://github.com/canonical/operator/issues/736")
65 | @patch.object(WorkloadManager, "check_config", lambda *a, **kw: ("ok", ""))
66 | @patch("socket.getfqdn", new=lambda *args: "fqdn")
67 | @k8s_resource_multipatch
68 | def test_traefik_overrides_fqdn(self):
69 | """The config option for external url must override all other external urls."""
70 | # GIVEN a charm with the fqdn as its external URL
71 | self.assertEqual(self.get_url_cli_arg(), self.fqdn_url)
72 | self.assertTrue(self.is_service_running())
73 | self.assertEqual(self.harness.charm._external_url, self.fqdn_url)
74 |
75 | # WHEN a relation with traefik is formed but ingress isn't ready
76 | rel_id = self.harness.add_relation("ingress", "traefik-app")
77 | self.harness.add_relation_unit(rel_id, "traefik-app/0")
78 |
79 | # THEN there is no change to the cli arg
80 | self.assertEqual(self.get_url_cli_arg(), self.fqdn_url)
81 | self.assertTrue(self.is_service_running())
82 | self.assertEqual(self.harness.charm._external_url, self.fqdn_url)
83 |
84 | # WHEN ingress becomes available
85 | external_url_ingress = "http://foo.bar.ingress:80/path/to/mdl-alertmanager-k8s"
86 | app_data = {"ingress": yaml.safe_dump({"url": external_url_ingress})}
87 | self.harness.update_relation_data(rel_id, "traefik-app", app_data)
88 |
89 | # THEN the external url from the ingress relation overrides the fqdn
90 | self.assertEqual(self.get_url_cli_arg(), external_url_ingress)
91 | self.assertTrue(self.is_service_running())
92 |
93 | # NOTE intentionally not emptying out relation data manually
94 | # FIXME: figure out if we do or do not need to manually empty out relation-data
95 | # before relation-broken is emitted.
96 | # https://github.com/canonical/operator/issues/888
97 | app_data = {"ingress": ""}
98 | self.harness.update_relation_data(rel_id, "traefik-app", app_data)
99 |
100 | # AND WHEN the traefik relation is removed
101 | self.harness.remove_relation_unit(rel_id, "traefik-app/0")
102 | self.harness.remove_relation(rel_id)
103 |
104 | # THEN the fqdn is used as external url
105 | self.assertEqual(self.get_url_cli_arg(), self.fqdn_url)
106 |
107 | @unittest.skip("https://github.com/canonical/operator/issues/736")
108 | @patch.object(WorkloadManager, "check_config", lambda *a, **kw: ("ok", ""))
109 | @patch("socket.getfqdn", new=lambda *args: "fqdn-0")
110 | @k8s_resource_multipatch
111 | def test_cluster_addresses(self, *_):
112 | # GIVEN an alertmanager charm with 3 units in total
113 | for u in [1, 2]:
114 | unit_name = self.app_name + f"/{u}"
115 | self.harness.add_relation_unit(self.peer_rel_id, unit_name)
116 | self.harness.update_relation_data(
117 | self.peer_rel_id, unit_name, {"private_address": f"http://fqdn-{u}:9093"}
118 | )
119 |
120 | # THEN the `--cluster.peer` args are made up of the hostname and HA port
121 | cluster_args = self.get_cluster_args()
122 | self.assertEqual(cluster_args, ["fqdn-1:9094", "fqdn-2:9094"]) # cluster is on ha-port
123 |
--------------------------------------------------------------------------------
/tests/unit/test_alertmanager_client.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # Copyright 2021 Canonical Ltd.
3 | # See LICENSE file for licensing details.
4 |
5 | import json
6 | import unittest
7 | from datetime import datetime, timedelta, timezone
8 | from unittest.mock import patch
9 |
10 | from alertmanager_client import Alertmanager, AlertmanagerBadResponse
11 |
12 |
13 | class TestAlertmanagerAPIClient(unittest.TestCase):
14 | def setUp(self):
15 | self.path = "custom/path"
16 | self.api = Alertmanager(f"http://address:12345/{self.path}/")
17 |
18 | def test_base_url(self):
19 | """Check that regardless of the passed url, base_url ends with a slash."""
20 | api_with_slash = Alertmanager(f"http://address:12345/{self.path}/")
21 | self.assertEqual(f"http://address:12345/{self.path}/", api_with_slash.base_url)
22 | api_without_slash = Alertmanager(f"http://address:12345/{self.path}")
23 | self.assertEqual(f"http://address:12345/{self.path}/", api_without_slash.base_url)
24 |
25 | @patch("alertmanager_client.urllib.request.urlopen")
26 | def test_reload_succeed(self, urlopen_mock):
27 | urlopen_mock.return_value.code = 200
28 | urlopen_mock.return_value.reason = "OK"
29 |
30 | self.api.reload()
31 | urlopen_mock.assert_called()
32 |
33 | @patch("alertmanager_client.urllib.request.urlopen")
34 | def test_status_succeed(self, urlopen_mock):
35 | urlopen_mock.return_value.read = lambda: json.dumps({"status": "fake"})
36 | urlopen_mock.return_value.code = 200
37 | urlopen_mock.return_value.reason = "OK"
38 |
39 | status = self.api.status()
40 | self.assertIsNotNone(status)
41 | self.assertDictEqual({"status": "fake"}, status)
42 |
43 | def test_reload_and_status_fail(self):
44 | def mock_connection_error(*args, **kwargs):
45 | import urllib.error
46 |
47 | raise urllib.error.HTTPError(
48 | url="mock://url",
49 | code=500,
50 | msg="mock msg",
51 | hdrs={"mock hdr": "mock smth"}, # type: ignore[arg-type]
52 | fp=None,
53 | )
54 |
55 | with patch("alertmanager_client.urllib.request.urlopen", mock_connection_error):
56 | self.assertRaises(AlertmanagerBadResponse, self.api.reload)
57 |
58 | with patch("alertmanager_client.urllib.request.urlopen", mock_connection_error):
59 | self.assertRaises(AlertmanagerBadResponse, self.api.status)
60 |
61 | @patch("alertmanager_client.urllib.request.urlopen")
62 | def test_version(self, urlopen_mock):
63 | urlopen_mock.return_value.read = lambda: json.dumps({"versionInfo": {"version": "0.1.2"}})
64 | urlopen_mock.return_value.code = 200
65 | urlopen_mock.return_value.reason = "OK"
66 |
67 | self.assertEqual(self.api.version, "0.1.2")
68 |
69 | @patch("alertmanager_client.urllib.request.urlopen")
70 | def test_alerts_can_be_set(self, urlopen_mock):
71 | msg = "HTTP 200 OK"
72 | urlopen_mock.return_value = msg
73 | alerts = [
74 | {
75 | "startsAt": datetime.now().isoformat("T"),
76 | "status": "firing",
77 | "annotations": {
78 | "summary": "A fake alert",
79 | },
80 | "labels": {
81 | "alertname": "fake alert",
82 | },
83 | }
84 | ]
85 | status = self.api.set_alerts(alerts)
86 | urlopen_mock.assert_called()
87 | self.assertEqual(status, msg)
88 |
89 | @patch("alertmanager_client.urllib.request.urlopen")
90 | def test_available_alerts_are_returned(self, urlopen_mock):
91 | fake_alerts = [
92 | {
93 | "labels": {"name": "fake-alert"},
94 | "startsAt": datetime.now().isoformat("T"),
95 | }
96 | ]
97 | urlopen_mock.return_value.read = lambda: json.dumps(fake_alerts)
98 | urlopen_mock.return_value.code = 200
99 | urlopen_mock.return_value.reason = "OK"
100 |
101 | alerts = self.api.get_alerts()
102 | self.assertListEqual(alerts, fake_alerts)
103 |
104 | @patch("alertmanager_client.urllib.request.urlopen")
105 | def test_silences_can_be_set(self, urlopen_mock):
106 | msg = "HTTP 200 OK"
107 | urlopen_mock.return_value = msg
108 | matchers = [
109 | {
110 | "name": "alertname",
111 | "value": "fake-alert",
112 | "isRegex": False,
113 | }
114 | ]
115 | silence_start = datetime.now(timezone.utc)
116 | silence_end = silence_start + timedelta(minutes=60)
117 | status = self.api.set_silences(
118 | matchers=matchers, start_time=silence_start, end_time=silence_end
119 | )
120 | urlopen_mock.assert_called()
121 | self.assertEqual(status, msg)
122 |
123 | @patch("alertmanager_client.urllib.request.urlopen")
124 | def test_available_silences_are_returned(self, urlopen_mock):
125 | fake_silences = [
126 | {
127 | "id": "fake-silencer",
128 | "status": {"state": "active"},
129 | "startsAt": datetime.now().isoformat("T"),
130 | "endsAt": (datetime.now() + timedelta(minutes=60)).isoformat("T"),
131 | "matchers": [
132 | {
133 | "name": "alertname",
134 | "value": "fake-alert",
135 | "isRegex": False,
136 | }
137 | ],
138 | }
139 | ]
140 | urlopen_mock.return_value.read = lambda: json.dumps(fake_silences)
141 | urlopen_mock.return_value.code = 200
142 | urlopen_mock.return_value.reason = "OK"
143 |
144 | alerts = self.api.get_silences()
145 | self.assertListEqual(alerts, fake_silences)
146 |
147 | @patch("alertmanager_client.urllib.request.urlopen")
148 | def test_silences_can_be_deleted(self, urlopen_mock):
149 | msg = "HTTP 200 OK"
150 | urlopen_mock.return_value = msg
151 |
152 | status = self.api.delete_silence("fake-id")
153 | urlopen_mock.assert_called()
154 | self.assertEqual(status, msg)
155 |
--------------------------------------------------------------------------------
/tests/unit/test_remote_configuration_requirer.py:
--------------------------------------------------------------------------------
1 | # Copyright 2022 Canonical Ltd.
2 | # See LICENSE file for licensing details.
3 |
4 | import json
5 | import logging
6 | import unittest
7 | from typing import cast
8 | from unittest.mock import patch
9 |
10 | import yaml
11 | from charms.alertmanager_k8s.v0.alertmanager_remote_configuration import (
12 | DEFAULT_RELATION_NAME,
13 | )
14 | from deepdiff import DeepDiff # type: ignore[import]
15 | from helpers import k8s_resource_multipatch
16 | from ops import testing
17 | from ops.model import BlockedStatus
18 |
19 | from alertmanager import WorkloadManager
20 | from charm import AlertmanagerCharm
21 |
22 | logger = logging.getLogger(__name__)
23 |
24 | testing.SIMULATE_CAN_CONNECT = True # pyright: ignore
25 |
26 | TEST_ALERTMANAGER_CONFIG_FILE = "/test/rules/dir/config_file.yml"
27 | TEST_ALERTMANAGER_DEFAULT_CONFIG = """route:
28 | receiver: placeholder
29 | receivers:
30 | - name: placeholder
31 | """
32 | TEST_ALERTMANAGER_REMOTE_CONFIG = """receivers:
33 | - name: test_receiver
34 | route:
35 | receiver: test_receiver
36 | group_by:
37 | - alertname
38 | group_wait: 1234s
39 | group_interval: 4321s
40 | repeat_interval: 1111h
41 | """
42 |
43 |
44 | @patch("subprocess.run")
45 | class TestAlertmanagerRemoteConfigurationRequirer(unittest.TestCase):
46 | @patch("subprocess.run")
47 | @patch("lightkube.core.client.GenericSyncClient")
48 | @patch.object(AlertmanagerCharm, "_update_ca_certs", lambda *a, **kw: None)
49 | @patch.object(WorkloadManager, "check_config", lambda *a, **kw: ("ok", ""))
50 | @k8s_resource_multipatch
51 | def setUp(self, *_) -> None:
52 | self.harness = testing.Harness(AlertmanagerCharm)
53 | self.addCleanup(self.harness.cleanup)
54 | self.harness.set_leader(True)
55 |
56 | self.harness.handle_exec("alertmanager", ["update-ca-certificates", "--fresh"], result="")
57 | self.harness.handle_exec(
58 | "alertmanager",
59 | [WorkloadManager._amtool_path, "check-config", AlertmanagerCharm._config_path],
60 | result="",
61 | )
62 |
63 | # TODO: Once we're on ops 2.0.0+ this can be removed as begin_with_initial_hooks()
64 | # now does it.
65 | self.harness.set_can_connect("alertmanager", True)
66 |
67 | # In ops 2.0.0+, we need to mock the version, as begin_with_initial_hooks() now triggers
68 | # pebble-ready, which attempts to obtain the workload version.
69 | patcher = patch.object(
70 | WorkloadManager, "_alertmanager_version", property(lambda *_: "0.0.0")
71 | )
72 | self.mock_version = patcher.start()
73 | self.addCleanup(patcher.stop)
74 |
75 | self.harness.begin_with_initial_hooks()
76 |
77 | self.relation_id = self.harness.add_relation(
78 | DEFAULT_RELATION_NAME, "remote-config-provider"
79 | )
80 | self.harness.add_relation_unit(self.relation_id, "remote-config-provider/0")
81 |
82 | @k8s_resource_multipatch
83 | def test_valid_config_pushed_to_relation_data_bag_updates_alertmanager_config(
84 | self,
85 | *_,
86 | ):
87 | expected_config = remote_config = yaml.safe_load(TEST_ALERTMANAGER_REMOTE_CONFIG)
88 | # add juju topology to "group_by"
89 | route = cast(dict, expected_config.get("route", {}))
90 | route["group_by"] = list(
91 | set(route.get("group_by", [])).union(
92 | ["juju_application", "juju_model", "juju_model_uuid"]
93 | )
94 | )
95 | expected_config["route"] = route
96 |
97 | self.harness.update_relation_data(
98 | relation_id=self.relation_id,
99 | app_or_unit="remote-config-provider",
100 | key_values={"alertmanager_config": json.dumps(remote_config)},
101 | )
102 | config = self.harness.charm.container.pull(self.harness.charm._config_path)
103 |
104 | self.assertEqual(
105 | DeepDiff(yaml.safe_load(config.read()), expected_config, ignore_order=True),
106 | {},
107 | )
108 |
109 | @k8s_resource_multipatch
110 | @patch.object(WorkloadManager, "check_config", lambda *a, **kw: ("ok", ""))
111 | def test_configs_available_from_both_relation_data_bag_and_charm_config_block_charm(
112 | self,
113 | *_,
114 | ):
115 | sample_remote_config = yaml.safe_load(TEST_ALERTMANAGER_REMOTE_CONFIG)
116 | self.harness.update_relation_data(
117 | relation_id=self.relation_id,
118 | app_or_unit="remote-config-provider",
119 | key_values={"alertmanager_config": json.dumps(sample_remote_config)},
120 | )
121 | self.harness.update_config({"config_file": TEST_ALERTMANAGER_DEFAULT_CONFIG})
122 |
123 | self.assertEqual(
124 | self.harness.charm.unit.status, BlockedStatus("Multiple configs detected")
125 | )
126 |
127 | @patch("config_builder.default_config", yaml.safe_load(TEST_ALERTMANAGER_DEFAULT_CONFIG))
128 | @k8s_resource_multipatch
129 | def test_invalid_config_pushed_to_the_relation_data_bag_does_not_update_alertmanager_config(
130 | self,
131 | *_,
132 | ):
133 | invalid_config = yaml.safe_load("some: invalid_config")
134 |
135 | self.harness.update_relation_data(
136 | relation_id=self.relation_id,
137 | app_or_unit="remote-config-provider",
138 | key_values={"alertmanager_config": json.dumps(invalid_config)},
139 | )
140 | config = self.harness.charm.container.pull(self.harness.charm._config_path)
141 |
142 | self.assertNotIn("invalid_config", yaml.safe_load(config.read()))
143 |
144 | @patch.object(WorkloadManager, "check_config", lambda *a, **kw: ("ok", ""))
145 | @k8s_resource_multipatch
146 | def test_templates_pushed_to_relation_data_bag_are_saved_to_templates_file_in_alertmanager(
147 | self,
148 | *_,
149 | ):
150 | sample_remote_config = yaml.safe_load(TEST_ALERTMANAGER_REMOTE_CONFIG)
151 | test_template = '{{define "myTemplate"}}do something{{end}}'
152 |
153 | self.harness.update_relation_data(
154 | relation_id=self.relation_id,
155 | app_or_unit="remote-config-provider",
156 | key_values={
157 | "alertmanager_config": json.dumps(sample_remote_config),
158 | "alertmanager_templates": json.dumps([test_template]),
159 | },
160 | )
161 | updated_templates = self.harness.charm.container.pull(self.harness.charm._templates_path)
162 |
163 | self.assertEqual(updated_templates.read(), test_template)
164 |
--------------------------------------------------------------------------------
/tests/unit/test_push_config_to_workload_on_startup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # Copyright 2021 Canonical Ltd.
3 | # See LICENSE file for licensing details.
4 |
5 | import logging
6 | import unittest
7 | from unittest.mock import patch
8 |
9 | import hypothesis.strategies as st
10 | import ops
11 | import validators
12 | import yaml
13 | from helpers import k8s_resource_multipatch
14 | from hypothesis import given
15 | from ops.model import ActiveStatus, BlockedStatus
16 | from ops.testing import Harness
17 |
18 | from alertmanager import WorkloadManager
19 | from charm import AlertmanagerCharm
20 |
21 | logger = logging.getLogger(__name__)
22 | ops.testing.SIMULATE_CAN_CONNECT = True # pyright: ignore
23 | CONTAINER_NAME = "alertmanager"
24 |
25 |
26 | @patch.object(WorkloadManager, "check_config", lambda *a, **kw: ("0.0.0", ""))
27 | @patch("subprocess.run")
28 | class TestPushConfigToWorkloadOnStartup(unittest.TestCase):
29 | """Feature: Push config to workload on startup.
30 |
31 | Background: Charm starts up with initial hooks.
32 | """
33 |
34 | @patch("subprocess.run")
35 | @patch.object(WorkloadManager, "check_config", lambda *a, **kw: ("0.0.0", ""))
36 | @k8s_resource_multipatch
37 | @patch("lightkube.core.client.GenericSyncClient")
38 | @patch.object(WorkloadManager, "_alertmanager_version", property(lambda *_: "0.0.0"))
39 | def setUp(self, *_):
40 | self.harness = Harness(AlertmanagerCharm)
41 | self.addCleanup(self.harness.cleanup)
42 |
43 | # self.harness.charm.app.name does not exist before .begin()
44 | # https://github.com/canonical/operator/issues/675
45 | self.app_name = "alertmanager-k8s"
46 | self.peer_rel_id = self.harness.add_relation("replicas", self.app_name)
47 | self.harness.begin_with_initial_hooks()
48 |
49 | @given(st.booleans())
50 | def test_single_unit_cluster(self, is_leader, _):
51 | """Scenario: Current unit is the only unit present."""
52 |         # WHEN only one unit is present
53 | self.assertEqual(self.harness.model.app.planned_units(), 1)
54 | self.harness.set_leader(is_leader)
55 |
56 | # THEN amtool config is rendered
57 | amtool_config = yaml.safe_load(
58 | self.harness.charm.container.pull(self.harness.charm._amtool_config_path)
59 | )
60 | self.assertTrue(validators.url(amtool_config["alertmanager.url"], simple_host=True))
61 |
62 | # AND alertmanager config is rendered
63 | am_config = yaml.safe_load(
64 | self.harness.charm.container.pull(self.harness.charm._config_path)
65 | )
66 | self.assertGreaterEqual(am_config.keys(), {"global", "route", "receivers"})
67 |
68 | # AND path to config file is part of pebble layer command
69 | command = (
70 | self.harness.get_container_pebble_plan(self.harness.charm._container_name)
71 | .services[self.harness.charm._service_name]
72 | .command
73 | )
74 | self.assertIn(f"--config.file={self.harness.charm._config_path}", command)
75 |
76 | # AND peer clusters cli arg is not present in pebble layer command
77 | self.assertNotIn("--cluster.peer=", command)
78 |
79 | @unittest.skip("https://github.com/canonical/operator/issues/736")
80 | @k8s_resource_multipatch
81 | def test_multi_unit_cluster(self, *_):
82 | """Scenario: Current unit is a part of a multi-unit cluster."""
83 | self.harness.set_leader(False)
84 |
85 | # WHEN multiple units are present
86 | num_units = 3
87 | for i in range(1, num_units):
88 | self.harness.add_relation_unit(self.peer_rel_id, f"{self.app_name}/{i}")
89 | self.harness.update_relation_data(
90 | self.peer_rel_id,
91 | f"{self.app_name}/{i}",
92 | {"private_address": f"http://fqdn-{i}"},
93 | )
94 |
95 | self.assertEqual(self.harness.model.app.planned_units(), num_units)
96 |
97 | # THEN peer clusters cli arg is present in pebble layer command
98 | command = (
99 | self.harness.get_container_pebble_plan(self.harness.charm._container_name)
100 | .services[self.harness.charm._service_name]
101 | .command
102 | )
103 | self.assertIn("--cluster.peer=", command)
104 |
105 | def test_charm_blocks_on_connection_error(self, *_):
106 | self.assertIsInstance(self.harness.charm.unit.status, ActiveStatus)
107 | self.harness.set_can_connect(CONTAINER_NAME, False)
108 | self.harness.update_config({"templates_file": "doesn't matter"})
109 | self.assertNotIsInstance(self.harness.charm.unit.status, ActiveStatus)
110 |
111 |
112 | @patch("subprocess.run")
113 | class TestInvalidConfig(unittest.TestCase):
114 | """Feature: Charm must block when invalid config is provided.
115 |
116 | Background: alertmanager exits when config is invalid, so this must be guarded against,
117 | otherwise pebble will keep trying to restart it, resulting in an idle crash-loop.
118 | """
119 |
120 | def setUp(self):
121 | self.harness = Harness(AlertmanagerCharm)
122 | self.addCleanup(self.harness.cleanup)
123 |
124 | self.harness.handle_exec("alertmanager", ["update-ca-certificates", "--fresh"], result="")
125 |
126 | @k8s_resource_multipatch
127 | @patch("lightkube.core.client.GenericSyncClient")
128 | @patch.object(WorkloadManager, "_alertmanager_version", property(lambda *_: "0.0.0"))
129 | def test_charm_blocks_on_invalid_config_on_startup(self, *_):
130 | # GIVEN an invalid config file
131 | self.harness.update_config({"config_file": "templates: [wrong]"})
132 |
133 | # WHEN the charm starts
134 | self.harness.begin_with_initial_hooks()
135 |
136 | # THEN the charm goes into blocked status
137 | self.assertIsInstance(self.harness.charm.unit.status, BlockedStatus)
138 |
139 | @k8s_resource_multipatch
140 | @patch("lightkube.core.client.GenericSyncClient")
141 | @patch.object(WorkloadManager, "_alertmanager_version", property(lambda *_: "0.0.0"))
142 | @patch.object(WorkloadManager, "check_config", lambda *a, **kw: ("0.0.0", ""))
143 | def test_charm_blocks_on_invalid_config_changed(self, *_):
144 | # GIVEN a valid configuration
145 | self.harness.update_config({"config_file": "templates: []"})
146 |
147 | # WHEN the charm starts
148 | self.harness.begin_with_initial_hooks()
149 |
150 | # THEN the charm goes into active status
151 | self.assertIsInstance(self.harness.charm.unit.status, ActiveStatus)
152 |
153 | # AND WHEN the config is updated and invalid (mocked below)
154 | self.harness.update_config({"config_file": "templates: [wrong]"})
155 |
156 | # THEN the charm goes into blocked status
157 | self.assertIsInstance(self.harness.charm.unit.status, BlockedStatus)
158 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing to alertmanager-k8s
2 | 
3 | 
4 | 
5 | 
6 |   
7 |
8 | ## Overview
9 |
10 | This document explains the processes and practices recommended for
11 | contributing enhancements or bug fixes to the Alertmanager Charmed Operator.
12 |
13 | The intended use case of this operator is to be deployed as part of the
14 | [COS Lite] bundle, although that is not necessary.
15 |
16 |
17 | ## Setup
18 |
19 | A typical setup using [snaps](https://snapcraft.io/) can be found in the
20 | [Juju docs](https://juju.is/docs/sdk/dev-setup).
21 |
22 |
23 | ## Developing
24 |
25 | - Before starting work on a pull request, we encourage you to open an
26 | issue explaining the use case or bug.
27 | This gives other contributors a chance to weigh in early in the process.
28 | - To author PRs you should be familiar with [juju](https://juju.is/#what-is-juju)
29 | and [how operators are written](https://juju.is/docs/sdk).
30 | - The best way to get a head start is to join the conversation on our
31 | [Mattermost channel] or [Discourse].
32 | - All enhancements require review before being merged. Besides the
33 | code quality and test coverage, the review will also take into
34 | account the resulting user experience for Juju administrators using
35 |   this charm. Before a pull request can be merged, its branch must be rebased
36 |   onto the `main` branch. We do this to avoid merge commits and to keep a
37 |   linear Git history.
38 | - We use [`tox`](https://tox.wiki/en/latest/#) to manage all virtualenvs for
39 | the development lifecycle.
40 |
41 |
42 | ### Testing
43 | Unit tests are written with the Operator Framework [test harness] and
44 | integration tests are written using [pytest-operator] and [python-libjuju].
45 |
46 | The default test environments - lint, static and unit - will run if you start
47 | `tox` without arguments.
48 |
49 | You can also manually run a specific test environment:
50 |
51 | ```shell
52 | tox -e fmt # update your code according to linting rules
53 | tox -e lint # code style
54 | tox -e static # static analysis
55 | tox -e unit # unit tests
56 | tox -e integration # integration tests
57 | tox -e integration-lma # integration tests for the lma-light bundle
58 | ```
59 |
60 | `tox` creates a virtual environment for every tox environment defined in
61 | [tox.ini](tox.ini). To activate a tox environment for manual testing,
62 |
63 | ```shell
64 | source .tox/unit/bin/activate
65 | ```
66 |
67 |
68 | #### Manual testing
69 | Alerts can be created using
70 | [`amtool`](https://manpages.debian.org/testing/prometheus-alertmanager/amtool.1.en.html),
71 |
72 | ```shell
73 | amtool alert add alertname=oops service="my-service" severity=warning \
74 | instance="oops.example.net" --annotation=summary="High latency is high!" \
75 | --generator-url="http://prometheus.int.example.net"
76 | ```
77 |
78 | or using [Alertmanager's HTTP API][Alertmanager API browser],
79 | [for example](https://gist.github.com/cherti/61ec48deaaab7d288c9fcf17e700853a):
80 |
81 | ```shell
82 | alertmanager_ip=$(juju status alertmanager/0 --format=json | \
83 | jq -r ".applications.alertmanager.units.\"alertmanager/0\".address")
84 | name=oops
85 | curl -XPOST http://$alertmanager_ip:9093/api/v1/alerts -d "[{
86 | \"status\": \"firing\",
87 | \"labels\": {
88 | \"alertname\": \"$name\",
89 | \"service\": \"my-service\",
90 | \"severity\":\"warning\",
91 | \"instance\": \"$name.example.net\"
92 | },
93 | \"annotations\": {
94 | \"summary\": \"High latency is high!\"
95 | },
96 | \"generatorURL\": \"http://prometheus.int.example.net\"
97 | }]"
98 | ```
99 |
100 | The alert should then be listed,
101 |
102 | ```shell
103 | curl http://$alertmanager_ip:9093/api/v1/alerts
104 | ```
105 |
106 | and visible on a karma dashboard, if configured.
107 |
108 | Relations between alertmanager and prometheus can be verified by
109 | [querying prometheus](https://prometheus.io/docs/prometheus/latest/querying/api/#alertmanagers)
110 | for active alertmanagers:
111 |
112 | ```shell
113 | curl -X GET "http://$prom_ip:9090/api/v1/alertmanagers"
114 | ```
115 |
116 | ## Build charm
117 |
118 | Build the charm in this git repository using
119 |
120 | ```shell
121 | charmcraft pack
122 | ```
123 |
124 | which will create a `*.charm` file you can deploy with:
125 |
126 | ```shell
127 | juju deploy ./alertmanager-k8s.charm \
128 | --resource alertmanager-image=ubuntu/prometheus-alertmanager \
129 | --config config_file='@path/to/alertmanager.yml' \
130 | --config templates_file='@path/to/templates.tmpl'
131 | ```
132 |
133 |
134 | ## Code overview
135 | - The main charm class is `AlertmanagerCharm`, which responds to config changes
136 | (via `ConfigChangedEvent`) and cluster changes (via `RelationJoinedEvent`,
137 | `RelationChangedEvent` and `RelationDepartedEvent`).
138 | - All lifecycle events call a common hook, `_common_exit_hook`, after executing
139 |   their own business logic (a minimal sketch of this pattern follows below).
140 |   This simplifies state tracking and improves consistency.
141 | - On startup, the charm waits for `PebbleReadyEvent` and for an IP address to
142 |   become available before starting the alertmanager service and declaring
143 |   `ActiveStatus`. If the workload container is not reachable or the provided
144 |   config is invalid, the charm sets a non-active status instead.
145 |
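A minimal sketch of the pattern (the event handlers and status messages here are
illustrative only, not the charm's actual code):

```python
from ops.charm import CharmBase
from ops.model import ActiveStatus, MaintenanceStatus


class SketchCharm(CharmBase):
    """Illustration of the "common exit hook" pattern."""

    def __init__(self, *args):
        super().__init__(*args)
        # Every lifecycle event is funnelled into the same exit hook.
        self.framework.observe(self.on.config_changed, self._on_config_changed)
        self.framework.observe(self.on.upgrade_charm, self._on_upgrade_charm)

    def _on_config_changed(self, _event):
        # Event-specific business logic goes here.
        self._common_exit_hook()

    def _on_upgrade_charm(self, _event):
        # Event-specific business logic goes here.
        self._common_exit_hook()

    def _common_exit_hook(self):
        # Re-derive the desired state from config and relations, push it to the
        # workload and set the unit status, regardless of which event fired.
        self.unit.status = MaintenanceStatus("reconciling")
        # ... render config, update the pebble layer, reload/restart if needed ...
        self.unit.status = ActiveStatus()
```
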
146 | ## Design choices
147 | - The `alertmanager.yml` config file is created in its entirety by the charm
148 | code on startup (the default `alertmanager.yml` is overwritten). This is done
149 | to maintain consistency across OCI images.
150 | - Hot reload via the alertmanager HTTP API is used whenever possible instead of
151 | service restart, to minimize downtime.
152 |
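For reference, a configuration hot reload can also be triggered manually through
Alertmanager's management endpoint. A minimal sketch, assuming the default port (9093),
no web route prefix, and an example unit address:

```python
# Ask a running Alertmanager to reload its configuration from disk.
import urllib.request

alertmanager_url = "http://10.1.179.220:9093"  # example unit address
req = urllib.request.Request(f"{alertmanager_url}/-/reload", method="POST")
with urllib.request.urlopen(req, timeout=2.0) as resp:
    print(resp.status)  # 200 means the reload request was accepted
```
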
153 |
154 | [Alertmanager API browser]: https://petstore.swagger.io/?url=https://raw.githubusercontent.com/prometheus/alertmanager/main/api/v2/openapi.yaml
155 | [gh:Prometheus operator]: https://github.com/canonical/prometheus-operator
156 | [Prometheus operator]: https://charmhub.io/prometheus-k8s
157 | [COS Lite]: https://charmhub.io/cos-lite
158 | [Mattermost channel]: https://chat.charmhub.io/charmhub/channels/observability
159 | [Discourse]: https://discourse.charmhub.io/tag/alertmanager
160 | [test harness]: https://ops.readthedocs.io/en/latest/#module-ops.testing
161 | [pytest-operator]: https://github.com/charmed-kubernetes/pytest-operator/blob/main/docs/reference.md
162 | [python-libjuju]: https://pythonlibjuju.readthedocs.io/en/latest/
163 |
--------------------------------------------------------------------------------
/tests/unit/test_consumer.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # Copyright 2021 Canonical Ltd.
3 | # See LICENSE file for licensing details.
4 |
5 | import textwrap
6 | import unittest
7 |
8 | import ops
9 | from charms.alertmanager_k8s.v1.alertmanager_dispatch import AlertmanagerConsumer
10 | from ops.charm import CharmBase
11 | from ops.framework import StoredState
12 | from ops.testing import Harness
13 |
14 | ops.testing.SIMULATE_CAN_CONNECT = True # pyright: ignore
15 |
16 |
17 | class SampleConsumerCharm(CharmBase):
18 | """Mimic bare functionality of AlertmanagerCharm needed to test the consumer."""
19 |
20 | # define custom metadata - without this the harness would parse the metadata.yaml in this repo,
21 |     # which would result in expressions like self.harness.model.app.name returning
22 | # "alertmanager-k8s", which is not what we want in a consumer test
23 | metadata_yaml = textwrap.dedent(
24 | """
25 | name: SampleConsumerCharm
26 | containers:
27 | consumer-charm:
28 | resource: consumer-charm-image
29 | resources:
30 | consumer-charm-image:
31 | type: oci-image
32 | requires:
33 | alerting:
34 | interface: alertmanager_dispatch
35 | peers:
36 | replicas:
37 | interface: consumer_charm_replica
38 | """
39 | )
40 | _stored = StoredState()
41 |
42 | def __init__(self, *args, **kwargs):
43 | super().__init__(*args)
44 | # relation name must match metadata
45 | self.alertmanager_lib = AlertmanagerConsumer(self, relation_name="alerting")
46 |
47 | self.framework.observe(
48 | self.alertmanager_lib.on.cluster_changed, self._on_alertmanager_cluster_changed
49 | )
50 |
51 | self._stored.set_default(alertmanagers=[], cluster_changed_emitted=0)
52 |
53 | def _on_alertmanager_cluster_changed(self, _):
54 | self._stored.cluster_changed_emitted += 1
55 | self._stored.alertmanagers = self.alertmanager_lib.get_cluster_info()
56 |
57 |
58 | class TestConsumer(unittest.TestCase):
59 | def setUp(self):
60 | self.harness = Harness(SampleConsumerCharm, meta=SampleConsumerCharm.metadata_yaml)
61 | self.addCleanup(self.harness.cleanup)
62 | self.harness.set_leader(True)
63 | self.harness.begin_with_initial_hooks()
64 |
65 | def _relate_to_alertmanager(self) -> int:
66 | """Create relation between 'this app' and a hypothetical (remote) alertmanager."""
67 | rel_id = self.harness.add_relation(relation_name="alerting", remote_app="am")
68 | return rel_id
69 |
70 | def _add_alertmanager_units(self, rel_id: int, num_units: int, start_with=0):
71 | for i in range(start_with, start_with + num_units):
72 | remote_unit_name = f"am/{i}"
73 | self.harness.add_relation_unit(rel_id, remote_unit_name)
74 | self.harness.update_relation_data(
75 | rel_id, remote_unit_name, {"public_address": f"10.20.30.{i}"}
76 | )
77 |
78 | return rel_id
79 |
80 | def test_cluster_updated_after_alertmanager_units_join(self):
81 | # before
82 | self.assertEqual(set(), self.harness.charm.alertmanager_lib.get_cluster_info())
83 | num_events = self.harness.charm._stored.cluster_changed_emitted
84 |
85 | # add relation
86 | rel_id = self._relate_to_alertmanager()
87 | self._add_alertmanager_units(rel_id, num_units=2)
88 |
89 | # after
90 | self.assertGreater(self.harness.charm._stored.cluster_changed_emitted, num_events)
91 | self.assertSetEqual(
92 | {"http://10.20.30.0", "http://10.20.30.1"},
93 | self.harness.charm.alertmanager_lib.get_cluster_info(),
94 | )
95 |
96 | num_events = self.harness.charm._stored.cluster_changed_emitted
97 |
98 | # add another unit
99 | self._add_alertmanager_units(rel_id, num_units=1, start_with=2)
100 | self.assertGreater(self.harness.charm._stored.cluster_changed_emitted, num_events)
101 | self.assertSetEqual(
102 | {"http://10.20.30.0", "http://10.20.30.1", "http://10.20.30.2"},
103 | self.harness.charm.alertmanager_lib.get_cluster_info(),
104 | )
105 |
106 | def test_cluster_updated_after_alertmanager_unit_leaves(self):
107 | num_events = self.harness.charm._stored.cluster_changed_emitted
108 |
109 | # add relation
110 | rel_id = self._relate_to_alertmanager()
111 | self._add_alertmanager_units(rel_id, num_units=4)
112 | self.assertGreater(self.harness.charm._stored.cluster_changed_emitted, num_events)
113 | before = self.harness.charm.alertmanager_lib.get_cluster_info()
114 | self.assertEqual(len(before), 4)
115 |
116 | num_events = self.harness.charm._stored.cluster_changed_emitted
117 |
118 | # remove alertmanager units
119 | self.harness.remove_relation_unit(rel_id, "am/3")
120 | self.harness.remove_relation_unit(rel_id, "am/2")
121 | self.assertGreater(self.harness.charm._stored.cluster_changed_emitted, num_events)
122 | after = self.harness.charm.alertmanager_lib.get_cluster_info()
123 | self.assertSetEqual(after, {"http://10.20.30.0", "http://10.20.30.1"})
124 |
125 | num_events = self.harness.charm._stored.cluster_changed_emitted
126 |
127 | # remove all remaining units
128 | self.harness.remove_relation_unit(rel_id, "am/1")
129 | self.harness.remove_relation_unit(rel_id, "am/0")
130 | self.assertGreater(self.harness.charm._stored.cluster_changed_emitted, num_events)
131 | after = self.harness.charm.alertmanager_lib.get_cluster_info()
132 | self.assertGreater(self.harness.charm._stored.cluster_changed_emitted, num_events)
133 | self.assertSetEqual(after, set())
134 |
135 | def test_cluster_is_empty_after_relation_breaks(self):
136 | # add relation
137 | rel_id = self._relate_to_alertmanager()
138 | self._add_alertmanager_units(rel_id, num_units=4)
139 | before = self.harness.charm.alertmanager_lib.get_cluster_info()
140 | self.assertEqual(len(before), 4)
141 |
142 | num_events = self.harness.charm._stored.cluster_changed_emitted
143 |
144 | # remove relation
145 | self.harness.remove_relation(rel_id)
146 | after = self.harness.charm.alertmanager_lib.get_cluster_info()
147 | self.assertGreater(self.harness.charm._stored.cluster_changed_emitted, num_events)
148 | self.assertSetEqual(set(), after)
149 |
150 | def test_relation_changed(self):
151 | # add relation
152 | rel_id = self._relate_to_alertmanager()
153 | self._add_alertmanager_units(rel_id, num_units=2)
154 |
155 | # update remote unit's relation data (emulates upgrade-charm)
156 | self.harness.update_relation_data(rel_id, "am/1", {"public_address": "90.80.70.60"})
157 | self.assertSetEqual(
158 | {"http://10.20.30.0", "http://90.80.70.60"},
159 | self.harness.charm.alertmanager_lib.get_cluster_info(),
160 | )
161 |
--------------------------------------------------------------------------------
/tests/unit/test_remote_configuration_provider.py:
--------------------------------------------------------------------------------
1 | # Copyright 2022 Canonical Ltd.
2 | # See LICENSE file for licensing details.
3 |
4 | import json
5 | import logging
6 | import unittest
7 | from unittest.mock import PropertyMock, patch
8 |
9 | import yaml
10 | from charms.alertmanager_k8s.v0.alertmanager_remote_configuration import (
11 | DEFAULT_RELATION_NAME,
12 | ConfigReadError,
13 | RemoteConfigurationProvider,
14 | )
15 | from ops import testing
16 | from ops.charm import CharmBase, CharmEvents
17 | from ops.framework import EventBase, EventSource, StoredState
18 |
19 | logger = logging.getLogger(__name__)
20 |
21 | testing.SIMULATE_CAN_CONNECT = True # pyright: ignore
22 |
23 | TEST_APP_NAME = "provider-tester"
24 | METADATA = f"""
25 | name: {TEST_APP_NAME}
26 | provides:
27 | {DEFAULT_RELATION_NAME}:
28 | interface: alertmanager_remote_configuration
29 | """
30 | TEST_ALERTMANAGER_CONFIG_WITHOUT_TEMPLATES_FILE_PATH = "./tests/unit/test_config/alertmanager.yml"
31 | TEST_ALERTMANAGER_CONFIG_WITH_TEMPLATES_FILE_PATH = (
32 | "./tests/unit/test_config/alertmanager_with_templates.yml"
33 | )
34 | TEST_ALERTMANAGER_INVALID_CONFIG_FILE_PATH = "./tests/unit/test_config/alertmanager_invalid.yml"
35 | TEST_ALERTMANAGER_TEMPLATES_FILE_PATH = "./tests/unit/test_config/test_templates.tmpl"
36 | TESTER_CHARM = "test_remote_configuration_provider.RemoteConfigurationProviderCharm"
37 |
38 |
39 | class AlertmanagerConfigFileChangedEvent(EventBase):
40 | pass
41 |
42 |
43 | class AlertmanagerConfigFileChangedCharmEvents(CharmEvents):
44 | alertmanager_config_file_changed = EventSource(AlertmanagerConfigFileChangedEvent)
45 |
46 |
47 | class RemoteConfigurationProviderCharm(CharmBase):
48 | ALERTMANAGER_CONFIG_FILE = TEST_ALERTMANAGER_CONFIG_WITHOUT_TEMPLATES_FILE_PATH
49 |
50 | on = AlertmanagerConfigFileChangedCharmEvents() # pyright: ignore
51 | _stored = StoredState()
52 |
53 | def __init__(self, *args):
54 | super().__init__(*args)
55 | self._stored.set_default(configuration_broken_emitted=0)
56 |
57 | alertmanager_config = RemoteConfigurationProvider.load_config_file(
58 | self.ALERTMANAGER_CONFIG_FILE
59 | )
60 | self.remote_configuration_provider = RemoteConfigurationProvider(
61 | charm=self,
62 | alertmanager_config=alertmanager_config,
63 | relation_name=DEFAULT_RELATION_NAME,
64 | )
65 |
66 | self.framework.observe(self.on.alertmanager_config_file_changed, self._update_config)
67 | self.framework.observe(
68 | self.remote_configuration_provider.on.configuration_broken,
69 | self._on_configuration_broken,
70 | )
71 |
72 | def _update_config(self, _):
73 | try:
74 | alertmanager_config = RemoteConfigurationProvider.load_config_file(
75 | self.ALERTMANAGER_CONFIG_FILE
76 | )
77 | self.remote_configuration_provider.update_relation_data_bag(alertmanager_config)
78 | except ConfigReadError:
79 | logger.warning("Error reading Alertmanager config file.")
80 |
81 | def _on_configuration_broken(self, _):
82 | self._stored.configuration_broken_emitted += 1
83 |
84 |
85 | class TestAlertmanagerRemoteConfigurationProvider(unittest.TestCase):
86 | def setUp(self) -> None:
87 | self.harness = testing.Harness(RemoteConfigurationProviderCharm, meta=METADATA)
88 | self.addCleanup(self.harness.cleanup)
89 | self.harness.set_leader(True)
90 | self.harness.begin_with_initial_hooks()
91 |
92 | def test_config_without_templates_updates_only_alertmanager_config_in_the_data_bag(self):
93 | with open(TEST_ALERTMANAGER_CONFIG_WITHOUT_TEMPLATES_FILE_PATH, "r") as config_yaml:
94 | expected_config = yaml.safe_load(config_yaml)
95 |
96 | relation_id = self.harness.add_relation(DEFAULT_RELATION_NAME, "requirer")
97 | self.harness.add_relation_unit(relation_id, "requirer/0")
98 |
99 | self.assertEqual(
100 | json.loads(
101 | self.harness.get_relation_data(relation_id, TEST_APP_NAME)["alertmanager_config"]
102 | ),
103 | expected_config,
104 | )
105 | self.assertEqual(
106 | json.loads(
107 | self.harness.get_relation_data(relation_id, TEST_APP_NAME)[
108 | "alertmanager_templates"
109 | ]
110 | ),
111 | [],
112 | )
113 |
114 | @patch(f"{TESTER_CHARM}.ALERTMANAGER_CONFIG_FILE", new_callable=PropertyMock)
115 | def test_config_with_templates_updates_both_alertmanager_config_and_alertmanager_templates_in_the_data_bag( # noqa: E501
116 | self, patched_alertmanager_config_file
117 | ):
118 | patched_alertmanager_config_file.return_value = (
119 | TEST_ALERTMANAGER_CONFIG_WITH_TEMPLATES_FILE_PATH
120 | )
121 | with open(TEST_ALERTMANAGER_TEMPLATES_FILE_PATH, "r") as templates_file:
122 | expected_templates = templates_file.readlines()
123 | relation_id = self.harness.add_relation(DEFAULT_RELATION_NAME, "requirer")
124 | self.harness.add_relation_unit(relation_id, "requirer/0")
125 |
126 | self.harness.charm.on.alertmanager_config_file_changed.emit()
127 |
128 | self.assertEqual(
129 | json.loads(
130 | self.harness.get_relation_data(relation_id, TEST_APP_NAME)[
131 | "alertmanager_templates"
132 | ]
133 | ),
134 | expected_templates,
135 | )
136 |
137 | @patch(f"{TESTER_CHARM}.ALERTMANAGER_CONFIG_FILE", new_callable=PropertyMock)
138 | def test_invalid_config_emits_remote_configuration_broken_event(
139 | self, patched_alertmanager_config_file
140 | ):
141 | num_events = self.harness.charm._stored.configuration_broken_emitted
142 | patched_alertmanager_config_file.return_value = TEST_ALERTMANAGER_INVALID_CONFIG_FILE_PATH
143 | relation_id = self.harness.add_relation(DEFAULT_RELATION_NAME, "requirer")
144 | self.harness.add_relation_unit(relation_id, "requirer/0")
145 |
146 | self.harness.charm.on.alertmanager_config_file_changed.emit()
147 |
148 | self.assertGreater(
149 | self.harness.charm._stored.configuration_broken_emitted,
150 | num_events,
151 | )
152 |
153 | @patch(f"{TESTER_CHARM}.ALERTMANAGER_CONFIG_FILE", new_callable=PropertyMock)
154 | def test_invalid_config_clears_relation_data_bag(self, patched_alertmanager_config_file):
155 | patched_alertmanager_config_file.return_value = TEST_ALERTMANAGER_INVALID_CONFIG_FILE_PATH
156 | relation_id = self.harness.add_relation(DEFAULT_RELATION_NAME, "requirer")
157 | self.harness.add_relation_unit(relation_id, "requirer/0")
158 |
159 | self.harness.charm.on.alertmanager_config_file_changed.emit()
160 |
161 | with self.assertRaises(KeyError):
162 | _ = self.harness.get_relation_data(relation_id, TEST_APP_NAME)["alertmanager_config"]
163 |
164 | @patch(f"{TESTER_CHARM}.ALERTMANAGER_CONFIG_FILE", new_callable=PropertyMock)
165 | def test_empty_config_file_clears_relation_data_bag(self, patched_alertmanager_config_file):
166 | test_config_file = "./tests/unit/test_config/alertmanager_empty.yml"
167 | patched_alertmanager_config_file.return_value = test_config_file
168 | relation_id = self.harness.add_relation(DEFAULT_RELATION_NAME, "requirer")
169 | self.harness.add_relation_unit(relation_id, "requirer/0")
170 |
171 | self.harness.charm.on.alertmanager_config_file_changed.emit()
172 |
173 | with self.assertRaises(KeyError):
174 | _ = self.harness.get_relation_data(relation_id, TEST_APP_NAME)["alertmanager_config"]
175 |
--------------------------------------------------------------------------------
/tests/integration/helpers.py:
--------------------------------------------------------------------------------
1 | # Copyright 2021 Canonical Ltd.
2 | # See LICENSE file for licensing details.
3 |
4 | """Helper functions for writing tests."""
5 |
6 | import asyncio
7 | import grp
8 | import json
9 | import logging
10 | import urllib.request
11 | from typing import Dict, Optional, Tuple
12 | from urllib.parse import urlparse
13 |
14 | import requests
15 | from juju.unit import Unit
16 | from pytest_operator.plugin import OpsTest
17 | from requests.auth import HTTPBasicAuth
18 |
19 | logger = logging.getLogger(__name__)
20 |
21 |
22 | async def get_unit_address(ops_test: OpsTest, app_name: str, unit_num: int) -> str:
23 | """Get private address of a unit."""
24 | assert ops_test.model
25 | status = await ops_test.model.get_status() # noqa: F821
26 | return status["applications"][app_name]["units"][f"{app_name}/{unit_num}"]["address"]
27 |
28 |
29 | def interleave(l1: list, l2: list) -> list:
30 | """Interleave two lists.
31 |
32 | >>> interleave([1,2,3], ['a', 'b', 'c'])
33 | [1, 'a', 2, 'b', 3, 'c']
34 |
35 | Reference: https://stackoverflow.com/a/11125298/3516684
36 | """
37 | return [x for t in zip(l1, l2) for x in t]
38 |
39 |
40 | async def cli_upgrade_from_path_and_wait(
41 | ops_test: OpsTest,
42 | path: str,
43 | alias: str,
44 | resources: Optional[Dict[str, str]] = None,
45 | wait_for_status: Optional[str] = None,
46 | ):
47 | assert ops_test.model
48 | if resources is None:
49 | resources = {}
50 |
51 | resource_pairs = [f"{k}={v}" for k, v in resources.items()]
52 | resource_arg_prefixes = ["--resource"] * len(resource_pairs)
53 | resource_args = interleave(resource_arg_prefixes, resource_pairs)
54 |
55 | cmd = [
56 | "juju",
57 | "refresh",
58 | "--path",
59 | path,
60 | alias,
61 | *resource_args,
62 | ]
63 |
64 | retcode, stdout, stderr = await ops_test.run(*cmd)
65 | assert retcode == 0, f"Upgrade failed: {(stderr or stdout).strip()}"
66 | logger.info(stdout)
67 | await ops_test.model.wait_for_idle(apps=[alias], status=wait_for_status, timeout=120)
68 |
69 |
70 | async def get_leader_unit_num(ops_test: OpsTest, app_name: str):
71 | assert ops_test.model
72 | application = ops_test.model.applications[app_name]
73 | assert application
74 | units = application.units
75 | is_leader = [await units[i].is_leader_from_status() for i in range(len(units))]
76 | logger.info("Leaders: %s", is_leader)
77 | return is_leader.index(True)
78 |
79 |
80 | async def is_leader_elected(ops_test: OpsTest, app_name: str):
81 | assert ops_test.model
82 | application = ops_test.model.applications[app_name]
83 | assert application
84 | units = application.units
85 | return any([await units[i].is_leader_from_status() for i in range(len(units))])
86 |
87 |
88 | async def block_until_leader_elected(ops_test: OpsTest, app_name: str):
89 | # await ops_test.model.block_until(is_leader_elected)
90 | # block_until does not take async (yet?) https://github.com/juju/python-libjuju/issues/609
91 | while not await is_leader_elected(ops_test, app_name):
92 | await asyncio.sleep(5)
93 |
94 |
95 | def uk8s_group() -> str:
96 | try:
97 | # Classically confined microk8s
98 | uk8s_group = grp.getgrnam("microk8s").gr_name
99 | except KeyError:
100 | # Strictly confined microk8s
101 | uk8s_group = "snap_microk8s"
102 | return uk8s_group
103 |
104 |
105 | async def is_alertmanage_unit_up(ops_test: OpsTest, app_name: str, unit_num: int):
106 | address = await get_unit_address(ops_test, app_name, unit_num)
107 | url = f"http://{address}:9093"
108 | logger.info("am public address: %s", url)
109 |
110 | response = urllib.request.urlopen(f"{url}/api/v2/status", data=None, timeout=2.0)
111 | return response.code == 200 and "versionInfo" in json.loads(response.read())
112 |
113 |
114 | async def is_alertmanager_up(ops_test: OpsTest, app_name: str):
115 | assert ops_test.model
116 | application = ops_test.model.applications[app_name]
117 | assert application
118 | return all(
119 | [
120 | await is_alertmanage_unit_up(ops_test, app_name, unit_num)
121 | for unit_num in range(len(application.units))
122 | ]
123 | )
124 |
125 |
126 | async def get_alertmanager_config_from_file(
127 | ops_test: OpsTest, app_name: str, container_name: str, config_file_path: str
128 | ) -> Tuple[Optional[int], str, str]:
129 | rc, stdout, stderr = await ops_test.juju(
130 | "ssh", "--container", f"{container_name}", f"{app_name}/0", "cat", f"{config_file_path}"
131 | )
132 | return rc, stdout, stderr
133 |
134 |
135 | async def deploy_literal_bundle(ops_test: OpsTest, bundle: str):
136 | run_args = [
137 | "juju",
138 | "deploy",
139 | "--trust",
140 | "-m",
141 | ops_test.model_name,
142 | str(ops_test.render_bundle(bundle)),
143 | ]
144 |
145 | retcode, stdout, stderr = await ops_test.run(*run_args)
146 | assert retcode == 0, f"Deploy failed: {(stderr or stdout).strip()}"
147 | logger.info(stdout)
148 |
149 |
150 | async def curl(ops_test: OpsTest, *, cert_dir: str, cert_path: str, ip_addr: str, mock_url: str):
151 | p = urlparse(mock_url)
152 |
153 | # Tell curl to resolve the mock url as traefik's IP (to avoid using a custom DNS
154 | # server). This is needed because the certificate issued by the CA would have that same
155 | # hostname as the subject, and for TLS to succeed, the target url's hostname must match
156 | # the one in the certificate.
157 | cmd = [
158 | "curl",
159 | "-s",
160 | "--fail-with-body",
161 | "--resolve",
162 | f"{p.hostname}:{p.port or 443}:{ip_addr}",
163 | "--capath",
164 | str(cert_dir),
165 | "--cacert",
166 | str(cert_path),
167 | mock_url,
168 | ]
169 | logger.info("cURL command: '%s'", " ".join(cmd))
170 | rc, stdout, stderr = await ops_test.run(*cmd)
171 | logger.info("%s: %s", mock_url, (rc, stdout, stderr))
172 | assert rc == 0, (
173 | f"curl exited with rc={rc} for {mock_url}; "
174 | "non-zero return code means curl encountered a >= 400 HTTP code"
175 | )
176 | return stdout
177 |
178 | async def grafana_password(ops_test: OpsTest, app_name: str) -> str:
179 |     """Get the Grafana admin password from the application's leader unit.
180 |
181 | Args:
182 | ops_test: pytest-operator plugin
183 | app_name: string name of application
184 |
185 | Returns:
186 | admin password as a string
187 | """
188 | leader: Optional[Unit] = None
189 | for unit in ops_test.model.applications[app_name].units: # type: ignore
190 | is_leader = await unit.is_leader_from_status()
191 | if is_leader:
192 | leader = unit
193 | break
194 |
195 | assert leader
196 | action = await leader.run_action("get-admin-password")
197 | action = await action.wait()
198 | return action.results["admin-password"]
199 |
200 | async def grafana_datasources(ops_test: OpsTest, app_name: str) -> "list[dict]":
201 | """Get the datasources configured in Grafana.
202 |
203 | A sample response from Grafana's /api/datasources endpoint is a list of datasources, similar to below.
204 |
205 | [{"id":1,"uid":"ABC","orgId":1,"name":"",
206 | "type":"alertmanager","typeName":"Alertmanager",
207 | "typeLogoUrl":"public/app/plugins/datasource/alertmanager/img/logo.svg","access":"proxy",
208 | "url":"","user":"","database":"","basicAuth":false,"isDefault":false,
209 |     "jsonData":{"implementation":"prometheus","timeout":300},"readOnly":true}, ...]
210 |
211 | Args:
212 | ops_test: pytest-operator plugin
213 | app_name: string name of application
214 |     Returns:
215 |         a list of datasource dictionaries, as returned by the Grafana API
216 | """
217 | address = await get_unit_address(ops_test, app_name, 0)
218 | url = f"http://{address}:3000/api/datasources"
219 |
220 | admin_password = await grafana_password(ops_test, app_name)
221 | response = requests.get(
222 | url,
223 | auth=HTTPBasicAuth("admin", admin_password),
224 | )
225 | response.raise_for_status()
226 | datasources = response.json()
227 | return datasources
228 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Alertmanager Operator (k8s)
2 | [](https://charmhub.io/alertmanager-k8s)
3 | [](https://github.com/canonical/alertmanager-k8s-operator/actions/workflows/release.yaml)
4 | [](https://discourse.charmhub.io)
5 |
6 | [Charmed Alertmanager (alertmanager-k8s)][Alertmanager operator] is a charm for
7 | [Alertmanager].
8 |
9 | The charm imposes configurable resource limits on the workload, can be readily
10 | integrated with alert sources such as [prometheus][Prometheus operator] or
11 | [loki][Loki operator], and comes with built-in alert rules and dashboards for
12 | self-monitoring.
13 |
14 | It is an essential part of the [COS Lite bundle].
15 |
16 |
17 | [Alertmanager]: https://prometheus.io/docs/alerting/latest/alertmanager/
18 | [COS Lite bundle]: https://charmhub.io/cos-lite
19 | [Loki operator]: https://charmhub.io/loki-k8s
20 | [Prometheus operator]: https://charmhub.io/prometheus-k8s
21 | [Alertmanager operator]: https://charmhub.io/alertmanager-k8s
22 |
23 |
24 | ## Getting started
25 |
26 | ### Basic deployment
27 |
28 | Once you have a controller and model ready, you can deploy alertmanager
29 | using the Juju CLI:
30 |
31 | ```shell
32 | juju deploy --channel=beta alertmanager-k8s
33 | ```
34 |
35 | The available [channels](https://snapcraft.io/docs/channels) are listed at the top
36 | of [the page](https://charmhub.io/alertmanager-k8s) and can also be retrieved with
37 | Charmcraft CLI:
38 |
39 | ```shell
40 | $ charmcraft status alertmanager-k8s
41 |
42 | Track Base Channel Version Revision Resources
43 | latest ubuntu 20.04 (amd64) stable - - -
44 | candidate - - -
45 | beta 9 9 alertmanager-image (r1)
46 | edge 9 9 alertmanager-image (r1)
47 | ```
48 |
49 | Once the Charmed Operator is deployed, the status can be checked by running:
50 |
51 | ```shell
52 | juju status --relations --storage --color
53 | ```
54 |
55 |
56 | ### Configuration
57 |
58 | In order to have alerts dispatched to your receiver(s) of choice,
59 | a [configuration file](https://www.prometheus.io/docs/alerting/latest/configuration/)
60 | must be provided to Alertmanager using the
61 | [`config_file`](https://charmhub.io/alertmanager-k8s/configure#config_file) option:
62 |
63 | ```shell
64 | juju config alertmanager-k8s \
65 | config_file='@path/to/alertmanager.yml'
66 | ```
67 |
68 | Note that if you use templates, you should use the `templates_file` config option
69 | instead of having a `templates` section in your `yaml` configuration file.
70 | (This is a slight deviation from the official alertmanager config spec.)
71 |
72 |
73 | Use the [`templates_file`](https://charmhub.io/alertmanager-k8s/configure#templates_file)
74 | option to push templates that are being used by the configuration file:
75 |
76 | ```shell
77 | juju config alertmanager-k8s \
78 | config_file='@path/to/alertmanager.yml' \
79 | templates_file='@path/to/templates.tmpl'
80 | ```
81 |
82 | All templates need to go into this single config option, instead of
83 | the 'templates' section of the main configuration file. The templates will be
84 | pushed to the workload container, and the configuration file will be updated
85 | accordingly.
86 |
87 | Refer to the
88 | [official templates documentation](https://prometheus.io/docs/alerting/latest/notification_examples/)
89 | for more details.
90 |
91 |
92 | To verify Alertmanager is using the expected configuration you can use the
93 | [`show-config`](https://charmhub.io/alertmanager-k8s/actions#show-config) action:
94 |
95 | ```shell
96 | juju run-action alertmanager-k8s/0 show-config --wait
97 | ```
98 |
99 |
100 | ### Dashboard and HTTP API
101 |
102 | The Alertmanager dashboard and
103 | [HTTP API](https://www.prometheus.io/docs/alerting/latest/management_api/)
104 | can be accessed at the default port (9093) on the Alertmanager IP address,
105 | which is determinable with a `juju status` command.
106 |
107 | To obtain the load-balanced application IP,
108 |
109 | ```shell
110 | juju status alertmanager-k8s --format=json \
111 | | jq -r '.applications."alertmanager-k8s".address'
112 | ```
113 |
114 | Similarly, to obtain an individual unit's IP address:
115 |
116 | ```shell
117 | juju status alertmanager-k8s --format=json \
118 | | jq -r '.applications."alertmanager-k8s".units."alertmanager-k8s/0".address'
119 | ```
120 |
121 | Navigating to either of these addresses on port 9093 will bring up the Alertmanager dashboard:
122 |
123 | 
124 |
125 | ## Clustering
126 |
127 | ### Forming a cluster
128 |
129 | Alertmanager [supports clustering](https://www.prometheus.io/docs/alerting/latest/alertmanager/#high-availability)
130 | and all you need to do to create/update a cluster is to rescale the application. This can be done in two ways.
131 |
132 | Let's say we have one alertmanager unit running and we want to scale the deployment to three units.
133 |
134 | With `juju add-unit` we can achieve that using the `--num-units` argument and the number of units we want to add:
135 |
136 | ```shell
137 | juju add-unit alertmanager-k8s --num-units 2
138 | ```
139 |
140 | or using `juju scale-application` and the total number of units we want:
141 |
142 | ```shell
143 | juju scale-application alertmanager-k8s 3
144 | ```
145 |
146 | Regardless of which of the two options you use, `juju status --relations --color` will show you the status of the cluster.
147 |
148 |
149 | Internally, HA is achieved by providing each Alertmanager instance with the IP address of at least one other instance. The cluster then updates automatically as units are added or removed.
150 |
151 | ### Verification
152 |
153 |
154 |
155 | #### Pebble plan
156 | Cluster information is passed to Alertmanager via [`--cluster.peer` command line arguments](https://github.com/prometheus/alertmanager#high-availability). This can be verified by looking at the current pebble plan:
157 |
158 | ```shell
159 | > juju exec --unit alertmanager-k8s/0 -- \
160 | PEBBLE_SOCKET=/charm/containers/alertmanager/pebble.socket \
161 | pebble plan
162 |
163 | services:
164 | alertmanager:
165 | summary: alertmanager service
166 | startup: enabled
167 | override: replace
168 | command: alertmanager --config.file=/etc/alertmanager/alertmanager.yml --storage.path=/alertmanager --web.listen-address=:9093 --cluster.listen-address=0.0.0.0:9094 --cluster.peer=10.1.179.220:9094 --cluster.peer=10.1.179.221:9094
169 | ```
170 | #### HTTP API
171 | To manually verify a cluster is indeed formed, you can query the alertmanager HTTP API directly:
172 |
173 | ```shell
174 | > curl -s $ALERTMANAGER_IP:9093/api/v1/status \
175 | | jq '.data.clusterStatus.peers[].address'
176 | "10.1.179.220:9094"
177 | "10.1.179.221:9094"
178 | "10.1.179.217:9094"
179 | ```
180 |
181 |
182 | ## OCI Images
183 | This charm is published on Charmhub with alertmanager images from
184 | [ubuntu/prometheus-alertmanager]; however, it should also work with the
185 | official [quay.io/prometheus/alertmanager].
186 |
187 | To try the charm with a different image you can use `juju refresh`. For example:
188 |
189 | ```shell
190 | juju refresh alertmanager-k8s \
191 | --resource alertmanager-image=quay.io/prometheus/alertmanager
192 | ```
193 |
194 |
195 | [ubuntu/prometheus-alertmanager]: https://hub.docker.com/r/ubuntu/prometheus-alertmanager
196 | [quay.io/prometheus/alertmanager]: https://quay.io/repository/prometheus/alertmanager?tab=tags
197 |
198 |
199 | ## Official alertmanager documentation
200 |
201 | For further details about Alertmanager configuration and usage, please refer to
202 | the [official Alertmanager documentation](https://www.prometheus.io/docs/alerting/latest/overview/).
203 |
204 |
205 | ## Additional Information
206 | - [Logging, Monitoring, and Alerting](https://discourse.ubuntu.com/t/logging-monitoring-and-alerting/19151) (LMA) -
207 | a tutorial for running Prometheus, Grafana and Alertmanager with LXD.
208 | - [Alertmanager README](https://github.com/prometheus/alertmanager)
209 | - [PromCon 2018: Life of an Alert](https://youtube.com/watch?v=PUdjca23Qa4)
210 |
--------------------------------------------------------------------------------
/lib/charms/catalogue_k8s/v1/catalogue.py:
--------------------------------------------------------------------------------
1 | # Copyright 2021 Canonical Ltd.
2 | # See LICENSE file for licensing details.
3 |
4 | """Charm library for providing service catalogues to bundles or sets of charms.
5 |
6 | This charm library contains two classes (CatalogueProvider and CatalogueConsumer) that handle
7 | both sides of the `catalogue` relation interface.
8 |
9 | ### CatalogueConsumer
10 |
11 | The Consumer allows sending catalogue items to a Catalogue charm.
12 |
13 | Adding it to your charm is very simple:
14 |
15 | ```
16 | from charms.catalogue_k8s.v1.catalogue import (
17 | CatalogueConsumer,
18 | CatalogueItem,
19 | )
20 |
21 | ...
22 | self.catalogue = CatalogueConsumer(
23 | charm=self,
24 | relation_name="catalogue", # optional
25 | item=CatalogueItem(
26 | name="myapp",
27 | url=myapp_url,
28 | icon="rainbow",
29 | description="This is a rainbow app!"
30 | )
31 | )
32 | ```
33 |
34 | The relevant event listeners are already registered by the CatalogueConsumer object.
35 |
36 | ### CatalogueProvider
37 |
38 | The Provider helps you receive catalogue items from other charms to display them however you like.
39 |
40 | To implement this in your charm:
41 |
42 | ```
43 | from charms.catalogue_k8s.v1.catalogue import CatalogueProvider
44 |
45 | ...
46 | self.catalogue = CatalogueProvider(
47 | charm=self,
48 | relation_name="catalogue", # optional
49 | )
50 | ```
51 |
52 |
53 | The relevant event listeners are already registered by the CatalogueProvider object.
54 | """
55 |
56 | import ipaddress
57 | import json
58 | import logging
59 | from typing import Dict, Optional
60 |
61 | from ops.charm import CharmBase
62 | from ops.framework import EventBase, EventSource, Object, ObjectEvents
63 |
64 | LIBID = "fa28b361293b46668bcd1f209ada6983"
65 | LIBAPI = 1
66 | LIBPATCH = 3
67 |
68 | DEFAULT_RELATION_NAME = "catalogue"
69 |
70 | logger = logging.getLogger(__name__)
71 |
72 |
73 | class CatalogueItem:
74 | """`CatalogueItem` represents an application entry sent to a catalogue.
75 |
76 | icon (str): An Iconify Material Design Icon (MDI) string.
77 | (See: https://icon-sets.iconify.design/mdi for more details).
78 | api_docs (str): A URL to the docs relevant to this item (upstream or otherwise).
79 | api_endpoints (dict): A dictionary containing API information, where:
80 | - The key is a description or name of the endpoint (e.g., "Alerts").
81 | - The value is the actual address of the endpoint (e.g., "'http://1.2.3.4:1234/api/v1/targets/metadata'").
82 | - Example for setting the api_endpoints attr:
83 | api_endpoints={"Alerts": f"{self.external_url}/api/v1/alerts"}
84 | """
85 |
86 | def __init__(self, name: str, url: str, icon: str, description: str = "", api_docs: str = "", api_endpoints: Optional[Dict[str,str]] = None):
87 | self.name = name
88 | self.url = url
89 | self.icon = icon
90 | self.description = description
91 | self.api_docs = api_docs
92 | self.api_endpoints = api_endpoints
93 |
94 |
95 | class CatalogueConsumer(Object):
96 | """`CatalogueConsumer` is used to send over a `CatalogueItem`."""
97 |
98 | def __init__(
99 | self,
100 | charm,
101 | relation_name: str = DEFAULT_RELATION_NAME,
102 | item: Optional[CatalogueItem] = None,
103 | ):
104 | super().__init__(charm, relation_name)
105 | self._charm = charm
106 | self._relation_name = relation_name
107 | self._item = item
108 |
109 | events = self._charm.on[self._relation_name]
110 | self.framework.observe(events.relation_joined, self._on_relation_changed)
111 | self.framework.observe(events.relation_broken, self._on_relation_changed)
112 | self.framework.observe(events.relation_changed, self._on_relation_changed)
113 | self.framework.observe(events.relation_departed, self._on_relation_changed)
114 | self.framework.observe(events.relation_created, self._on_relation_changed)
115 |
116 | def _on_relation_changed(self, _):
117 | self._update_relation_data()
118 |
119 | def _update_relation_data(self):
120 | if not self._charm.unit.is_leader():
121 | return
122 |
123 | if not self._item:
124 | return
125 |
126 | for relation in self._charm.model.relations[self._relation_name]:
127 | relation.data[self._charm.model.app]["name"] = self._item.name
128 | relation.data[self._charm.model.app]["description"] = self._item.description
129 | relation.data[self._charm.model.app]["url"] = self.unit_address(relation)
130 | relation.data[self._charm.model.app]["icon"] = self._item.icon
131 | relation.data[self._charm.model.app]["api_docs"] = self._item.api_docs
132 | relation.data[self._charm.model.app]["api_endpoints"] = json.dumps(self._item.api_endpoints)
133 |
134 | def update_item(self, item: CatalogueItem):
135 | """Update the catalogue item."""
136 | self._item = item
137 | self._update_relation_data()
138 |
139 | def unit_address(self, relation):
140 | """Return the unit address of the consumer, on which it is reachable.
141 |
142 | Requires ingress to be connected for it to be routable.
143 | """
144 | if self._item and self._item.url:
145 | return self._item.url
146 | return ""
147 |
148 | def _is_valid_unit_address(self, address: str) -> bool:
149 | """Validate a unit address.
150 |
151 | At present only IP address validation is supported, but
152 | this may be extended to DNS addresses also, as needed.
153 |
154 | Args:
155 | address: a string representing a unit address
156 |
157 | """
158 | try:
159 | _ = ipaddress.ip_address(address)
160 | except ValueError:
161 | return False
162 |
163 | return True
164 |
165 |
166 | class CatalogueItemsChangedEvent(EventBase):
167 | """Event emitted when the catalogue entries change."""
168 |
169 | def __init__(self, handle, items):
170 | super().__init__(handle)
171 | self.items = items
172 |
173 | def snapshot(self):
174 | """Save catalogue entries information."""
175 | return {"items": self.items}
176 |
177 | def restore(self, snapshot):
178 | """Restore catalogue entries information."""
179 | self.items = snapshot["items"]
180 |
181 |
182 | class CatalogueEvents(ObjectEvents):
183 | """Events raised by `CatalogueConsumer`."""
184 |
185 | items_changed = EventSource(CatalogueItemsChangedEvent)
186 |
187 |
188 | class CatalogueProvider(Object):
189 | """`CatalogueProvider` is the side of the relation that serves the actual service catalogue."""
190 |
191 | on = CatalogueEvents() # pyright: ignore
192 |
193 | def __init__(self, charm: CharmBase, relation_name: str = DEFAULT_RELATION_NAME):
194 | super().__init__(charm, relation_name)
195 | self._charm = charm
196 | self._relation_name = relation_name
197 | events = self._charm.on[self._relation_name]
198 | self.framework.observe(events.relation_changed, self._on_relation_changed)
199 | self.framework.observe(events.relation_joined, self._on_relation_changed)
200 | self.framework.observe(events.relation_departed, self._on_relation_changed)
201 | self.framework.observe(events.relation_broken, self._on_relation_broken)
202 |
203 | def _on_relation_broken(self, event):
204 | self.on.items_changed.emit(items=self.items) # pyright: ignore
205 |
206 | def _on_relation_changed(self, event):
207 | self.on.items_changed.emit(items=self.items) # pyright: ignore
208 |
209 | @property
210 | def items(self):
211 | """A list of apps sent over relation data."""
212 | return [
213 | {
214 | "name": relation.data[relation.app].get("name", ""),
215 | "url": relation.data[relation.app].get("url", ""),
216 | "icon": relation.data[relation.app].get("icon", ""),
217 | "description": relation.data[relation.app].get("description", ""),
218 | "api_docs": relation.data[relation.app].get("api_docs", ""),
219 | "api_endpoints": json.loads(relation.data[relation.app].get("api_endpoints", "{}")),
220 | }
221 | for relation in self._charm.model.relations[self._relation_name]
222 | if relation.app and relation.units
223 | ]
224 |
--------------------------------------------------------------------------------
/charmcraft.yaml:
--------------------------------------------------------------------------------
1 | # Copyright 2021 Canonical Ltd.
2 | # See LICENSE file for licensing details.
3 | name: alertmanager-k8s
4 | type: charm
5 | summary: Alertmanager handles alerts sent by client applications.
6 | description: >
7 | Alertmanager handles alerts sent by client applications such as the Prometheus server.
8 | It takes care of deduplicating, grouping, and routing them to the correct receiver integrations
9 | such as email, PagerDuty, or OpsGenie. It also takes care of silencing and inhibition of alerts.
10 |
11 | links:
12 | documentation: https://discourse.charmhub.io/t/alertmanager-k8s-docs-index/5788
13 | website: https://charmhub.io/alertmanager-k8s
14 | source: https://github.com/canonical/alertmanager-k8s-operator
15 | issues: https://github.com/canonical/alertmanager-k8s-operator/issues
16 |
17 | assumes:
18 | - k8s-api
19 | - juju >= 3.6
20 |
21 | platforms:
22 | ubuntu@24.04:amd64:
23 |
24 | parts:
25 | charm:
26 | source: .
27 | plugin: uv
28 | build-packages: [git]
29 | build-snaps: [astral-uv]
30 | override-build: |
31 | craftctl default
32 | git describe --always > $CRAFT_PART_INSTALL/version
33 |
34 | containers:
35 | alertmanager: # container key used by pebble
36 | resource: alertmanager-image
37 | mounts:
38 | - storage: data
39 | # nflogs and silences files go here. With a mounted storage for silences, they persist
40 | # across container restarts.
41 | # This path is passed to alertmanager via the `--storage.path` cli argument.
42 | location: /alertmanager
43 |
44 | storage:
45 | data:
46 | type: filesystem
47 | description: >
48 | Storage path passed to alertmanager via --storage.path argument and used for nflog and silences snapshot
49 |
50 | provides:
51 | alerting:
52 | # The provider (alertmanager) adds the following key-value pair to the relation data bag of
53 | # every alertmanager unit:
54 | # "public_address": :
55 | interface: alertmanager_dispatch
56 | optional: true
57 | description: |
58 | Integrates with other charms to send notifications when alert rules are triggered.
59 | karma-dashboard:
60 | interface: karma_dashboard
61 | optional: true
62 | description: |
63 | Links an entire Alertmanager cluster to a Karma[1] dashboard.
64 | Scaling alertmanager would automatically cause karma to group alerts by cluster.
65 |
66 | [1] https://charmhub.io/karma-k8s
67 | self-metrics-endpoint:
68 | interface: prometheus_scrape
69 | optional: true
70 | description: |
71 | Exposes the Prometheus metrics endpoint providing telemetry about the Alertmanager instance.
72 | grafana-dashboard:
73 | interface: grafana_dashboard
74 | optional: true
75 | description: |
76 | Forwards the built-in Grafana dashboard(s) for monitoring Alertmanager.
77 | grafana-source:
78 | interface: grafana_datasource
79 | optional: true
80 | description: |
81 | Configures Grafana to be able to use this Alertmanager instance as a datasource.
82 | provide-cmr-mesh: # server-side-for-cmr-mesh
83 | interface: cross_model_mesh
84 | description: |
85 | Allow cross-model applications to make HTTP requests to alertmanager via the service mesh.
86 |       This relation provides additional data required by the service mesh to create cross-model authorization policies.
87 |
88 |       A subset of the juju topology is announced to the other side, because a CMR obfuscates identity.
89 |       Each pair of charms needs a separate relation of this kind, e.g. otelcol to loki and to prom.
90 |
91 | To make use of this relation, you also must have either the service-mesh relation in place (e.g. istio-beacon) or
92 | have the istio-beacon enroll the entire model (via its config option).
93 | (The service_mesh charm library manages both of these relations.)
94 |
95 | requires:
96 | ingress:
97 | interface: ingress
98 | optional: true
99 | limit: 1
100 | description: |
101 | Alertmanager typically needs a "per app" ingress, which is available in the traefik charm[1].
102 |
103 | [1] https://charmhub.io/traefik-k8s
104 | remote-configuration:
105 | interface: alertmanager_remote_configuration
106 | optional: true
107 | limit: 1
108 | catalogue:
109 | interface: catalogue
110 | optional: true
111 | description: Add Alertmanager as an item to a Catalogue charm.
112 | certificates:
113 | interface: tls-certificates
114 | optional: true
115 | limit: 1
116 | description: |
117 |       Certificate and key files for the alertmanager server to use to authenticate to clients.
118 | tracing:
119 | interface: tracing
120 | optional: true
121 | limit: 1
122 | description: |
123 | Enables sending workload traces to a distributed tracing backend such as Tempo.
124 | service-mesh:
125 | limit: 1
126 | interface: service_mesh
127 | description: |
128 | Subscribe this charm into a service mesh and create authorization policies.
129 | We forward to the beacon our authorization policies.
130 | The beacon sends the pod and service labels required by this charm to join the mesh.
131 | This relation is a pre-requisite for using the provide-cmr-mesh relation.
132 | require-cmr-mesh:
133 | # TODO: remove this relation when this is fixed:
134 | # https://github.com/canonical/istio-beacon-k8s-operator/issues/91
135 | interface: cross_model_mesh
136 | description: |
137 | Allow a cross-model application access to alertmanager via the service mesh.
138 | This relation provides additional data required by the service mesh to enforce cross-model authorization policies.
139 |
140 | peers:
141 | replicas:
142 | interface: alertmanager_replica
143 | # assumed network type: private
144 |
145 | resources:
146 | alertmanager-image:
147 | type: oci-image
148 | description: |
149 |       OCI image for alertmanager. This charm makes the following assumptions about the image:
150 | - location of executable "alertmanager" is in the path
151 | - has `update-ca-certificates`
152 | upstream-source: ubuntu/alertmanager@sha256:368985dfd680291f1888cc339afa7a097981ccb33b3398598e18f0dda2027573 # renovate: oci-image tag: 0.28.0-24.04
153 |
154 | config:
155 | options:
156 | config_file:
157 | type: string
158 | default: ""
159 | description: >
160 | Alertmanager configuration file (yaml), excluding the templates section.
161 | To pass the contents of a file to this configuration option, prefix the filename with the `@` symbol.
162 |
163 | Usage: `juju config alertmanager config_file=@alertmanager.yaml`
164 |
165 | For more information on configuring the Alertmanager, refer to:
166 | https://www.prometheus.io/docs/alerting/latest/configuration/
167 | templates_file:
168 | type: string
169 | default: ""
170 | description: >
171 | Alertmanager templates definition file. This is a slight deviation from the official
172 | alertmanager config spec. All templates need to go into this single config option, instead of
173 | the 'templates' section of the main configuration file. The templates will be pushed to the
174 | workload container, and the configuration file will be updated accordingly. Templates can't
175 | be used without `config_file`.
176 | Refer to https://prometheus.io/docs/alerting/latest/notification_examples/ for more details
177 | on templates.
178 | web_external_url:
179 | type: string
180 | default: ""
181 | description: |
182 | DEPRECATED. This config option is no longer used, in favor of "skipPrefix".
183 |
184 | The URL under which Alertmanager is externally reachable (for example, if
185 | Alertmanager is served via a manually configured ingress).
186 |
187 | This config option is used for the `--web.external-url` alertmanager cli
188 | argument. If this charm config option is provided, it takes precedence over the
189 | URL provided over the "ingress" relation.
190 |
191 | Note: this config option shouldn't be included when you're using the "ingress"
192 | relation (e.g. traefik) - the charm will automatically assign an external url
193 | to `--web.external-url` when related to an ingress provider.
194 |
195 | This should be a complete URI, including scheme, or a fully qualified subpath
196 | starting with `/`.
197 | If Alertmanager is being served directly from the root of a fully-qualified
198 | host or a bare A record, this may be omitted.
199 | If the URL has a path portion, Alertmanager will use it to prefix all HTTP
200 | endpoints.
201 | cpu:
202 | description: |
203 | K8s cpu resource limit, e.g. "1" or "500m". Default is unset (no limit). This value is used
204 | for the "limits" portion of the resource requirements (the "requests" portion is
205 | automatically deduced from it).
206 | See https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
207 | type: string
208 | memory:
209 | description: |
210 | K8s memory resource limit, e.g. "1Gi". Default is unset (no limit). This value is used
211 | for the "limits" portion of the resource requirements (the "requests" portion is
212 | automatically deduced from it).
213 | See https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
214 | type: string
215 |
216 | actions:
217 | show-config:
218 | description: Show alertmanager config file.
219 | check-config:
220 | description: |
221 | Run `amtool` inside the workload to validate the configuration file, and
222 | return the resulting output. This can be useful for troubleshooting.
223 |
224 |
--------------------------------------------------------------------------------
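The `config_file` and `templates_file` options above take the raw file contents; `juju config` reads the value from disk when it is prefixed with `@`, as in the usage line of the option description. Below is a minimal, illustrative sketch in Python (assuming PyYAML; the receiver name and file name are placeholders, and the application name follows the usage example in the option description):

import yaml

# Hypothetical minimal Alertmanager configuration: a single placeholder receiver.
minimal_config = {
    "route": {"receiver": "placeholder-receiver"},
    "receivers": [{"name": "placeholder-receiver"}],
}

with open("alertmanager.yaml", "w") as f:
    yaml.safe_dump(minimal_config, f)

# On the Juju client:
#   juju config alertmanager config_file=@alertmanager.yaml
# The check-config action can then validate the pushed file with amtool,
# e.g. `juju run alertmanager/0 check-config` on Juju 3.x.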
/LICENSE:
--------------------------------------------------------------------------------
1 |
2 | Apache License
3 | Version 2.0, January 2004
4 | http://www.apache.org/licenses/
5 |
6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
7 |
8 | 1. Definitions.
9 |
10 | "License" shall mean the terms and conditions for use, reproduction,
11 | and distribution as defined by Sections 1 through 9 of this document.
12 |
13 | "Licensor" shall mean the copyright owner or entity authorized by
14 | the copyright owner that is granting the License.
15 |
16 | "Legal Entity" shall mean the union of the acting entity and all
17 | other entities that control, are controlled by, or are under common
18 | control with that entity. For the purposes of this definition,
19 | "control" means (i) the power, direct or indirect, to cause the
20 | direction or management of such entity, whether by contract or
21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
22 | outstanding shares, or (iii) beneficial ownership of such entity.
23 |
24 | "You" (or "Your") shall mean an individual or Legal Entity
25 | exercising permissions granted by this License.
26 |
27 | "Source" form shall mean the preferred form for making modifications,
28 | including but not limited to software source code, documentation
29 | source, and configuration files.
30 |
31 | "Object" form shall mean any form resulting from mechanical
32 | transformation or translation of a Source form, including but
33 | not limited to compiled object code, generated documentation,
34 | and conversions to other media types.
35 |
36 | "Work" shall mean the work of authorship, whether in Source or
37 | Object form, made available under the License, as indicated by a
38 | copyright notice that is included in or attached to the work
39 | (an example is provided in the Appendix below).
40 |
41 | "Derivative Works" shall mean any work, whether in Source or Object
42 | form, that is based on (or derived from) the Work and for which the
43 | editorial revisions, annotations, elaborations, or other modifications
44 | represent, as a whole, an original work of authorship. For the purposes
45 | of this License, Derivative Works shall not include works that remain
46 | separable from, or merely link (or bind by name) to the interfaces of,
47 | the Work and Derivative Works thereof.
48 |
49 | "Contribution" shall mean any work of authorship, including
50 | the original version of the Work and any modifications or additions
51 | to that Work or Derivative Works thereof, that is intentionally
52 | submitted to Licensor for inclusion in the Work by the copyright owner
53 | or by an individual or Legal Entity authorized to submit on behalf of
54 | the copyright owner. For the purposes of this definition, "submitted"
55 | means any form of electronic, verbal, or written communication sent
56 | to the Licensor or its representatives, including but not limited to
57 | communication on electronic mailing lists, source code control systems,
58 | and issue tracking systems that are managed by, or on behalf of, the
59 | Licensor for the purpose of discussing and improving the Work, but
60 | excluding communication that is conspicuously marked or otherwise
61 | designated in writing by the copyright owner as "Not a Contribution."
62 |
63 | "Contributor" shall mean Licensor and any individual or Legal Entity
64 | on behalf of whom a Contribution has been received by Licensor and
65 | subsequently incorporated within the Work.
66 |
67 | 2. Grant of Copyright License. Subject to the terms and conditions of
68 | this License, each Contributor hereby grants to You a perpetual,
69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
70 | copyright license to reproduce, prepare Derivative Works of,
71 | publicly display, publicly perform, sublicense, and distribute the
72 | Work and such Derivative Works in Source or Object form.
73 |
74 | 3. Grant of Patent License. Subject to the terms and conditions of
75 | this License, each Contributor hereby grants to You a perpetual,
76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
77 | (except as stated in this section) patent license to make, have made,
78 | use, offer to sell, sell, import, and otherwise transfer the Work,
79 | where such license applies only to those patent claims licensable
80 | by such Contributor that are necessarily infringed by their
81 | Contribution(s) alone or by combination of their Contribution(s)
82 | with the Work to which such Contribution(s) was submitted. If You
83 | institute patent litigation against any entity (including a
84 | cross-claim or counterclaim in a lawsuit) alleging that the Work
85 | or a Contribution incorporated within the Work constitutes direct
86 | or contributory patent infringement, then any patent licenses
87 | granted to You under this License for that Work shall terminate
88 | as of the date such litigation is filed.
89 |
90 | 4. Redistribution. You may reproduce and distribute copies of the
91 | Work or Derivative Works thereof in any medium, with or without
92 | modifications, and in Source or Object form, provided that You
93 | meet the following conditions:
94 |
95 | (a) You must give any other recipients of the Work or
96 | Derivative Works a copy of this License; and
97 |
98 | (b) You must cause any modified files to carry prominent notices
99 | stating that You changed the files; and
100 |
101 | (c) You must retain, in the Source form of any Derivative Works
102 | that You distribute, all copyright, patent, trademark, and
103 | attribution notices from the Source form of the Work,
104 | excluding those notices that do not pertain to any part of
105 | the Derivative Works; and
106 |
107 | (d) If the Work includes a "NOTICE" text file as part of its
108 | distribution, then any Derivative Works that You distribute must
109 | include a readable copy of the attribution notices contained
110 | within such NOTICE file, excluding those notices that do not
111 | pertain to any part of the Derivative Works, in at least one
112 | of the following places: within a NOTICE text file distributed
113 | as part of the Derivative Works; within the Source form or
114 | documentation, if provided along with the Derivative Works; or,
115 | within a display generated by the Derivative Works, if and
116 | wherever such third-party notices normally appear. The contents
117 | of the NOTICE file are for informational purposes only and
118 | do not modify the License. You may add Your own attribution
119 | notices within Derivative Works that You distribute, alongside
120 | or as an addendum to the NOTICE text from the Work, provided
121 | that such additional attribution notices cannot be construed
122 | as modifying the License.
123 |
124 | You may add Your own copyright statement to Your modifications and
125 | may provide additional or different license terms and conditions
126 | for use, reproduction, or distribution of Your modifications, or
127 | for any such Derivative Works as a whole, provided Your use,
128 | reproduction, and distribution of the Work otherwise complies with
129 | the conditions stated in this License.
130 |
131 | 5. Submission of Contributions. Unless You explicitly state otherwise,
132 | any Contribution intentionally submitted for inclusion in the Work
133 | by You to the Licensor shall be under the terms and conditions of
134 | this License, without any additional terms or conditions.
135 | Notwithstanding the above, nothing herein shall supersede or modify
136 | the terms of any separate license agreement you may have executed
137 | with Licensor regarding such Contributions.
138 |
139 | 6. Trademarks. This License does not grant permission to use the trade
140 | names, trademarks, service marks, or product names of the Licensor,
141 | except as required for reasonable and customary use in describing the
142 | origin of the Work and reproducing the content of the NOTICE file.
143 |
144 | 7. Disclaimer of Warranty. Unless required by applicable law or
145 | agreed to in writing, Licensor provides the Work (and each
146 | Contributor provides its Contributions) on an "AS IS" BASIS,
147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148 | implied, including, without limitation, any warranties or conditions
149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150 | PARTICULAR PURPOSE. You are solely responsible for determining the
151 | appropriateness of using or redistributing the Work and assume any
152 | risks associated with Your exercise of permissions under this License.
153 |
154 | 8. Limitation of Liability. In no event and under no legal theory,
155 | whether in tort (including negligence), contract, or otherwise,
156 | unless required by applicable law (such as deliberate and grossly
157 | negligent acts) or agreed to in writing, shall any Contributor be
158 | liable to You for damages, including any direct, indirect, special,
159 | incidental, or consequential damages of any character arising as a
160 | result of this License or out of the use or inability to use the
161 | Work (including but not limited to damages for loss of goodwill,
162 | work stoppage, computer failure or malfunction, or any and all
163 | other commercial damages or losses), even if such Contributor
164 | has been advised of the possibility of such damages.
165 |
166 | 9. Accepting Warranty or Additional Liability. While redistributing
167 | the Work or Derivative Works thereof, You may choose to offer,
168 | and charge a fee for, acceptance of support, warranty, indemnity,
169 | or other liability obligations and/or rights consistent with this
170 | License. However, in accepting such obligations, You may act only
171 | on Your own behalf and on Your sole responsibility, not on behalf
172 | of any other Contributor, and only if You agree to indemnify,
173 | defend, and hold each Contributor harmless for any liability
174 | incurred by, or claims asserted against, such Contributor by reason
175 | of your accepting any such warranty or additional liability.
176 |
177 | END OF TERMS AND CONDITIONS
178 |
179 | APPENDIX: How to apply the Apache License to your work.
180 |
181 | To apply the Apache License to your work, attach the following
182 | boilerplate notice, with the fields enclosed by brackets "[]"
183 | replaced with your own identifying information. (Don't include
184 | the brackets!) The text should be enclosed in the appropriate
185 | comment syntax for the file format. We also recommend that a
186 | file or class name and description of purpose be included on the
187 | same "printed page" as the copyright notice for easier
188 | identification within third-party archives.
189 |
190 | Copyright [yyyy] [name of copyright owner]
191 |
192 | Licensed under the Apache License, Version 2.0 (the "License");
193 | you may not use this file except in compliance with the License.
194 | You may obtain a copy of the License at
195 |
196 | http://www.apache.org/licenses/LICENSE-2.0
197 |
198 | Unless required by applicable law or agreed to in writing, software
199 | distributed under the License is distributed on an "AS IS" BASIS,
200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201 | See the License for the specific language governing permissions and
202 | limitations under the License.
203 |
--------------------------------------------------------------------------------
/tests/unit/test_charm.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # Copyright 2021 Canonical Ltd.
3 | # See LICENSE file for licensing details.
4 | import unittest
5 | from unittest.mock import patch
6 |
7 | import ops
8 | import yaml
9 | from helpers import k8s_resource_multipatch
10 | from ops import pebble
11 | from ops.model import ActiveStatus, BlockedStatus
12 | from ops.testing import Harness
13 |
14 | from alertmanager import WorkloadManager
15 | from charm import AlertmanagerCharm
16 |
17 | ops.testing.SIMULATE_CAN_CONNECT = True # pyright: ignore
18 |
19 |
20 | class TestWithInitialHooks(unittest.TestCase):
21 | container_name: str = "alertmanager"
22 |
23 | @patch.object(WorkloadManager, "check_config", lambda *a, **kw: ("ok", ""))
24 | @patch("socket.getfqdn", new=lambda *args: "fqdn")
25 | @k8s_resource_multipatch
26 | @patch("lightkube.core.client.GenericSyncClient")
27 | @patch.object(WorkloadManager, "_alertmanager_version", property(lambda *_: "0.0.0"))
28 | def setUp(self, *unused):
29 | self.harness = Harness(AlertmanagerCharm)
30 | self.addCleanup(self.harness.cleanup)
31 |
32 | self.relation_id = self.harness.add_relation("alerting", "otherapp")
33 | self.harness.add_relation_unit(self.relation_id, "otherapp/0")
34 | self.harness.set_leader(True)
35 |
36 | self.harness.begin_with_initial_hooks()
37 |
38 | def test_num_peers(self):
39 | self.assertEqual(0, len(self.harness.charm.peer_relation.units)) # type: ignore
40 |
41 | @patch("socket.getfqdn", new=lambda *args: "fqdn")
42 | def test_pebble_layer_added(self, *unused):
43 | plan = self.harness.get_container_pebble_plan(self.container_name)
44 |
45 | # Check we've got the plan as expected
46 | self.assertIsNotNone(plan.services)
47 | self.assertIsNotNone(service := plan.services.get(self.harness.charm._service_name))
48 | self.assertIsNotNone(command := service.command) # pyright: ignore
49 |
50 | # Check the plan's services match the layer generated by the charm
51 | self.assertEqual(
52 | plan.services, self.harness.charm.alertmanager_workload._alertmanager_layer().services
53 | )
54 |
55 | # Check command contains key arguments
56 | self.assertIn("--config.file", command)
57 | self.assertIn("--storage.path", command)
58 | self.assertIn("--web.listen-address", command)
59 | self.assertIn("--cluster.listen-address", command)
60 |
61 | # Check the service was started
62 | service = self.harness.model.unit.get_container("alertmanager").get_service("alertmanager")
63 | self.assertTrue(service.is_running())
64 |
65 | @patch("socket.getfqdn", new=lambda *args: "fqdn")
66 | def test_relation_data_provides_public_address(self):
67 | # to suppress mypy error: Item "None" of "Optional[Any]" has no attribute "get_relation"
68 | model = self.harness.charm.framework.model
69 | assert model is not None
70 |
71 | rel = model.get_relation("alerting", self.relation_id)
72 | assert rel is not None # for static checker
73 | expected_address = "fqdn:{}".format(self.harness.charm.api_port)
74 | expected_rel_data = {
75 | "url": "http://fqdn:9093",
76 | "public_address": expected_address,
77 | "scheme": "http",
78 | }
79 | self.assertEqual(expected_rel_data, rel.data[self.harness.charm.unit])
80 |
81 | @patch.object(WorkloadManager, "check_config", lambda *a, **kw: ("ok", ""))
82 | @k8s_resource_multipatch
83 | @patch.object(AlertmanagerCharm, "_update_ca_certs", lambda *a, **kw: None)
84 | def test_topology_added_if_user_provided_config_without_group_by(self, *unused):
85 | new_config = yaml.dump({"not a real config": "but good enough for testing"})
86 | self.harness.update_config({"config_file": new_config})
87 | updated_config = yaml.safe_load(
88 | self.harness.charm.container.pull(self.harness.charm._config_path)
89 | )
90 |
91 | self.assertEqual(updated_config["not a real config"], "but good enough for testing")
92 | self.assertListEqual(
93 | sorted(updated_config["route"]["group_by"]),
94 | sorted(["juju_model", "juju_application", "juju_model_uuid"]),
95 | )
96 |
97 | @patch.object(WorkloadManager, "check_config", lambda *a, **kw: ("ok", ""))
98 | @k8s_resource_multipatch
99 | @patch.object(AlertmanagerCharm, "_update_ca_certs", lambda *a, **kw: None)
100 | def test_topology_added_if_user_provided_config_with_group_by(self, *unused):
101 | new_config = yaml.dump({"route": {"group_by": ["alertname", "juju_model"]}})
102 | self.harness.update_config({"config_file": new_config})
103 | updated_config = yaml.safe_load(
104 | self.harness.charm.container.pull(self.harness.charm._config_path)
105 | )
106 |
107 | self.assertListEqual(
108 | sorted(updated_config["route"]["group_by"]),
109 | sorted(["alertname", "juju_model", "juju_application", "juju_model_uuid"]),
110 | )
111 |
112 | @patch.object(WorkloadManager, "check_config", lambda *a, **kw: ("ok", ""))
113 | @k8s_resource_multipatch
114 | @patch.object(AlertmanagerCharm, "_update_ca_certs", lambda *a, **kw: None)
115 | def test_topology_is_not_added_if_user_provided_config_with_ellipsis(self, *unused):
116 | """The special value '...' effectively disables aggregation entirely.
117 |
118 | Ref: https://prometheus.io/docs/alerting/latest/configuration/#route
119 | """
120 | new_config = yaml.dump({"route": {"group_by": ["..."]}})
121 | self.harness.update_config({"config_file": new_config})
122 | updated_config = yaml.safe_load(
123 | self.harness.charm.container.pull(self.harness.charm._config_path)
124 | )
125 |
126 | self.assertListEqual(
127 | updated_config["route"]["group_by"],
128 | sorted(["..."]),
129 | )
130 |
131 | @patch.object(WorkloadManager, "check_config", lambda *a, **kw: ("ok", ""))
132 | @k8s_resource_multipatch
133 | @patch.object(AlertmanagerCharm, "_update_ca_certs", lambda *a, **kw: None)
134 | def test_charm_blocks_if_user_provided_config_with_templates(self, *unused):
135 | new_config = yaml.dump({"templates": ["/what/ever/*.tmpl"]})
136 | self.harness.update_config({"config_file": new_config})
137 | self.assertIsInstance(self.harness.charm.unit.status, BlockedStatus)
138 |
139 | new_config = yaml.dump({})
140 | self.harness.update_config({"config_file": new_config})
141 | self.assertIsInstance(self.harness.charm.unit.status, ActiveStatus)
142 |
143 | @patch.object(WorkloadManager, "check_config", lambda *a, **kw: ("ok", ""))
144 | @k8s_resource_multipatch
145 | @patch.object(AlertmanagerCharm, "_update_ca_certs", lambda *a, **kw: None)
146 | def test_templates_file_not_created_if_user_provides_templates_without_config(self, *unused):
147 | templates = '{{ define "some.tmpl.variable" }}whatever it is{{ end}}'
148 | self.harness.update_config({"templates_file": templates})
149 |
150 | # The testing harness's pull() used to raise FileNotFoundError, but
151 | # now it (correctly) raises pebble.PathError as per the real system,
152 | # so catch both.
153 | # TODO: update to just pebble.PathError when ops 2.1 is released.
154 | with self.assertRaises((pebble.PathError, FileNotFoundError)):
155 | self.harness.charm.container.pull(self.harness.charm._templates_path)
156 |
157 | @patch.object(WorkloadManager, "check_config", lambda *a, **kw: ("ok", ""))
158 | @k8s_resource_multipatch
159 | @patch.object(AlertmanagerCharm, "_update_ca_certs", lambda *a, **kw: None)
160 | def test_templates_section_added_if_user_provided_templates(self, *unused):
161 | new_config = yaml.dump({"route": {"group_by": ["alertname", "juju_model"]}})
162 | self.harness.update_config({"config_file": new_config})
163 | templates = '{{ define "some.tmpl.variable" }}whatever it is{{ end}}'
164 | self.harness.update_config({"templates_file": templates})
165 | updated_templates = self.harness.charm.container.pull(self.harness.charm._templates_path)
166 | self.assertEqual(templates, updated_templates.read())
167 |
168 | updated_config = yaml.safe_load(
169 | self.harness.charm.container.pull(self.harness.charm._config_path)
170 | )
171 | self.assertEqual(updated_config["templates"], [f"{self.harness.charm._templates_path}"])
172 |
173 |
174 | class TestWithoutInitialHooks(unittest.TestCase):
175 | container_name: str = "alertmanager"
176 |
177 | @patch.object(WorkloadManager, "check_config", lambda *a, **kw: ("ok", ""))
178 | @k8s_resource_multipatch
179 | @patch("lightkube.core.client.GenericSyncClient")
180 | def setUp(self, *unused):
181 | self.harness = Harness(AlertmanagerCharm)
182 | self.addCleanup(self.harness.cleanup)
183 |
184 | self.relation_id = self.harness.add_relation("alerting", "otherapp")
185 | self.harness.add_relation_unit(self.relation_id, "otherapp/0")
186 | self.harness.set_leader(True)
187 |
188 | self.harness.begin()
189 | self.harness.add_relation("replicas", "alertmanager")
190 |
191 | @patch.object(WorkloadManager, "check_config", lambda *a, **kw: ("ok", ""))
192 | @k8s_resource_multipatch
193 | @patch.object(WorkloadManager, "_alertmanager_version", property(lambda *_: "0.0.0"))
194 | def test_unit_status_around_pebble_ready(self, *unused):
195 | # before pebble_ready, status should be "maintenance"
196 | self.assertIsInstance(self.harness.charm.unit.status, ops.model.MaintenanceStatus)
197 |
198 | # after pebble_ready, status should be "active"
199 | self.harness.container_pebble_ready(self.container_name)
200 | self.assertIsInstance(self.harness.charm.unit.status, ops.model.ActiveStatus)
201 |
202 | self.assertEqual(self.harness.model.unit.name, "alertmanager-k8s/0")
203 |
204 |
205 | class TestActions(unittest.TestCase):
206 | container_name: str = "alertmanager"
207 |
208 | @patch.object(WorkloadManager, "check_config", lambda *a, **kw: ("ok", ""))
209 | @patch("socket.getfqdn", new=lambda *args: "fqdn")
210 | @k8s_resource_multipatch
211 | @patch("lightkube.core.client.GenericSyncClient")
212 | @patch.object(WorkloadManager, "_alertmanager_version", property(lambda *_: "0.0.0"))
213 | def setUp(self, *unused):
214 | self.harness = Harness(AlertmanagerCharm)
215 | self.addCleanup(self.harness.cleanup)
216 |
217 | self.harness.set_leader(True)
218 | self.harness.begin_with_initial_hooks()
219 |
220 | @patch.object(WorkloadManager, "check_config", lambda *a, **kw: ("ok", ""))
221 | @k8s_resource_multipatch
222 | @patch.object(WorkloadManager, "_alertmanager_version", property(lambda *_: "0.0.0"))
223 | def test_show_config(self, *_unused):
224 | tls_paths = {
225 | self.harness.charm._server_cert_path,
226 | self.harness.charm._ca_cert_path,
227 | self.harness.charm._key_path,
228 | }
229 |
230 | # GIVEN an isolated charm (see setUp, decorator)
231 | # WHEN the "show-config" action runs
232 | results = self.harness.run_action("show-config").results
233 |
234 | # THEN the result is a dict with the expected keys
235 | self.assertEqual(results.keys(), {"path", "content", "configs"})
236 |
237 | # AND configs DOES NOT contain cert-related entries
238 | # results.configs is a list of dicts, [{"path": ..., "content": ...}, {...}, ...].
239 | paths_rendered = {d["path"] for d in yaml.safe_load(results["configs"])}
240 | for filepath in tls_paths:
241 | self.assertNotIn(filepath, paths_rendered)
242 |
243 | # AND GIVEN a tls relation is in place
244 | rel_id = self.harness.add_relation("certificates", "ca")
245 | self.harness.add_relation_unit(rel_id, "ca/0")
246 | # AND cert files are on disk
247 | for filepath in tls_paths:
248 | self.harness.model.unit.get_container("alertmanager").push(
249 | filepath, "test", make_dirs=True
250 | )
251 |
252 | # WHEN the "show-config" action runs
253 | results = self.harness.run_action("show-config").results
254 |
255 | # THEN the result is a dict with the same keys as before
256 | self.assertEqual(results.keys(), {"path", "content", "configs"})
257 |
258 | # AND configs contains cert-related entries
259 | paths_rendered = {d["path"] for d in yaml.safe_load(results["configs"])}
260 | for filepath in tls_paths:
261 | self.assertIn(filepath, paths_rendered)
262 |
--------------------------------------------------------------------------------
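The group_by tests above exercise how juju topology labels are merged into a user-provided `route.group_by`. A minimal sketch of that behaviour, assuming nothing about the charm's actual implementation beyond what the tests assert:

def merge_group_by(user_group_by: list) -> list:
    """Merge juju topology labels into a user-provided group_by list."""
    topology_labels = {"juju_model", "juju_application", "juju_model_uuid"}
    if "..." in user_group_by:
        # The special value "..." disables aggregation entirely, so leave it untouched.
        return user_group_by
    return sorted(set(user_group_by) | topology_labels)


# Mirrors the expectations asserted in the tests above:
assert merge_group_by(["alertname", "juju_model"]) == sorted(
    ["alertname", "juju_model", "juju_application", "juju_model_uuid"]
)
assert merge_group_by(["..."]) == ["..."]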
/src/alertmanager.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # Copyright 2023 Canonical Ltd.
3 | # See LICENSE file for licensing details.
4 |
5 | """Workload manager for alertmanaqger."""
6 |
7 | import logging
8 | import os
9 | import re
10 | from typing import Callable, Dict, List, Optional, Tuple
11 |
12 | from ops.framework import Object
13 | from ops.model import Container
14 | from ops.pebble import ( # type: ignore
15 | ChangeError,
16 | ExecError,
17 | Layer,
18 | )
19 |
20 | from alertmanager_client import Alertmanager, AlertmanagerBadResponse
21 |
22 | logger = logging.getLogger(__name__)
23 |
24 |
25 | class ConfigFileSystemState:
26 | """Class representing the configuration state in a filesystem."""
27 |
28 | def __init__(self, manifest: Optional[Dict[str, Optional[str]]] = None):
29 | self._manifest = manifest.copy() if manifest else {}
30 |
31 | @property
32 | def manifest(self) -> Dict[str, Optional[str]]:
33 | """Return a copy of the planned manifest."""
34 | return self._manifest.copy()
35 |
36 | def add_file(self, path: str, content: str):
37 | """Add a file to the configuration."""
38 | # A value of `None` means the file needs to be removed (if present). If paths changed across
39 | # an upgrade, hard-code the old paths to None to guarantee removal of stale files that were
40 | # previously written to persistent storage.
41 | self._manifest[path] = content
42 |
43 | def delete_file(self, path: str):
44 | """Add a file to the configuration."""
45 | self._manifest[path] = None
46 |
47 | def apply(self, container: Container):
48 | """Apply this manifest onto a container."""
49 | for filepath, content in self._manifest.items():
50 | if content is None:
51 | container.remove_path(filepath, recursive=True)
52 | else:
53 | container.push(filepath, content, make_dirs=True)
54 |
55 |
56 | class WorkloadManagerError(Exception):
57 | """Base class for exceptions raised by WorkloadManager."""
58 |
59 |
60 | class ConfigUpdateFailure(WorkloadManagerError):
61 | """Custom exception for failed config updates."""
62 |
63 |
64 | class ContainerNotReady(WorkloadManagerError):
65 | """Raised when an operation is run that presumes the container being ready.."""
66 |
67 |
68 | class WorkloadManager(Object):
69 | """Workload manager for alertmanager."""
70 |
71 | _layer_name = _service_name = _exe_name = "alertmanager"
72 |
73 | # Path inside the workload container for alertmanager data, e.g. 'nflogs', 'silences'.
74 | _storage_path = "/alertmanager"
75 |
76 | _amtool_path = "/usr/bin/amtool"
77 |
78 | def __init__(
79 | self,
80 | charm,
81 | *,
82 | container_name: str,
83 | peer_netlocs: List[str],
84 | api_port: int,
85 | ha_port: int,
86 | web_external_url: str,
87 | web_route_prefix: str,
88 | config_path: str,
89 | web_config_path: str,
90 | tls_enabled: Callable[[], bool],
91 | cafile: Optional[str],
92 | ):
93 | # Must inherit from ops 'Object' to be able to register events.
94 | super().__init__(charm, f"{self.__class__.__name__}-{container_name}")
95 |
96 | self._unit = charm.unit
97 |
98 | self._service_name = self._container_name = container_name
99 | self._container = charm.unit.get_container(container_name)
100 |
101 | self._peer_netlocs = peer_netlocs
102 |
103 | self._api_port = api_port
104 | self._ha_port = ha_port
105 | self.api = Alertmanager(endpoint_url=web_external_url, cafile=cafile)
106 | self._web_external_url = web_external_url
107 | self._web_route_prefix = web_route_prefix
108 | self._config_path = config_path
109 | self._web_config_path = web_config_path
110 | self._is_tls_enabled = tls_enabled
111 |
112 | # turn the container name into a valid Python identifier
113 | snake_case_container_name = self._container_name.replace("-", "_")
114 | charm.framework.observe(
115 | charm.on[snake_case_container_name].pebble_ready,
116 | self._on_pebble_ready,
117 | )
118 | charm.framework.observe(charm.on.stop, self._on_stop)
119 |
120 | @property
121 | def is_ready(self):
122 | """Is the workload ready to be interacted with?"""
123 | return self._container.can_connect()
124 |
125 | def _on_pebble_ready(self, _):
126 | if version := self._alertmanager_version:
127 | self._unit.set_workload_version(version)
128 | else:
129 | logger.debug(
130 | "Cannot set workload version at this time: could not get Alertmanager version."
131 | )
132 |
133 | def _on_stop(self, _):
134 | self._unit.set_workload_version("")
135 |
136 | @property
137 | def _alertmanager_version(self) -> Optional[str]:
138 | """Returns the version of Alertmanager.
139 |
140 | Returns:
141 | A string equal to the Alertmanager version, or None if the version could not be determined.
142 | """
143 | if not self.is_ready:
144 | return None
145 | version_output, _ = self._container.exec(
146 | [self._exe_name, "--version"], timeout=30
147 | ).wait_output()
148 | # Output looks like this:
149 | # alertmanager, version 0.23.0 (branch: HEAD, ...
150 | result = re.search(r"version (\d*\.\d*\.\d*)", version_output)
151 | if result is None:
152 | return result
153 | return result.group(1)
154 |
155 | def check_config(self) -> Tuple[str, str]:
156 | """Check config with amtool.
157 |
158 | Returns stdout, stderr.
159 | """
160 | if not self.is_ready:
161 | raise ContainerNotReady(
162 | "cannot check config: alertmanager workload container not ready"
163 | )
164 | proc = self._container.exec(
165 | [self._amtool_path, "check-config", self._config_path], timeout=30
166 | )
167 | try:
168 | output, err = proc.wait_output()
169 | except ExecError as e:
170 | output, err = str(e.stdout), str(e.stderr)
171 | # let ChangeError raise
172 | return output, err
173 |
174 | def _alertmanager_layer(self) -> Layer:
175 | """Returns Pebble configuration layer for alertmanager."""
176 |
177 | def _command():
178 | """Returns full command line to start alertmanager."""
179 | # cluster listen netloc - empty string disables HA mode
180 | listen_netloc_arg = "" if len(self._peer_netlocs) == 0 else f"0.0.0.0:{self._ha_port}"
181 |
182 | # The chosen port in the cluster.listen-address flag is the port that needs to be
183 | # specified in the cluster.peer flag of the other peers.
184 | # Assuming all replicas use the same port.
185 | # Sorting for repeatability in comparing between service layers.
186 | peer_cmd_args = " ".join(
187 | sorted([f"--cluster.peer={netloc}" for netloc in self._peer_netlocs])
188 | )
189 | web_config_arg = (
190 | f"--web.config.file={self._web_config_path} " if self._is_tls_enabled() else ""
191 | )
192 | return (
193 | f"{self._exe_name} "
194 | f"--config.file={self._config_path} "
195 | f"--storage.path={self._storage_path} "
196 | f"--web.listen-address=:{self._api_port} "
197 | f"--cluster.listen-address={listen_netloc_arg} "
198 | f"--web.external-url={self._web_external_url} "
199 | f"--web.route-prefix={self._web_route_prefix} "
200 | f"{web_config_arg}"
201 | f"{peer_cmd_args}"
202 | )
203 |
204 | def _environment():
205 | return {
206 | "https_proxy": os.environ.get("JUJU_CHARM_HTTPS_PROXY", ""),
207 | "http_proxy": os.environ.get("JUJU_CHARM_HTTP_PROXY", ""),
208 | "no_proxy": os.environ.get("JUJU_CHARM_NO_PROXY", ""),
209 | }
210 |
211 | return Layer(
212 | {
213 | "summary": "alertmanager layer",
214 | "description": "pebble config layer for alertmanager",
215 | "services": {
216 | self._service_name: {
217 | "override": "replace",
218 | "summary": "alertmanager service",
219 | "command": _command(),
220 | "startup": "enabled",
221 | "environment": _environment(),
222 | }
223 | },
224 | }
225 | )
226 |
227 | def update_layer(self) -> None:
228 | """Update service layer to reflect changes in peers (replicas)."""
229 | if not self.is_ready:
230 | raise ContainerNotReady("cannot update layer")
231 |
232 | overlay = self._alertmanager_layer()
233 |
234 | self._container.add_layer(self._layer_name, overlay, combine=True)
235 | try:
236 | # If a config is invalid then alertmanager would exit immediately.
237 | # This would be caught by pebble (default timeout is 30 sec) and a ChangeError
238 | # would be raised.
239 | self._container.replan()
240 | except ChangeError as e:
241 | logger.error(
242 | "Failed to replan; pebble plan: %s; %s",
243 | self._container.get_plan().to_dict(),
244 | str(e),
245 | )
246 |
247 | def update_config(self, manifest: ConfigFileSystemState) -> None:
248 | """Update alertmanager config files to reflect changes in configuration.
249 |
250 | After pushing a new config, a hot-reload is attempted. If hot-reload fails, the service is
251 | restarted.
252 |
253 | Raises:
254 | ConfigUpdateFailure, if updating the configuration file failed.
255 | """
256 | if not self.is_ready:
257 | raise ContainerNotReady("cannot update config")
258 |
259 | logger.debug("applying config changes")
260 | manifest.apply(self._container)
261 |
262 | # Validate with amtool and raise if bad
263 | try:
264 | self.check_config()
265 | except WorkloadManagerError as e:
266 | raise ConfigUpdateFailure("Failed to validate config (run check-config action)") from e
267 |
268 | def restart_service(self) -> bool:
269 | """Helper function for restarting the underlying service.
270 |
271 | Returns:
272 | True if restart succeeded; False otherwise.
273 | """
274 | logger.info("Restarting service %s", self._service_name)
275 |
276 | if not self.is_ready:
277 | logger.error("Cannot (re)start service: container is not ready.")
278 | return False
279 |
280 | # Check if the service exists, to avoid a ModelError being raised when the service does
281 | # not exist.
282 | if not self._container.get_plan().services.get(self._service_name):
283 | logger.error("Cannot (re)start service: service does not (yet) exist.")
284 | return False
285 |
286 | self._container.restart(self._service_name)
287 |
288 | return True
289 |
290 | def reload(self) -> None:
291 | """Trigger a hot-reload of the configuration (or service restart).
292 |
293 | Raises:
294 | ConfigUpdateFailure, if the reload (or restart) fails.
295 | """
296 | if not self.is_ready:
297 | raise ContainerNotReady("cannot reload")
298 |
299 | # Obtain a "before" snapshot of the config from the server.
300 | # This differs from the config we pushed, because alertmanager fills in a number of
301 | # defaults, such as:
302 | #
303 | # smtp_hello: localhost
304 | # smtp_require_tls: true
305 | # pagerduty_url: https://events.pagerduty.com/v2/enqueue
306 | # opsgenie_api_url: https://api.opsgenie.com/
307 | # wechat_api_url: https://qyapi.weixin.qq.com/cgi-bin/
308 | # victorops_api_url: https://alert.victorops.com/integrations/generic/20131114/alert/
309 | #
310 | # The snapshot is needed to determine if reloading took place.
311 | try:
312 | config_from_server_before = self.api.config()
313 | except AlertmanagerBadResponse:
314 | config_from_server_before = None
315 |
316 | # Send an HTTP POST to alertmanager to hot-reload the config.
317 | # This reduces down-time compared to restarting the service.
318 | try:
319 | self.api.reload()
320 | except AlertmanagerBadResponse as e:
321 | logger.warning("config reload via HTTP POST failed: %s", str(e))
322 | # hot-reload failed so attempting a service restart
323 | if not self.restart_service():
324 | raise ConfigUpdateFailure(
325 | "Is config valid? hot reload and service restart failed."
326 | )
327 |
328 | # Obtain an "after" snapshot of the config from the server.
329 | try:
330 | config_from_server_after = self.api.config()
331 | except AlertmanagerBadResponse:
332 | config_from_server_after = None
333 |
334 | if config_from_server_before is None or config_from_server_after is None:
335 | logger.warning("cannot determine if reload succeeded")
336 | elif config_from_server_before == config_from_server_after:
337 | logger.warning("config remained the same after a reload")
338 |
--------------------------------------------------------------------------------
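As a usage note for `ConfigFileSystemState` above, here is a minimal sketch of how a manifest might be assembled and handed to `WorkloadManager.update_config`; the paths and contents are placeholders, not taken from the charm:

from alertmanager import ConfigFileSystemState

manifest = ConfigFileSystemState()
# Placeholder path and content, for illustration only.
manifest.add_file("/etc/alertmanager/alertmanager.yml", "route: {receiver: placeholder}")
manifest.delete_file("/etc/alertmanager/stale-config.yml")  # mapped to None => removed if present

# From within the charm, with a connectable workload container:
#   workload_manager.update_config(manifest)  # pushes/removes files, then validates with amtool
#   workload_manager.reload()                 # hot-reload, falling back to a service restart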