├── .jujuignore ├── CODEOWNERS ├── .gitignore ├── requirements.txt ├── actions.yaml ├── tests ├── unit │ ├── helpers.py │ ├── test_alertmanager_client.py │ ├── charm │ │ └── test_push_config_to_workload_on_startup.py │ ├── test_charm.py │ └── test_consumer.py └── integration │ ├── conftest.py │ ├── test_kubectl_delete.py │ ├── test_update_status_pressure.py │ ├── test_config_changed_modifies_file.py │ ├── test_upgrade_charm.py │ ├── test_rescale_charm.py │ ├── helpers.py │ ├── test_rerelate_alertmanager_dispatch_metrics_endpoint.py │ └── test_templates.py ├── charmcraft.yaml ├── src ├── prometheus_alert_rules │ ├── alertmanager_notifications_failed.rule │ ├── alertmanager_missing.rule │ └── alertmanager_configuration_reload_failure.rule ├── alertmanager_client.py └── charm.py ├── .github ├── ISSUE_TEMPLATE │ ├── enhancement_proposal.yml │ └── bug_report.yml └── workflows │ ├── promote.yaml │ ├── pull-request.yaml │ ├── issues.yml │ ├── codeql-analysis.yml │ └── release-edge.yaml ├── config.yaml ├── INTEGRATING.md ├── RELEASE.md ├── metadata.yaml ├── pyproject.toml ├── icon.svg ├── tox.ini ├── CONTRIBUTING.md ├── README.md ├── lib └── charms │ ├── alertmanager_k8s │ └── v0 │ │ └── alertmanager_dispatch.py │ ├── observability_libs │ └── v0 │ │ └── kubernetes_service_patch.py │ ├── karma_k8s │ └── v0 │ │ └── karma_dashboard.py │ └── grafana_k8s │ └── v0 │ └── grafana_source.py └── LICENSE /.jujuignore: -------------------------------------------------------------------------------- 1 | /venv 2 | **/__pycache__ 3 | -------------------------------------------------------------------------------- /CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @sed-i @abuelodelanada @rbarry82 @balbirthomas @dstathis @simskij 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | venv/ 2 | build/ 3 | *.charm 4 | *.orig 5 | .coverage 6 | **/__pycache__/ 7 | *.py[cod] 8 | .idea/ 9 | .tox/ 10 | .mypy_cache -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Canonical Ltd. 2 | # See LICENSE file for licensing details. 3 | 4 | ops 5 | PyYAML 6 | lightkube 7 | lightkube-models 8 | -------------------------------------------------------------------------------- /actions.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Canonical Ltd. 2 | # See LICENSE file for licensing details. 3 | 4 | show-config: 5 | description: Show alertmanager config file. 6 | -------------------------------------------------------------------------------- /tests/unit/helpers.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright 2021 Canonical Ltd. 3 | # See LICENSE file for licensing details. 4 | 5 | """Helper functions for writing tests.""" 6 | 7 | 8 | def no_op(*args, **kwargs) -> None: 9 | pass 10 | 11 | 12 | def tautology(*args, **kwargs) -> bool: 13 | return True 14 | -------------------------------------------------------------------------------- /charmcraft.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Canonical Ltd. 2 | # See LICENSE file for licensing details. 
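# Descriptive note (added): this file drives `charmcraft pack`. The charm is built on
# and runs on Ubuntu 20.04, and git is listed as a build package, presumably so that
# charmcraft can fetch or resolve any git-based build dependencies during packing.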
3 | 4 | type: charm 5 | bases: 6 | - build-on: 7 | - name: "ubuntu" 8 | channel: "20.04" 9 | run-on: 10 | - name: "ubuntu" 11 | channel: "20.04" 12 | parts: 13 | charm: 14 | build-packages: 15 | - git 16 | -------------------------------------------------------------------------------- /src/prometheus_alert_rules/alertmanager_notifications_failed.rule: -------------------------------------------------------------------------------- 1 | alert: AlertmanagerNotificationsFailed 2 | expr: alertmanager_notifications_failed_total{integration=~".*"} != 0 3 | for: 0m 4 | labels: 5 | severity: warning 6 | annotations: 7 | summary: Alertmanager notifications failure (instance {{ $labels.instance }}) 8 | description: | 9 | Alertmanager notifications failure 10 | VALUE = {{ $value }} 11 | LABELS = {{ $labels }} 12 | -------------------------------------------------------------------------------- /src/prometheus_alert_rules/alertmanager_missing.rule: -------------------------------------------------------------------------------- 1 | # Based on https://awesome-prometheus-alerts.grep.to/rules.html#prometheus-self-monitoring-1 2 | alert: AlertmanagerJobMissing 3 | expr: absent(up{}) 4 | for: 0m 5 | labels: 6 | severity: warning 7 | annotations: 8 | summary: Alertmanager job missing (instance {{ $labels.instance }}) 9 | description: | 10 | A Alertmanager job has disappeared 11 | VALUE = {{ $value }} 12 | LABELS = {{ $labels }} 13 | -------------------------------------------------------------------------------- /src/prometheus_alert_rules/alertmanager_configuration_reload_failure.rule: -------------------------------------------------------------------------------- 1 | # Based on https://awesome-prometheus-alerts.grep.to/rules.html#prometheus-self-monitoring-1 2 | alert: AlertmanagerConfigurationReloadFailure 3 | expr: alertmanager_config_last_reload_successful{} != 1 4 | for: 0m 5 | labels: 6 | severity: warning 7 | annotations: 8 | summary: Alertmanager configuration reload failure (instance {{ $labels.instance }}) 9 | description: | 10 | Alertmanager configuration reload error 11 | VALUE = {{ $value }} 12 | LABELS = {{ $labels }} 13 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/enhancement_proposal.yml: -------------------------------------------------------------------------------- 1 | name: Enhancement Proposal 2 | description: File an enhancement proposal 3 | labels: ["Type: Enhancement", "Status: Triage"] 4 | body: 5 | - type: markdown 6 | attributes: 7 | value: > 8 | Thanks for taking the time to fill out this enhancement proposal! Before submitting your issue, please make 9 | sure there isn't already a prior issue concerning this. If there is, please join that discussion instead. 10 | - type: textarea 11 | id: enhancement-proposal 12 | attributes: 13 | label: Enhancement Proposal 14 | description: > 15 | Describe the enhancement you would like to see in as much detail as needed. 16 | validations: 17 | required: true 18 | -------------------------------------------------------------------------------- /config.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Canonical Ltd. 2 | # See LICENSE file for licensing details. 3 | 4 | options: 5 | config_file: 6 | type: string 7 | description: > 8 | Alertmanager configuration file (yaml), with the exclusion of the templates section. 9 | Refer to https://www.prometheus.io/docs/alerting/latest/configuration/ for full details. 
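    # Illustrative usage (file name is hypothetical, not part of this repo):
    #   juju config alertmanager-k8s config_file="$(cat alertmanager.yml)"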
10 | default: "" 11 | templates_file: 12 | type: string 13 | description: > 14 | Alertmanager templates definition file. This is a slight deviation from the official 15 | alertmanager config spec. All templates need to go into this single config option, instead of 16 | the 'templates' section of the main configuration file. The templates will be pushed to the 17 | workload container, and the configuration file will be updated accordingly. 18 | Refer to https://prometheus.io/docs/alerting/latest/notification_examples/ for more details 19 | on templates. 20 | default: "" 21 | -------------------------------------------------------------------------------- /INTEGRATING.md: -------------------------------------------------------------------------------- 1 | # Integrating alertmanager-k8s 2 | 3 | ## Provides 4 | 5 | ### alertmanager_dispatch 6 | 7 | Any charm that implements the 8 | [`alertmanager_dispatch`](https://charmhub.io/alertmanager-k8s/libraries/alertmanager_dispatch) 9 | relation interface can be related to this charm for forwarding alerts to alertmanager, 10 | for example: [Prometheus][Prometheus operator], [Loki][Loki operator]. 11 | 12 | ``` 13 | juju relate alertmanager-k8s prometheus-k8s 14 | ``` 15 | 16 | ### karma_dashboard 17 | The [`karma_dashboard`](https://charmhub.io/karma-k8s/libraries/karma_dashboard) 18 | relation interface links an entire Alertmanager cluster to a 19 | [Karma][Karma operator] dashboard. 20 | Scaling alertmanager would automatically cause karma to group alerts by 21 | cluster. 22 | 23 | ``` 24 | juju relate alertmanager-k8s karma-k8s 25 | ``` 26 | 27 | ## Requires 28 | None. 29 | 30 | [Loki operator]: https://charmhub.io/loki-k8s 31 | [Prometheus operator]: https://charmhub.io/prometheus-k8s 32 | [Karma operator]: https://charmhub.io/karma-k8s/ 33 | -------------------------------------------------------------------------------- /tests/integration/conftest.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright 2021 Canonical Ltd. 3 | # See LICENSE file for licensing details. 
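# Descriptive note (added): this conftest supplies two fixtures shared by the
# integration tests -- `charm_under_test`, which builds the local charm once per test
# module, and `httpserver_listen_address`, which determines the host's outbound IP
# (falling back to 127.0.0.1) so pytest-httpserver can listen on a non-localhost address.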
4 | 5 | import socket 6 | from pathlib import Path 7 | 8 | import pytest 9 | from pytest_operator.plugin import OpsTest 10 | 11 | PYTEST_HTTP_SERVER_PORT = 8000 12 | 13 | 14 | @pytest.fixture(scope="module") 15 | async def charm_under_test(ops_test: OpsTest) -> Path: 16 | """Charm used for integration testing.""" 17 | path_to_built_charm = await ops_test.build_charm(".") 18 | 19 | return path_to_built_charm 20 | 21 | 22 | @pytest.fixture(scope="session") 23 | def httpserver_listen_address(): 24 | s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) 25 | s.settimeout(0) 26 | try: 27 | # ip address does not need to be reachable 28 | s.connect(("8.8.8.8", 1)) 29 | local_ip_address = s.getsockname()[0] 30 | except Exception: 31 | local_ip_address = "127.0.0.1" 32 | finally: 33 | s.close() 34 | return (local_ip_address, PYTEST_HTTP_SERVER_PORT) 35 | -------------------------------------------------------------------------------- /.github/workflows/promote.yaml: -------------------------------------------------------------------------------- 1 | name: Promote Charm 2 | 3 | on: 4 | workflow_dispatch: 5 | inputs: 6 | promotion: 7 | type: choice 8 | description: Channel to promote from 9 | options: 10 | - edge -> beta 11 | - beta -> candidate 12 | - candidate -> stable 13 | 14 | jobs: 15 | promote: 16 | name: Promote Charm 17 | runs-on: ubuntu-latest 18 | steps: 19 | - name: Checkout 20 | uses: actions/checkout@v2 21 | - name: Set target channel 22 | env: 23 | PROMOTE_FROM: ${{ github.event.inputs.promotion }} 24 | run: | 25 | if [ "${PROMOTE_FROM}" == "edge -> beta" ]; then 26 | echo "promote-from=edge" >> ${GITHUB_ENV} 27 | echo "promote-to=beta" >> ${GITHUB_ENV} 28 | elif [ "${PROMOTE_FROM}" == "beta -> candidate" ]; then 29 | echo "promote-from=beta" >> ${GITHUB_ENV} 30 | echo "promote-to=candidate" >> ${GITHUB_ENV} 31 | elif [ "${PROMOTE_FROM}" == "candidate -> stable" ]; then 32 | echo "promote-from=candidate" >> ${GITHUB_ENV} 33 | echo "promote-to=stable" >> ${GITHUB_ENV} 34 | fi 35 | - name: Promote Charm 36 | uses: canonical/charming-actions/release-charm@1.0.3 37 | with: 38 | credentials: ${{ secrets.CHARMHUB_TOKEN }} 39 | github-token: ${{ secrets.GITHUB_TOKEN }} 40 | destination-channel: latest/${{ env.promote-to }} 41 | origin-channel: latest/${{ env.promote-from }} 42 | charmcraft-channel: latest/stable 43 | -------------------------------------------------------------------------------- /tests/integration/test_kubectl_delete.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright 2022 Canonical Ltd. 3 | # See LICENSE file for licensing details. 
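# Descriptive note (added): this module exercises self-healing. The locally built charm
# is deployed, its pod is deleted out-of-band via `microk8s.kubectl delete pod`, and the
# test waits for the unit to be recreated and for Alertmanager to report as up again.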
4 | 5 | 6 | import logging 7 | from pathlib import Path 8 | 9 | import pytest 10 | import yaml 11 | from helpers import is_alertmanager_up 12 | from pytest_operator.plugin import OpsTest 13 | 14 | logger = logging.getLogger(__name__) 15 | 16 | METADATA = yaml.safe_load(Path("./metadata.yaml").read_text()) 17 | app_name = METADATA["name"] 18 | resources = {"alertmanager-image": METADATA["resources"]["alertmanager-image"]["upstream-source"]} 19 | 20 | 21 | @pytest.mark.abort_on_fail 22 | async def test_deploy_from_local_path(ops_test: OpsTest, charm_under_test): 23 | """Deploy the charm-under-test.""" 24 | logger.debug("deploy local charm") 25 | 26 | await ops_test.model.deploy(charm_under_test, application_name=app_name, resources=resources) 27 | await ops_test.model.wait_for_idle(apps=[app_name], status="active", timeout=1000) 28 | await is_alertmanager_up(ops_test, app_name) 29 | 30 | 31 | @pytest.mark.abort_on_fail 32 | async def test_kubectl_delete_pod(ops_test: OpsTest): 33 | pod_name = f"{app_name}-0" 34 | 35 | cmd = [ 36 | "sg", 37 | "microk8s", 38 | "-c", 39 | " ".join(["microk8s.kubectl", "delete", "pod", "-n", ops_test.model_name, pod_name]), 40 | ] 41 | 42 | logger.debug( 43 | "Removing pod '%s' from model '%s' with cmd: %s", pod_name, ops_test.model_name, cmd 44 | ) 45 | 46 | retcode, stdout, stderr = await ops_test.run(*cmd) 47 | assert retcode == 0, f"kubectl failed: {(stderr or stdout).strip()}" 48 | logger.debug(stdout) 49 | await ops_test.model.block_until(lambda: len(ops_test.model.applications[app_name].units) > 0) 50 | await ops_test.model.wait_for_idle(apps=[app_name], status="active", timeout=1000) 51 | assert await is_alertmanager_up(ops_test, app_name) 52 | -------------------------------------------------------------------------------- /RELEASE.md: -------------------------------------------------------------------------------- 1 | # Release Process 2 | 3 | ## Overview 4 | 5 | At any given time there are three revisions of the Alertmanager charm [available on CharmHub.io](https://charmhub.io/alertmanager-k8s), for each of the following channels: 6 | 7 | 1. `latest/stable` is a well tested production ready version of the Charm. 8 | 2. `latest/candidate` is a feature ready next version of the stable release, currently in testing. 9 | 3. `latest/edge` is the bleeding edge developer version of the charm. While we really try not to, it may break and introduce regressions. 10 | 11 | Currently, the Alertmanager charm does not make use of the `latest/beta` channel. 12 | For more information about CharmHub channels, refer to the [Juju charm store](https://discourse.charmhub.io/t/the-juju-charm-store) documentation. 13 | 14 | ## When to create which revisions 15 | 16 | * **Stable revisions** are done in consultation with product manager and engineering manager when the `candidate` revision has been well tested and is deemed ready for production. 17 | * **Candidate revisions** are done when the charm reaches a state of feature completion with respect to the next planned `stable` release. 18 | * **Edge revisions** are released at the developer's discretion, potentially every time something is merged into `main` and the unit tests pass. 19 | 20 | ## How to publish revisions 21 | 22 | Refer to the [Publish your operator in Charmhub](https://discourse.charmhub.io/t/publish-your-operator-in-charmhub) documentation. 
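As an illustrative sketch (the revision number here is hypothetical), promoting an
already-uploaded revision is a single `charmcraft` invocation:

```
charmcraft release alertmanager-k8s --revision=42 --channel=latest/candidate
```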
23 | After a `latest/stable` release, it is expected that the version of the charm is the same as the one in `latest/candidate`, and those two channels will diverge again when we are ramping up through `latest/candidate` releases for a new `latest/stable` release. 24 | 25 | ## A note on granularity of revisions 26 | 27 | We believe in shipping often and with confidence. 28 | It is perfectly acceptable to have a new `latest/stable` release containing just one bug fix or a small new feature with respect to the last one. 29 | -------------------------------------------------------------------------------- /.github/workflows/pull-request.yaml: -------------------------------------------------------------------------------- 1 | name: Pull Request 2 | on: 3 | pull_request: 4 | branches: 5 | - main 6 | 7 | jobs: 8 | lib-check: 9 | name: Static analysis of /lib for Python 3.5 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: Checkout 13 | uses: actions/checkout@v2 14 | - name: Set up Python 3.5 15 | uses: actions/setup-python@v2 16 | with: 17 | python-version: 3.5 18 | - name: Install dependencies 19 | run: python3 -m pip install tox 20 | - name: Run static analysis for /lib for 3.5 21 | run: tox -vve static-lib 22 | static-analysis: 23 | name: Static analysis 24 | runs-on: ubuntu-latest 25 | steps: 26 | - name: Checkout 27 | uses: actions/checkout@v2 28 | - name: Install dependencies 29 | run: python3 -m pip install tox 30 | - name: Run static analysis (charm) 31 | run: tox -vve static-charm 32 | - name: Run static analysis (unit tests) 33 | run: tox -vve static-unit 34 | - name: Run static analysis (integration tests) 35 | run: tox -vve static-integration 36 | lint: 37 | name: Lint 38 | runs-on: ubuntu-latest 39 | steps: 40 | - name: Checkout 41 | uses: actions/checkout@v2 42 | - name: Install dependencies 43 | run: python3 -m pip install tox 44 | - name: Run linters 45 | run: tox -vve lint 46 | unit-test: 47 | name: Unit tests 48 | runs-on: ubuntu-latest 49 | steps: 50 | - name: Checkout 51 | uses: actions/checkout@v2 52 | - name: Install dependencies 53 | run: python -m pip install tox 54 | - name: Run tests 55 | run: tox -vve unit 56 | integration-test-microk8s: 57 | name: Integration tests (microk8s) 58 | runs-on: ubuntu-latest 59 | steps: 60 | - name: Checkout 61 | uses: actions/checkout@v2 62 | - name: Setup operator environment 63 | uses: charmed-kubernetes/actions-operator@main 64 | with: 65 | provider: microk8s 66 | - name: Run alertmanger tests 67 | run: tox -vve integration 68 | -------------------------------------------------------------------------------- /metadata.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Canonical Ltd. 2 | # See LICENSE file for licensing details. 3 | 4 | name: alertmanager-k8s 5 | 6 | summary: | 7 | Kubernetes charm for Alertmanager. 8 | 9 | description: | 10 | Alertmanager handles alerts sent by client applications such as the Prometheus server. 11 | It takes care of deduplicating, grouping, and routing them to the correct receiver integrations 12 | such as email, PagerDuty, or OpsGenie. It also takes care of silencing and inhibition of alerts. 13 | 14 | docs: https://discourse.charmhub.io/t/alertmanager-k8s-docs-index/5788 15 | 16 | # workload containers 17 | containers: 18 | alertmanager: # container key used by pebble 19 | resource: alertmanager-image 20 | mounts: 21 | - storage: data 22 | # nflogs and silences files go here. 
With a mounted storage for silences, they persist 23 | # across container restarts. 24 | # This path is passed to alertmanager via the `--storage.path` cli argument. 25 | location: /alertmanager 26 | 27 | # oci-image resources for each container defined above 28 | resources: 29 | alertmanager-image: 30 | type: oci-image 31 | description: OCI image for alertmanager 32 | upstream-source: ubuntu/prometheus-alertmanager:0.23-22.04_beta 33 | provides: 34 | alerting: 35 | # The provider (alertmanager) adds the following key-value pair to the relation data bag of 36 | # every alertmanager unit: 37 | # "public_address": : 38 | interface: alertmanager_dispatch 39 | # assumed network type: private 40 | karma-dashboard: 41 | interface: karma_dashboard 42 | self-metrics-endpoint: 43 | interface: prometheus_scrape 44 | grafana-dashboard: 45 | interface: grafana_dashboard 46 | grafana-source: 47 | interface: grafana_datasource 48 | 49 | peers: 50 | replicas: 51 | interface: alertmanager_replica 52 | # assumed network type: private 53 | 54 | storage: 55 | data: 56 | type: filesystem 57 | description: > 58 | Storage path passed to alertmanager via --storage.path argument and used for nflog and silences snapshot 59 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Canonical Ltd. 2 | # See LICENSE file for licensing details. 3 | 4 | # Testing tools configuration 5 | [tool.coverage.run] 6 | branch = true 7 | 8 | [tool.coverage.report] 9 | show_missing = true 10 | 11 | # Formatting tools configuration 12 | [tool.black] 13 | line-length = 99 14 | target-version = ["py38"] 15 | 16 | [tool.isort] 17 | profile = "black" 18 | 19 | # Linting tools configuration 20 | [tool.flake8] 21 | max-line-length = 99 22 | max-doc-length = 99 23 | max-complexity = 10 24 | exclude = [".git", "__pycache__", ".tox", "build", "dist", "*.egg_info", "venv"] 25 | select = ["E", "W", "F", "C", "N", "R", "D", "H"] 26 | # Ignore W503, E501 because using black creates errors with this 27 | # Ignore D107 Missing docstring in __init__ 28 | ignore = ["W503", "E501", "D107"] 29 | # D100, D101, D102, D103: Ignore missing docstrings in tests 30 | per-file-ignores = ["tests/*:D100,D101,D102,D103"] 31 | docstring-convention = "google" 32 | # Check for properly formatted copyright header in each file 33 | copyright-check = "True" 34 | copyright-author = "Canonical Ltd." 
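# Illustrative example (added note) of a header accepted by the regexp below:
#   # Copyright 2021 Canonical Ltd.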
35 | copyright-regexp = "Copyright\\s\\d{4}([-,]\\d{4})*\\s+%(author)s" 36 | 37 | # Static analysis tools configuration 38 | [tool.mypy] 39 | pretty = true 40 | python_version = 3.8 41 | mypy_path = "$MYPY_CONFIG_FILE_DIR/src:$MYPY_CONFIG_FILE_DIR/lib" 42 | follow_imports = "normal" 43 | warn_redundant_casts = true 44 | warn_unused_ignores = true 45 | warn_unused_configs = true 46 | show_traceback = true 47 | show_error_codes = true 48 | namespace_packages = true 49 | explicit_package_bases = true 50 | check_untyped_defs = true 51 | allow_redefinition = true 52 | 53 | # Ignore libraries that do not have type hint nor stubs 54 | [[tool.mypy.overrides]] 55 | module = ["ops.*", "lightkube.*", "git.*", "pytest_operator.*", "validators.*"] 56 | ignore_missing_imports = true 57 | 58 | [[tool.mypy.overrides]] 59 | module = ["charms.grafana_k8s.*", "charms.observability_libs.*"] 60 | follow_imports = "silent" 61 | warn_unused_ignores = false 62 | 63 | [tool.pytest.ini_options] 64 | minversion = "6.0" 65 | log_cli_level = "INFO" 66 | asyncio_mode = "auto" -------------------------------------------------------------------------------- /.github/workflows/issues.yml: -------------------------------------------------------------------------------- 1 | name: Issues 2 | 3 | on: [issues] 4 | 5 | jobs: 6 | update: 7 | name: Update Issue 8 | runs-on: ubuntu-latest 9 | steps: 10 | - name: Dump Github Context 11 | run: | 12 | echo '${{ toJSON(github) }}' 13 | echo "update=false" >> $GITHUB_ENV 14 | 15 | if [ ${{ github.event_name }} != "issues" ]; then 16 | echo "This action only operates on issues" 17 | exit 0 18 | fi 19 | 20 | echo "update=true" >> $GITHUB_ENV 21 | - name: Determine action 22 | run: | 23 | if [ ${{ github.event.action }} == "opened" ]; then 24 | echo "action=open" >> $GITHUB_ENV 25 | fi 26 | if [ ${{ github.event.action }} == "reopened" ]; then 27 | echo "action=reopen" >> $GITHUB_ENV 28 | fi 29 | if [ ${{ github.event.action }} == "closed" ]; then 30 | echo "action=close" >> $GITHUB_ENV 31 | fi 32 | - name: Determine type 33 | run: | 34 | if ${{ contains(github.event.*.labels.*.name, 'Type: Bug') }}; then 35 | echo "type=bug" >> $GITHUB_ENV 36 | else 37 | echo "type=story" >> $GITHUB_ENV 38 | fi 39 | - name: Update 40 | if: ${{ env.update == 'true' }} 41 | run: | 42 | id="${{ github.event.issue.html_url }}" 43 | title="${{ github.event.issue.title }}" 44 | component="alertmanager" 45 | 46 | description="Opened by ${{ github.event.issue.user.login }}." 47 | 48 | data=$(jq -n \ 49 | --arg id "$id" \ 50 | --arg action "${{ env.action }}" \ 51 | --arg title "$title" \ 52 | --arg description "$description" \ 53 | --arg component "$component" \ 54 | --arg type "${{ env.type }}" \ 55 | '{data: {id: $id, action: $action, title: $title, description: $description, component: $component, type: $type}}') 56 | 57 | curl -X POST -H 'Content-type: application/json' --data "${data}" "${{ secrets.JIRA_URL }}" 58 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.yml: -------------------------------------------------------------------------------- 1 | name: Bug Report 2 | description: File a bug report 3 | labels: ["Type: Bug", "Status: Triage"] 4 | body: 5 | - type: markdown 6 | attributes: 7 | value: > 8 | Thanks for taking the time to fill out this bug report! Before submitting your issue, please make 9 | sure you are using the latest version of the charm. 
If not, please switch to this image prior to 10 | posting your report to make sure it's not already solved. 11 | - type: textarea 12 | id: bug-description 13 | attributes: 14 | label: Bug Description 15 | description: > 16 | If applicable, add screenshots to help explain your problem. If applicable, add screenshots to 17 | help explain the problem you are facing. 18 | validations: 19 | required: true 20 | - type: textarea 21 | id: reproduction 22 | attributes: 23 | label: To Reproduce 24 | description: > 25 | Please provide a step-by-step instruction of how to reproduce the behavior. 26 | placeholder: | 27 | 1. `juju deploy ...` 28 | 2. `juju relate ...` 29 | 3. `juju status --relations` 30 | validations: 31 | required: true 32 | - type: textarea 33 | id: environment 34 | attributes: 35 | label: Environment 36 | description: > 37 | We need to know a bit more about the context in which you run the charm. 38 | - Are you running Juju locally, on lxd, in multipass or on some other platform? 39 | - What track and channel you deployed the charm from (ie. `latest/edge` or similar). 40 | - Version of any applicable components, like the juju snap, the model controller, lxd, microk8s, and/or multipass. 41 | validations: 42 | required: true 43 | - type: textarea 44 | id: logs 45 | attributes: 46 | label: Relevant log output 47 | description: > 48 | Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks. 49 | Fetch the logs using `juju debug-log --replay` and `kubectl logs ...`. Additional details available in the juju docs 50 | at https://juju.is/docs/olm/juju-logs 51 | render: shell 52 | validations: 53 | required: true 54 | - type: textarea 55 | id: additional-context 56 | attributes: 57 | label: Additional context 58 | 59 | -------------------------------------------------------------------------------- /.github/workflows/codeql-analysis.yml: -------------------------------------------------------------------------------- 1 | # For most projects, this workflow file will not need changing; you simply need 2 | # to commit it to your repository. 3 | # 4 | # You may wish to alter this file to override the set of languages analyzed, 5 | # or to provide custom queries or build logic. 6 | name: "CodeQL" 7 | 8 | on: 9 | push: 10 | branches: [main] 11 | pull_request: 12 | # The branches below must be a subset of the branches above 13 | branches: [main] 14 | 15 | permissions: 16 | security-events: 17 | write 18 | 19 | jobs: 20 | analyze: 21 | name: Analyze 22 | runs-on: ubuntu-latest 23 | 24 | strategy: 25 | fail-fast: false 26 | matrix: 27 | # Override automatic language detection by changing the below list 28 | # Supported options are ['csharp', 'cpp', 'go', 'java', 'javascript', 'python'] 29 | language: ['python'] 30 | # Learn more... 31 | # https://docs.github.com/en/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning#overriding-automatic-language-detection 32 | 33 | steps: 34 | - name: Checkout repository 35 | uses: actions/checkout@v2 36 | 37 | # Initializes the CodeQL tools for scanning. 38 | - name: Initialize CodeQL 39 | uses: github/codeql-action/init@v1 40 | with: 41 | languages: ${{ matrix.language }} 42 | # If you wish to specify custom queries, you can do so here or in a config file. 43 | # By default, queries listed here will override any specified in a config file. 44 | # Prefix the list here with "+" to use these queries and those in the config file. 
45 | # queries: ./path/to/local/query, your-org/your-repo/queries@main 46 | 47 | # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). 48 | # If this step fails, then you should remove it and run the build manually (see below) 49 | - name: Autobuild 50 | uses: github/codeql-action/autobuild@v1 51 | 52 | # ℹ️ Command-line programs to run using the OS shell. 53 | # 📚 https://git.io/JvXDl 54 | 55 | # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines 56 | # and modify them (or add more) to build your code if your project 57 | # uses a compiled language 58 | 59 | #- run: | 60 | # make bootstrap 61 | # make release 62 | 63 | - name: Perform CodeQL Analysis 64 | uses: github/codeql-action/analyze@v1 65 | -------------------------------------------------------------------------------- /.github/workflows/release-edge.yaml: -------------------------------------------------------------------------------- 1 | name: Release to Edge 2 | on: 3 | push: 4 | branches: 5 | - main 6 | 7 | jobs: 8 | lib-check: 9 | name: Static analysis of /lib for Python 3.5 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: Checkout 13 | uses: actions/checkout@v2 14 | - name: Set up Python 3.5 15 | uses: actions/setup-python@v2 16 | with: 17 | python-version: 3.5 18 | - name: Install dependencies 19 | run: python3 -m pip install tox 20 | - name: Run static analysis for /lib for 3.5 21 | run: tox -vve static-lib 22 | static-analysis: 23 | name: Static analysis 24 | runs-on: ubuntu-latest 25 | steps: 26 | - name: Checkout 27 | uses: actions/checkout@v2 28 | - name: Install dependencies 29 | run: python3 -m pip install tox 30 | - name: Run static analysis (charm) 31 | run: tox -vve static-charm 32 | - name: Run static analysis (unit tests) 33 | run: tox -vve static-unit 34 | - name: Run static analysis (integration tests) 35 | run: tox -vve static-integration 36 | lint: 37 | name: Lint 38 | runs-on: ubuntu-latest 39 | steps: 40 | - name: Checkout 41 | uses: actions/checkout@v2 42 | - name: Install dependencies 43 | run: python3 -m pip install tox 44 | - name: Run linters 45 | run: tox -vve lint 46 | unit-test: 47 | name: Unit tests 48 | runs-on: ubuntu-latest 49 | steps: 50 | - name: Checkout 51 | uses: actions/checkout@v2 52 | - name: Install dependencies 53 | run: python -m pip install tox 54 | - name: Run tests 55 | run: tox -vve unit 56 | integration-test: 57 | name: Integration tests (microk8s) 58 | runs-on: ubuntu-latest 59 | steps: 60 | - name: Checkout 61 | uses: actions/checkout@v2 62 | - name: Setup operator environment 63 | uses: charmed-kubernetes/actions-operator@main 64 | with: 65 | provider: microk8s 66 | - name: Run alertmanger tests 67 | run: tox -vve integration 68 | release-to-charmhub: 69 | name: Release to CharmHub 70 | needs: 71 | - static-analysis 72 | - lib-check 73 | - lint 74 | - unit-test 75 | - integration-test 76 | runs-on: ubuntu-latest 77 | steps: 78 | - name: Checkout 79 | uses: actions/checkout@v2 80 | with: 81 | fetch-depth: 0 82 | - name: Select charmhub channel 83 | uses: canonical/charming-actions/channel@1.0.0 84 | id: channel 85 | - name: Upload charm to charmhub 86 | uses: canonical/charming-actions/upload-charm@1.0.0 87 | with: 88 | credentials: "${{ secrets.CHARMHUB_TOKEN }}" 89 | github-token: "${{ secrets.GITHUB_TOKEN }}" 90 | channel: "${{ steps.channel.outputs.name }}" 91 | -------------------------------------------------------------------------------- /icon.svg: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | image/svg+xml -------------------------------------------------------------------------------- /tests/integration/test_update_status_pressure.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright 2021 Canonical Ltd. 3 | # See LICENSE file for licensing details. 4 | 5 | """This test module tests common lifecycle behaviors under frequent update-status hook firing. 6 | 7 | 0. Set update-status frequency to the minimum possible 8 | 1. Deploys and relate the charm-under-test 9 | 2. Remove related app(s) 10 | """ 11 | 12 | import asyncio 13 | import logging 14 | from pathlib import Path 15 | 16 | import pytest 17 | import yaml 18 | from helpers import is_alertmanager_up 19 | from pytest_operator.plugin import OpsTest 20 | 21 | logger = logging.getLogger(__name__) 22 | 23 | METADATA = yaml.safe_load(Path("./metadata.yaml").read_text()) 24 | app_name = METADATA["name"] 25 | resources = {"alertmanager-image": METADATA["resources"]["alertmanager-image"]["upstream-source"]} 26 | 27 | 28 | @pytest.mark.abort_on_fail 29 | async def test_setup_env(ops_test: OpsTest): 30 | await ops_test.model.set_config( 31 | {"update-status-hook-interval": "10s", "logging-config": "=WARNING; unit=DEBUG"} 32 | ) 33 | 34 | 35 | @pytest.mark.abort_on_fail 36 | async def test_deploy_multiple_units(ops_test: OpsTest, charm_under_test): 37 | """Deploy the charm-under-test.""" 38 | logger.info("build charm from local source folder") 39 | 40 | logger.info("deploy charms") 41 | await asyncio.gather( 42 | ops_test.model.deploy( 43 | charm_under_test, application_name=app_name, resources=resources, num_units=2 44 | ), 45 | ops_test.model.deploy( 46 | "ch:prometheus-k8s", application_name="prom", channel="edge", trust=True 47 | ), 48 | ) 49 | 50 | await asyncio.gather( 51 | ops_test.model.add_relation(f"{app_name}:alerting", "prom"), 52 | ops_test.model.wait_for_idle(status="active", timeout=2500), 53 | ) 54 | 55 | assert await is_alertmanager_up(ops_test, app_name) 56 | 57 | 58 | @pytest.mark.abort_on_fail 59 | async def test_remove_related_app(ops_test: OpsTest): 60 | await ops_test.model.applications["prom"].remove() 61 | # Block until it is really gone. Added after an itest failed when tried to redeploy: 62 | # juju.errors.JujuError: ['cannot add application "related-app": application already exists'] 63 | await ops_test.model.block_until(lambda: "prom" not in ops_test.model.applications) 64 | await ops_test.model.wait_for_idle(apps=[app_name], status="active", timeout=300) 65 | assert await is_alertmanager_up(ops_test, app_name) 66 | 67 | 68 | @pytest.mark.abort_on_fail 69 | async def test_wait_through_a_few_update_status_cycles(ops_test: OpsTest): 70 | await asyncio.sleep(60) # should be longer than the update-status period 71 | 72 | # "Disable" update-status so the charm gets a chance to become idle for long enough for 73 | # wait_for_idle to succeed 74 | await ops_test.model.set_config({"update-status-hook-interval": "60m"}) 75 | 76 | await ops_test.model.wait_for_idle(apps=[app_name], status="active", timeout=300) 77 | -------------------------------------------------------------------------------- /tests/integration/test_config_changed_modifies_file.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright 2021 Canonical Ltd. 3 | # See LICENSE file for licensing details. 
4 | 5 | """This test module tests change in alertmanager config. 6 | 7 | 1. Deploy the charm under test with default config and wait for it to become active. 8 | 2. Make a config change and expect reload to be triggered. 9 | 3. Confirm changes applied. 10 | """ 11 | 12 | import logging 13 | from pathlib import Path 14 | 15 | import pytest 16 | import yaml 17 | from helpers import get_unit_address, is_alertmanager_up 18 | from pytest_operator.plugin import OpsTest 19 | 20 | from alertmanager_client import Alertmanager 21 | 22 | logger = logging.getLogger(__name__) 23 | 24 | METADATA = yaml.safe_load(Path("./metadata.yaml").read_text()) 25 | app_name = METADATA["name"] 26 | resources = {"alertmanager-image": METADATA["resources"]["alertmanager-image"]["upstream-source"]} 27 | 28 | 29 | @pytest.mark.abort_on_fail 30 | async def test_build_and_deploy(ops_test: OpsTest, charm_under_test): 31 | """Build the charm-under-test and deploy it together with related charms. 32 | 33 | Assert on the unit status before any relations/configurations take place. 34 | """ 35 | # deploy charm from local source folder 36 | await ops_test.model.deploy(charm_under_test, resources=resources, application_name=app_name) 37 | await ops_test.model.wait_for_idle(apps=[app_name], status="active", timeout=1000) 38 | assert ops_test.model.applications[app_name].units[0].workload_status == "active" 39 | assert await is_alertmanager_up(ops_test, app_name) 40 | 41 | 42 | async def test_update_config(ops_test: OpsTest): 43 | # Obtain a "before" snapshot of the config from the server. 44 | client = Alertmanager(await get_unit_address(ops_test, app_name, 0)) 45 | config_from_server_before = client.config() 46 | # Make sure the defaults is what we expect them to be (this is only a partial check, but an 47 | # easy one). 48 | assert "receivers" in config_from_server_before 49 | 50 | def rename_toplevel_receiver(config: dict, new_name: str): 51 | old_name = config["route"]["receiver"] 52 | config["route"]["receiver"] = new_name 53 | 54 | for receiver in config["receivers"]: 55 | if receiver["name"] == old_name: 56 | receiver["name"] = new_name 57 | 58 | # Modify the default config 59 | config = config_from_server_before.copy() 60 | receiver_name = config["route"]["receiver"] 61 | rename_toplevel_receiver(config, receiver_name * 2) 62 | 63 | await ops_test.model.applications[app_name].set_config({"config_file": yaml.safe_dump(config)}) 64 | await ops_test.model.wait_for_idle(apps=[app_name], status="active", timeout=60) 65 | 66 | # Obtain an "after" snapshot of the config from the server. 67 | config_from_server_after = client.config() 68 | # Make sure the current config is what we expect it to be (this is only a partial check, but an 69 | # easy one). 70 | assert config_from_server_after["receivers"] == config["receivers"] 71 | -------------------------------------------------------------------------------- /tests/unit/test_alertmanager_client.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright 2021 Canonical Ltd. 3 | # See LICENSE file for licensing details. 
4 | 5 | import json 6 | import unittest 7 | from datetime import datetime 8 | from unittest.mock import patch 9 | 10 | from alertmanager_client import Alertmanager, AlertmanagerBadResponse 11 | 12 | 13 | class TestAlertmanagerAPIClient(unittest.TestCase): 14 | def setUp(self): 15 | self.api = Alertmanager("address", 12345) 16 | 17 | def test_base_url(self): 18 | self.assertEqual("http://address:12345/", self.api.base_url) 19 | 20 | @patch("alertmanager_client.urllib.request.urlopen") 21 | def test_reload_succeed(self, urlopen_mock): 22 | urlopen_mock.return_value.code = 200 23 | urlopen_mock.return_value.reason = "OK" 24 | 25 | self.api.reload() 26 | urlopen_mock.assert_called() 27 | 28 | @patch("alertmanager_client.urllib.request.urlopen") 29 | def test_status_succeed(self, urlopen_mock): 30 | urlopen_mock.return_value.read = lambda: json.dumps({"status": "fake"}) 31 | urlopen_mock.return_value.code = 200 32 | urlopen_mock.return_value.reason = "OK" 33 | 34 | status = self.api.status() 35 | self.assertIsNotNone(status) 36 | self.assertDictEqual({"status": "fake"}, status) 37 | 38 | def test_reload_and_status_fail(self): 39 | def mock_connection_error(*args, **kwargs): 40 | import urllib.error 41 | 42 | raise urllib.error.HTTPError( 43 | url="mock://url", 44 | code=500, 45 | msg="mock msg", 46 | hdrs={"mock hdr": "mock smth"}, # type: ignore[arg-type] 47 | fp=None, 48 | ) 49 | 50 | with patch("alertmanager_client.urllib.request.urlopen", mock_connection_error): 51 | self.assertRaises(AlertmanagerBadResponse, self.api.reload) 52 | 53 | with patch("alertmanager_client.urllib.request.urlopen", mock_connection_error): 54 | self.assertRaises(AlertmanagerBadResponse, self.api.status) 55 | 56 | @patch("alertmanager_client.urllib.request.urlopen") 57 | def test_version(self, urlopen_mock): 58 | urlopen_mock.return_value.read = lambda: json.dumps({"versionInfo": {"version": "0.1.2"}}) 59 | urlopen_mock.return_value.code = 200 60 | urlopen_mock.return_value.reason = "OK" 61 | 62 | self.assertEqual(self.api.version, "0.1.2") 63 | 64 | @patch("alertmanager_client.urllib.request.urlopen") 65 | def test_alerts_can_be_set(self, urlopen_mock): 66 | msg = "HTTP 200 OK" 67 | urlopen_mock.return_value = msg 68 | alerts = [ 69 | { 70 | "startsAt": datetime.now().isoformat("T"), 71 | "status": "firing", 72 | "annotations": { 73 | "summary": "A fake alert", 74 | }, 75 | "labels": { 76 | "alertname": "fake alert", 77 | }, 78 | } 79 | ] 80 | status = self.api.set_alerts(alerts) 81 | urlopen_mock.assert_called() 82 | self.assertEqual(status, msg) 83 | -------------------------------------------------------------------------------- /tests/integration/test_upgrade_charm.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright 2021 Canonical Ltd. 3 | # See LICENSE file for licensing details. 4 | 5 | """This test module tests alertmanager upgrade with and without relations present. 6 | 7 | 1. Deploy the charm under test _from charmhub_. 8 | 2. Refresh with locally built charm. 9 | 3. Add all supported relations. 10 | 4. Refresh with locally built charm. 11 | 5. Add unit and refresh again (test multi unit upgrade with relations). 
12 | """ 13 | 14 | import asyncio 15 | import logging 16 | from pathlib import Path 17 | 18 | import pytest 19 | import yaml 20 | from helpers import is_alertmanager_up 21 | from pytest_operator.plugin import OpsTest 22 | 23 | logger = logging.getLogger(__name__) 24 | 25 | METADATA = yaml.safe_load(Path("./metadata.yaml").read_text()) 26 | app_name = METADATA["name"] 27 | resources = {"alertmanager-image": METADATA["resources"]["alertmanager-image"]["upstream-source"]} 28 | 29 | 30 | @pytest.mark.abort_on_fail 31 | async def test_upgrade_edge_with_local_in_isolation(ops_test: OpsTest, charm_under_test): 32 | """Build the charm-under-test, deploy the charm from charmhub, and upgrade from path.""" 33 | logger.info("deploy charm from charmhub") 34 | await ops_test.model.deploy("ch:alertmanager-k8s", application_name=app_name, channel="edge") 35 | await ops_test.model.wait_for_idle(apps=[app_name], status="active", timeout=1000) 36 | 37 | logger.info("upgrade deployed charm with local charm %s", charm_under_test) 38 | await ops_test.model.applications[app_name].refresh(path=charm_under_test, resources=resources) 39 | await ops_test.model.wait_for_idle(apps=[app_name], status="active", timeout=1000) 40 | assert await is_alertmanager_up(ops_test, app_name) 41 | 42 | 43 | @pytest.mark.abort_on_fail 44 | async def test_upgrade_local_with_local_with_relations(ops_test: OpsTest, charm_under_test): 45 | # Deploy related apps 46 | await asyncio.gather( 47 | ops_test.model.deploy( 48 | "ch:prometheus-k8s", application_name="prom", channel="edge", trust=True 49 | ), 50 | ops_test.model.deploy("ch:karma-k8s", application_name="karma", channel="edge"), 51 | ) 52 | 53 | # Relate apps 54 | await asyncio.gather( 55 | ops_test.model.add_relation(app_name, "prom:alertmanager"), 56 | ops_test.model.add_relation(app_name, "karma"), 57 | ) 58 | 59 | # Refresh from path 60 | await ops_test.model.applications[app_name].refresh(path=charm_under_test, resources=resources) 61 | await ops_test.model.wait_for_idle( 62 | apps=[app_name, "prom", "karma"], status="active", timeout=2500 63 | ) 64 | assert await is_alertmanager_up(ops_test, app_name) 65 | 66 | 67 | @pytest.mark.abort_on_fail 68 | async def test_upgrade_with_multiple_units(ops_test: OpsTest, charm_under_test): 69 | # Add unit 70 | await ops_test.model.applications[app_name].scale(scale_change=1) 71 | await ops_test.model.wait_for_idle( 72 | apps=[app_name, "prom", "karma"], status="active", timeout=1000 73 | ) 74 | 75 | # Refresh from path 76 | await ops_test.model.applications[app_name].refresh(path=charm_under_test, resources=resources) 77 | await ops_test.model.wait_for_idle( 78 | apps=[app_name, "prom", "karma"], status="active", timeout=2500 79 | ) 80 | assert await is_alertmanager_up(ops_test, app_name) 81 | -------------------------------------------------------------------------------- /tests/integration/test_rescale_charm.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright 2021 Canonical Ltd. 3 | # See LICENSE file for licensing details. 4 | 5 | """This test module tests rescaling. 6 | 7 | 1. Deploys multiple units of the charm under test and waits for them to become active 8 | 2. Reset and repeat the above until the leader unit is not the zero unit 9 | 3. Scales up the application by a few units and waits for them to become active 10 | 4. 
Scales down the application to below the leader unit, to trigger a leadership change event 11 | """ 12 | 13 | 14 | import logging 15 | from pathlib import Path 16 | 17 | import pytest 18 | import yaml 19 | from helpers import block_until_leader_elected, get_leader_unit_num, is_alertmanager_up 20 | from pytest_operator.plugin import OpsTest 21 | 22 | logger = logging.getLogger(__name__) 23 | 24 | METADATA = yaml.safe_load(Path("./metadata.yaml").read_text()) 25 | app_name = METADATA["name"] 26 | resources = {"alertmanager-image": METADATA["resources"]["alertmanager-image"]["upstream-source"]} 27 | 28 | 29 | # @pytest.mark.abort_on_fail 30 | @pytest.mark.xfail 31 | async def test_deploy_multiple_units(ops_test: OpsTest, charm_under_test): 32 | """Deploy the charm-under-test.""" 33 | logger.info("build charm from local source folder") 34 | 35 | logger.info("deploy charm") 36 | await ops_test.model.deploy( 37 | charm_under_test, application_name=app_name, resources=resources, num_units=10 38 | ) 39 | await block_until_leader_elected(ops_test, app_name) 40 | 41 | if await get_leader_unit_num(ops_test, app_name) == 0: 42 | # We're unlucky this time: unit/0 is the leader, which means no scale down could trigger a 43 | # leadership change event. 44 | # Fail the test instead of model.reset() and repeat, because this hangs on github actions. 45 | logger.info("Elected leader is unit/0 - resetting and repeating") 46 | assert 0, "No luck in electing a leader that is not the zero unit. Try re-running?" 47 | 48 | await ops_test.model.wait_for_idle(apps=[app_name], status="active", timeout=1000) 49 | 50 | 51 | # @pytest.mark.abort_on_fail 52 | @pytest.mark.xfail 53 | async def test_scale_down_to_single_unit_with_leadership_change(ops_test: OpsTest): 54 | """Scale down below current leader to trigger a leadership change event.""" 55 | await ops_test.model.applications[app_name].scale(scale=1) 56 | await ops_test.model.wait_for_idle( 57 | apps=[app_name], status="active", timeout=1000, wait_for_exact_units=1 58 | ) 59 | assert await is_alertmanager_up(ops_test, app_name) 60 | 61 | 62 | # @pytest.mark.abort_on_fail 63 | @pytest.mark.xfail 64 | async def test_scale_up_from_single_unit(ops_test: OpsTest): 65 | """Add a few more units.""" 66 | await ops_test.model.applications[app_name].scale(scale_change=2) 67 | await ops_test.model.wait_for_idle( 68 | apps=[app_name], status="active", timeout=1000, wait_for_exact_units=3 69 | ) 70 | assert await is_alertmanager_up(ops_test, app_name) 71 | 72 | 73 | # @pytest.mark.abort_on_fail 74 | @pytest.mark.xfail 75 | async def test_scale_down_to_single_unit_without_leadership_change(ops_test): 76 | """Remove a few units.""" 77 | await ops_test.model.applications[app_name].scale(scale_change=-2) 78 | await ops_test.model.wait_for_idle( 79 | apps=[app_name], status="active", timeout=1000, wait_for_exact_units=1 80 | ) 81 | assert await is_alertmanager_up(ops_test, app_name) 82 | -------------------------------------------------------------------------------- /tests/integration/helpers.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Canonical Ltd. 2 | # See LICENSE file for licensing details. 
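# Descriptive note (added): the most commonly used helpers below are
# get_unit_address(), is_alertmanager_up() (which polls /api/v2/status on port 9093 for
# every unit) and block_until_leader_elected(), which works around
# model.block_until not accepting coroutine predicates.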
3 | 4 | """Helper functions for writing tests.""" 5 | 6 | import asyncio 7 | import json 8 | import logging 9 | import urllib.request 10 | from typing import Dict 11 | 12 | from pytest_operator.plugin import OpsTest 13 | 14 | logger = logging.getLogger(__name__) 15 | 16 | 17 | async def get_unit_address(ops_test: OpsTest, app_name: str, unit_num: int) -> str: 18 | """Get private address of a unit.""" 19 | status = await ops_test.model.get_status() # noqa: F821 20 | return status["applications"][app_name]["units"][f"{app_name}/{unit_num}"]["address"] 21 | 22 | 23 | def interleave(l1: list, l2: list) -> list: 24 | """Interleave two lists. 25 | 26 | >>> interleave([1,2,3], ['a', 'b', 'c']) 27 | [1, 'a', 2, 'b', 3, 'c'] 28 | 29 | Reference: https://stackoverflow.com/a/11125298/3516684 30 | """ 31 | return [x for t in zip(l1, l2) for x in t] 32 | 33 | 34 | async def cli_upgrade_from_path_and_wait( 35 | ops_test: OpsTest, 36 | path: str, 37 | alias: str, 38 | resources: Dict[str, str] = None, 39 | wait_for_status: str = None, 40 | ): 41 | if resources is None: 42 | resources = {} 43 | 44 | resource_pairs = [f"{k}={v}" for k, v in resources.items()] 45 | resource_arg_prefixes = ["--resource"] * len(resource_pairs) 46 | resource_args = interleave(resource_arg_prefixes, resource_pairs) 47 | 48 | cmd = [ 49 | "juju", 50 | "refresh", 51 | "--path", 52 | path, 53 | alias, 54 | *resource_args, 55 | ] 56 | 57 | retcode, stdout, stderr = await ops_test.run(*cmd) 58 | assert retcode == 0, f"Upgrade failed: {(stderr or stdout).strip()}" 59 | logger.info(stdout) 60 | await ops_test.model.wait_for_idle(apps=[alias], status=wait_for_status, timeout=120) 61 | 62 | 63 | async def get_leader_unit_num(ops_test: OpsTest, app_name: str): 64 | units = ops_test.model.applications[app_name].units 65 | is_leader = [await units[i].is_leader_from_status() for i in range(len(units))] 66 | logger.info("Leaders: %s", is_leader) 67 | return is_leader.index(True) 68 | 69 | 70 | async def is_leader_elected(ops_test: OpsTest, app_name: str): 71 | units = ops_test.model.applications[app_name].units 72 | return any([await units[i].is_leader_from_status() for i in range(len(units))]) 73 | 74 | 75 | async def block_until_leader_elected(ops_test: OpsTest, app_name: str): 76 | # await ops_test.model.block_until(is_leader_elected) 77 | # block_until does not take async (yet?) https://github.com/juju/python-libjuju/issues/609 78 | while not await is_leader_elected(ops_test, app_name): 79 | await asyncio.sleep(5) 80 | 81 | 82 | async def is_alertmanage_unit_up(ops_test: OpsTest, app_name: str, unit_num: int): 83 | address = await get_unit_address(ops_test, app_name, unit_num) 84 | url = f"http://{address}:9093" 85 | logger.info("am public address: %s", url) 86 | 87 | response = urllib.request.urlopen(f"{url}/api/v2/status", data=None, timeout=2.0) 88 | return response.code == 200 and "versionInfo" in json.loads(response.read()) 89 | 90 | 91 | async def is_alertmanager_up(ops_test: OpsTest, app_name: str): 92 | return all( 93 | [ 94 | await is_alertmanage_unit_up(ops_test, app_name, unit_num) 95 | for unit_num in range(len(ops_test.model.applications[app_name].units)) 96 | ] 97 | ) 98 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Canonical Ltd. 2 | # See LICENSE file for licensing details. 
3 | 4 | [tox] 5 | skipsdist=True 6 | skip_missing_interpreters = True 7 | envlist = lint, static-{charm,lib,unit,integration}, unit 8 | 9 | [vars] 10 | src_path = {toxinidir}/src 11 | tst_path = {toxinidir}/tests 12 | lib_path = {toxinidir}/lib/charms/alertmanager_k8s 13 | all_path = {[vars]src_path} {[vars]tst_path} {[vars]lib_path} 14 | 15 | [testenv] 16 | basepython = python3 17 | setenv = 18 | PYTHONPATH = {toxinidir}:{toxinidir}/lib:{[vars]src_path} 19 | PYTHONBREAKPOINT=ipdb.set_trace 20 | PY_COLORS=1 21 | passenv = 22 | PYTHONPATH 23 | HOME 24 | PATH 25 | CHARM_BUILD_DIR 26 | MODEL_SETTINGS 27 | HTTP_PROXY 28 | HTTPS_PROXY 29 | NO_PROXY 30 | 31 | [testenv:fmt] 32 | description = Apply coding style standards to code 33 | deps = 34 | black 35 | isort 36 | commands = 37 | isort {[vars]all_path} 38 | black {[vars]all_path} 39 | 40 | [testenv:lint] 41 | description = Check code against coding style standards 42 | deps = 43 | black 44 | flake8 45 | flake8-docstrings 46 | flake8-copyright 47 | flake8-builtins 48 | pyproject-flake8 49 | pep8-naming 50 | isort 51 | codespell 52 | commands = 53 | codespell {[vars]lib_path} 54 | codespell . --skip .git --skip .tox --skip build --skip lib --skip venv --skip .mypy_cache 55 | # pflake8 wrapper supports config from pyproject.toml 56 | pflake8 {[vars]all_path} 57 | isort --check-only --diff {[vars]all_path} 58 | black --check --diff {[vars]all_path} 59 | 60 | [testenv:static-{charm,lib,unit,integration}] 61 | description = Run static analysis checks 62 | setenv = 63 | unit: MYPYPATH = {[vars]tst_path}/unit 64 | integration: MYPYPATH = {[vars]tst_path}/integration 65 | deps = 66 | mypy 67 | types-PyYAML 68 | types-setuptools 69 | types-toml 70 | # pip-check-reqs does not yet work with recent pip 71 | pip-check-reqs 72 | charm: pip<=21.1.3 73 | charm: -r{toxinidir}/requirements.txt 74 | lib: git+https://github.com/canonical/operator#egg=ops 75 | unit: {[testenv:unit]deps} 76 | integration: {[testenv:integration]deps} 77 | commands = 78 | charm: pip-missing-reqs {toxinidir}/src {toxinidir}/lib --requirements-file={toxinidir}/requirements.txt 79 | charm: pip-extra-reqs {toxinidir}/src {toxinidir}/lib --requirements-file={toxinidir}/requirements.txt 80 | charm: mypy {[vars]src_path} {posargs} 81 | lib: mypy --python-version 3.5 {[vars]lib_path} {posargs} 82 | unit: mypy {[vars]tst_path}/unit {posargs} 83 | integration: mypy {[vars]tst_path}/integration {posargs} 84 | 85 | [testenv:unit] 86 | description = Run unit tests 87 | deps = 88 | pytest 89 | coverage[toml] 90 | hypothesis 91 | validators 92 | -r{toxinidir}/requirements.txt 93 | commands = 94 | coverage run \ 95 | --source={[vars]src_path},{[vars]lib_path} \ 96 | -m pytest -v --tb native --log-cli-level=INFO -s {posargs} {[vars]tst_path}/unit 97 | coverage report 98 | 99 | [testenv:integration] 100 | description = Run integration tests 101 | deps = 102 | #git+https://github.com/juju/python-libjuju.git 103 | juju 104 | pytest 105 | #git+https://github.com/charmed-kubernetes/pytest-operator.git 106 | pytest-operator 107 | pytest-httpserver 108 | commands = 109 | pytest -v --tb native --log-cli-level=INFO -s {posargs} {toxinidir}/tests/integration 110 | 111 | [testenv:integration-bundle] 112 | description = Run cos-lite bundle integration tests but with alertmanager built from source 113 | bundle_dir = {envtmpdir}/cos-lite-bundle 114 | deps = 115 | # deps from cos-lite bundle - these are needed here because running pytest on the bundle 116 | jinja2 117 | 
#git+https://github.com/juju/python-libjuju.git 118 | juju 119 | pytest 120 | #git+https://github.com/charmed-kubernetes/pytest-operator.git 121 | pytest-operator 122 | allowlist_externals = 123 | git 124 | commands = 125 | git clone --single-branch --depth=1 https://github.com/canonical/cos-light-bundle.git {[testenv:integration-bundle]bundle_dir} 126 | # run pytest on the integration tests of the cos-lite bundle, but override alertmanager with 127 | # path to this source dir 128 | pytest -v --tb native --log-cli-level=INFO -s --alertmanager={toxinidir} {posargs} {[testenv:integration-bundle]bundle_dir}/tests/integration 129 | -------------------------------------------------------------------------------- /tests/integration/test_rerelate_alertmanager_dispatch_metrics_endpoint.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright 2021 Canonical Ltd. 3 | # See LICENSE file for licensing details. 4 | 5 | """This test module tests alertmanager response to related apps being removed and re-related. 6 | 7 | 1. Deploy the charm under test and a related app (Promethes) relate them using 8 | `alertmanager_dispatch` and `prometheus_scrape` interfaces and wait for them to become idle. 9 | 2. Remove the relation. 10 | 3. Re-add the relation. 11 | 4. Remove the related application. 12 | 5. Redeploy the related application and add the relation back again. 13 | """ 14 | 15 | import asyncio 16 | import logging 17 | from pathlib import Path 18 | 19 | import pytest 20 | import yaml 21 | from helpers import is_alertmanager_up 22 | from pytest_operator.plugin import OpsTest 23 | 24 | logger = logging.getLogger(__name__) 25 | 26 | METADATA = yaml.safe_load(Path("./metadata.yaml").read_text()) 27 | app_name = METADATA["name"] 28 | resources = {"alertmanager-image": METADATA["resources"]["alertmanager-image"]["upstream-source"]} 29 | related_app = "related-app" 30 | 31 | 32 | @pytest.mark.abort_on_fail 33 | async def test_build_and_deploy(ops_test: OpsTest, charm_under_test): 34 | """Build the charm-under-test and deploy it together with related charms.""" 35 | await asyncio.gather( 36 | ops_test.model.deploy( 37 | charm_under_test, resources=resources, application_name=app_name, num_units=2 38 | ), 39 | ops_test.model.deploy( 40 | "ch:prometheus-k8s", application_name=related_app, channel="edge", trust=True 41 | ), 42 | ) 43 | 44 | await ops_test.model.add_relation(app_name, f"{related_app}:alertmanager") 45 | await ops_test.model.wait_for_idle(apps=[app_name, related_app], status="active", timeout=2500) 46 | 47 | assert await is_alertmanager_up(ops_test, app_name) 48 | 49 | await ops_test.model.add_relation(app_name, f"{related_app}:metrics-endpoint") 50 | await ops_test.model.wait_for_idle(apps=[app_name, related_app], status="active", timeout=1000) 51 | 52 | assert await is_alertmanager_up(ops_test, app_name) 53 | 54 | 55 | @pytest.mark.abort_on_fail 56 | async def test_remove_relation(ops_test: OpsTest): 57 | await ops_test.model.applications[app_name].remove_relation("alerting", related_app) 58 | await ops_test.model.applications[app_name].remove_relation( 59 | "self-metrics-endpoint", related_app 60 | ) 61 | await ops_test.model.wait_for_idle(apps=[app_name], status="active", timeout=1000) 62 | assert await is_alertmanager_up(ops_test, app_name) 63 | 64 | 65 | @pytest.mark.abort_on_fail 66 | async def test_rerelate(ops_test: OpsTest): 67 | await ops_test.model.add_relation(app_name, f"{related_app}:alertmanager") 68 | await 
ops_test.model.wait_for_idle(apps=[app_name, related_app], status="active", timeout=1000) 69 | assert await is_alertmanager_up(ops_test, app_name) 70 | 71 | await ops_test.model.add_relation(app_name, f"{related_app}:metrics-endpoint") 72 | await ops_test.model.wait_for_idle(apps=[app_name, related_app], status="active", timeout=1000) 73 | assert await is_alertmanager_up(ops_test, app_name) 74 | 75 | 76 | @pytest.mark.abort_on_fail 77 | async def test_remove_related_app(ops_test: OpsTest): 78 | await ops_test.model.applications[related_app].remove() 79 | # Block until it is really gone. Added after an itest failed when tried to redeploy: 80 | # juju.errors.JujuError: ['cannot add application "related-app": application already exists'] 81 | await ops_test.model.block_until(lambda: related_app not in ops_test.model.applications) 82 | await ops_test.model.wait_for_idle(apps=[app_name], status="active", timeout=1000) 83 | assert await is_alertmanager_up(ops_test, app_name) 84 | 85 | 86 | @pytest.mark.abort_on_fail 87 | async def test_rerelate_app(ops_test: OpsTest): 88 | await ops_test.model.deploy( 89 | "ch:prometheus-k8s", application_name=related_app, channel="edge", trust=True 90 | ) 91 | await ops_test.model.add_relation(app_name, f"{related_app}:alertmanager") 92 | await ops_test.model.wait_for_idle(apps=[app_name, related_app], status="active", timeout=1000) 93 | assert await is_alertmanager_up(ops_test, app_name) 94 | 95 | await ops_test.model.add_relation(app_name, f"{related_app}:metrics-endpoint") 96 | await ops_test.model.wait_for_idle(apps=[app_name, related_app], status="active", timeout=1000) 97 | assert await is_alertmanager_up(ops_test, app_name) 98 | -------------------------------------------------------------------------------- /tests/unit/charm/test_push_config_to_workload_on_startup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright 2021 Canonical Ltd. 3 | # See LICENSE file for licensing details. 4 | 5 | import logging 6 | import unittest 7 | from unittest.mock import patch 8 | 9 | import hypothesis.strategies as st 10 | import validators 11 | import yaml 12 | from helpers import tautology 13 | from hypothesis import given 14 | from ops.testing import Harness 15 | 16 | from charm import Alertmanager, AlertmanagerCharm 17 | 18 | logger = logging.getLogger(__name__) 19 | 20 | 21 | class TestPushConfigToWorkloadOnStartup(unittest.TestCase): 22 | """Feature: Push config to workload on startup. 23 | 24 | Background: Charm starts up with initial hooks. 
25 | """ 26 | 27 | @patch.object(Alertmanager, "reload", tautology) 28 | @patch("charm.KubernetesServicePatch", lambda *a, **kw: None) 29 | def setUp(self, *_): 30 | self.harness = Harness(AlertmanagerCharm) 31 | self.addCleanup(self.harness.cleanup) 32 | 33 | # self.harness.charm.app.name does not exist before .begin() 34 | # https://github.com/canonical/operator/issues/675 35 | # self.peer_rel_id = self.harness.add_relation("replicas", self.app_name) 36 | self.app_name = "alertmanager-k8s" 37 | self.peer_rel_id = self.harness.add_relation("replicas", self.app_name) 38 | self.harness.begin_with_initial_hooks() 39 | 40 | @given(st.booleans()) 41 | def test_single_unit_cluster(self, is_leader): 42 | """Scenario: Current unit is the only unit present.""" 43 | # WHEN only one unit is 44 | self.assertEqual(self.harness.model.app.planned_units(), 1) 45 | self.harness.set_leader(is_leader) 46 | 47 | # THEN amtool config is rendered 48 | amtool_config = yaml.safe_load( 49 | self.harness.charm.container.pull(self.harness.charm._amtool_config_path) 50 | ) 51 | self.assertTrue(validators.url(amtool_config["alertmanager.url"])) 52 | 53 | # AND alertmanager config is rendered 54 | am_config = yaml.safe_load( 55 | self.harness.charm.container.pull(self.harness.charm._config_path) 56 | ) 57 | self.assertGreaterEqual(am_config.keys(), {"global", "route", "receivers"}) 58 | 59 | # AND path to config file is part of pebble layer command 60 | command = ( 61 | self.harness.get_container_pebble_plan(self.harness.charm._container_name) 62 | .services[self.harness.charm._service_name] 63 | .command 64 | ) 65 | self.assertIn(f"--config.file={self.harness.charm._config_path}", command) 66 | 67 | # AND peer clusters cli arg is not present in pebble layer command 68 | self.assertNotIn("--cluster.peer=", command) 69 | 70 | @given(st.booleans(), st.integers(2, 10)) 71 | def test_multi_unit_cluster(self, is_leader, num_units): 72 | """Scenario: Current unit is a part of a multi-unit cluster.""" 73 | # without the try-finally, if any assertion fails, then hypothesis would reenter without 74 | # the cleanup, carrying forward the units that were previously added 75 | try: 76 | self.assertEqual(self.harness.model.app.planned_units(), 1) 77 | 78 | # WHEN multiple units are present 79 | for i in range(1, num_units): 80 | self.harness.add_relation_unit(self.peer_rel_id, f"{self.app_name}/{i}") 81 | self.harness.update_relation_data( 82 | self.peer_rel_id, 83 | f"{self.app_name}/{i}", 84 | {"private_address": f"{2*i}.{2*i}.{2*i}.{2*i}"}, 85 | ) 86 | 87 | self.assertEqual(self.harness.model.app.planned_units(), num_units) 88 | self.harness.set_leader(is_leader) 89 | 90 | # THEN peer clusters cli arg is present in pebble layer command 91 | command = ( 92 | self.harness.get_container_pebble_plan(self.harness.charm._container_name) 93 | .services[self.harness.charm._service_name] 94 | .command 95 | ) 96 | self.assertIn("--cluster.peer=", command) 97 | 98 | finally: 99 | # cleanup added units to prep for reentry by hypothesis' strategy 100 | for i in reversed(range(1, num_units)): 101 | self.harness.remove_relation_unit(self.peer_rel_id, f"{self.app_name}/{i}") 102 | -------------------------------------------------------------------------------- /tests/integration/test_templates.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright 2022 Canonical Ltd. 3 | # See LICENSE file for licensing details. 
4 | 5 | import json 6 | import logging 7 | from datetime import datetime, timedelta, timezone 8 | from pathlib import Path 9 | 10 | import pytest 11 | import yaml 12 | from helpers import get_unit_address, is_alertmanager_up 13 | from pytest_operator.plugin import OpsTest 14 | from werkzeug.wrappers import Request, Response 15 | 16 | from alertmanager_client import Alertmanager 17 | 18 | logger = logging.getLogger(__name__) 19 | 20 | METADATA = yaml.safe_load(Path("./metadata.yaml").read_text()) 21 | app_name = METADATA["name"] 22 | resources = {"alertmanager-image": METADATA["resources"]["alertmanager-image"]["upstream-source"]} 23 | 24 | 25 | def request_handler(request: Request): 26 | response = Response("OK", status=200, content_type="text/plain") 27 | logger.info("Got Request Data : %s", json.loads(request.data.decode("utf-8"))) 28 | return response 29 | 30 | 31 | @pytest.mark.abort_on_fail 32 | async def test_receiver_gets_alert(ops_test: OpsTest, charm_under_test, httpserver): 33 | 34 | # deploy charm from local source folder 35 | await ops_test.model.deploy(charm_under_test, resources=resources, application_name=app_name) 36 | await ops_test.model.wait_for_idle(apps=[app_name], status="active", timeout=1000) 37 | assert ops_test.model.applications[app_name].units[0].workload_status == "active" 38 | assert await is_alertmanager_up(ops_test, app_name) 39 | 40 | # define the alertmanager configuration 41 | receiver_name = "fake-receiver" 42 | aconfig = { 43 | "global": {"http_config": {"tls_config": {"insecure_skip_verify": True}}}, 44 | "route": { 45 | "group_by": ["alertname"], 46 | "group_wait": "3s", 47 | "group_interval": "5m", 48 | "repeat_interval": "1h", 49 | "receiver": receiver_name, 50 | }, 51 | "receivers": [ 52 | { 53 | "name": receiver_name, 54 | "slack_configs": [ 55 | { 56 | "api_url": httpserver.url_for("/"), 57 | "channel": "test", 58 | "text": r"https://localhost/alerts/{{ .GroupLabels.alertname }}", 59 | } 60 | ], 61 | } 62 | ], 63 | } 64 | 65 | # use a template to define the slack callback id 66 | atemplate = r'{{ define "slack.default.callbackid" }}2{{ end }}' 67 | # set alertmanager configuration and template file 68 | await ops_test.model.applications[app_name].set_config( 69 | {"config_file": yaml.safe_dump(aconfig), "templates_file": atemplate} 70 | ) 71 | await ops_test.model.wait_for_idle(apps=[app_name], status="active", timeout=60) 72 | 73 | # create an alert 74 | start_time = datetime.now(timezone.utc) 75 | end_time = start_time + timedelta(minutes=5) 76 | alert_name = "fake-alert" 77 | model_uuid = "1234" 78 | alerts = [ 79 | { 80 | "startsAt": start_time.isoformat("T"), 81 | "endsAt": end_time.isoformat("T"), 82 | "status": "firing", 83 | "annotations": { 84 | "summary": "A fake alert", 85 | }, 86 | "labels": { 87 | "juju_model_uuid": model_uuid, 88 | "juju_application": app_name, 89 | "juju_model": ops_test.model_name, 90 | "alertname": alert_name, 91 | }, 92 | "generatorURL": f"http://localhost/{alert_name}", 93 | } 94 | ] 95 | 96 | # define the expected slack notification for the alert 97 | expected_notification = { 98 | "channel": "test", 99 | "username": "Alertmanager", 100 | "attachments": [ 101 | { 102 | "title": f"[FIRING:1] {alert_name} {app_name} {ops_test.model_name} {model_uuid} ", 103 | "title_link": f"http://{app_name}-0:9093/#/alerts?receiver={receiver_name}", 104 | "text": f"https://localhost/alerts/{alert_name}", 105 | "fallback": f"[FIRING:1] {alert_name} {app_name} {ops_test.model_name} {model_uuid} | " 106 | 
f"http://{app_name}-0:9093/#/alerts?receiver={receiver_name}", 107 | "callback_id": "2", 108 | "footer": "", 109 | "color": "danger", 110 | "mrkdwn_in": ["fallback", "pretext", "text"], 111 | } 112 | ], 113 | } 114 | 115 | # set the alert 116 | with httpserver.wait(timeout=120) as waiting: 117 | # expect an alert to be forwarded to the receiver 118 | httpserver.expect_oneshot_request( 119 | "/", method="POST", json=expected_notification 120 | ).respond_with_handler(request_handler) 121 | client_address = await get_unit_address(ops_test, app_name, 0) 122 | amanager = Alertmanager(address=client_address) 123 | amanager.set_alerts(alerts) 124 | 125 | # check receiver got an alert 126 | assert waiting.result 127 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to alertmanager-k8s 2 | 3 | ## Overview 4 | 5 | This documents explains the processes and practices recommended for 6 | contributing enhancements or bug fixing to the Alertmanager Charmed Operator. 7 | 8 | The intended use case of this operator is to be deployed as part of the 9 | [COS Lite] bundle, although that is not necessary. 10 | 11 | 12 | ## Setup 13 | 14 | A typical setup using [snaps](https://snapcraft.io/) can be found in the 15 | [Juju docs](https://juju.is/docs/sdk/dev-setup). 16 | 17 | 18 | ## Developing 19 | 20 | - Prior to getting started on a pull request, we first encourage you to open an 21 | issue explaining the use case or bug. 22 | This gives other contributors a chance to weigh in early in the process. 23 | - To author PRs you should be familiar with [juju](https://juju.is/#what-is-juju) 24 | and [how operators are written](https://juju.is/docs/sdk). 25 | - The best way to get a head start is to join the conversation on our 26 | [Mattermost channel] or [Discourse]. 27 | - All enhancements require review before being merged. Besides the 28 | code quality and test coverage, the review will also take into 29 | account the resulting user experience for Juju administrators using 30 | this charm. To be able to merge you would have to rebase 31 | onto the `main` branch. We do this to avoid merge commits and to have a 32 | linear Git history. 33 | - We use [`tox`](https://tox.wiki/en/latest/#) to manage all virtualenvs for 34 | the development lifecycle. 35 | 36 | 37 | ### Testing 38 | Unit tests are written with the Operator Framework [test harness] and 39 | integration tests are written using [pytest-operator] and [python-libjuju]. 40 | 41 | The default test environments - lint, static and unit - will run if you start 42 | `tox` without arguments. 43 | 44 | You can also manually run a specific test environment: 45 | 46 | ```shell 47 | tox -e fmt # update your code according to linting rules 48 | tox -e lint # code style 49 | tox -e static # static analysis 50 | tox -e unit # unit tests 51 | tox -e integration # integration tests 52 | tox -e integration-lma # integration tests for the lma-light bundle 53 | ``` 54 | 55 | `tox` creates a virtual environment for every tox environment defined in 56 | [tox.ini](tox.ini). 
To activate a tox environment for manual testing, 57 | 58 | ```shell 59 | source .tox/unit/bin/activate 60 | ``` 61 | 62 | 63 | #### Manual testing 64 | Alerts can be created using 65 | [`amtool`](https://manpages.debian.org/testing/prometheus-alertmanager/amtool.1.en.html), 66 | 67 | ```shell 68 | amtool alert add alertname=oops service="my-service" severity=warning \ 69 | instance="oops.example.net" --annotation=summary="High latency is high!" \ 70 | --generator-url="http://prometheus.int.example.net" 71 | ``` 72 | 73 | or using [Alertmanager's HTTP API][Alertmanager API browser], 74 | [for example](https://gist.github.com/cherti/61ec48deaaab7d288c9fcf17e700853a): 75 | 76 | ```shell 77 | alertmanager_ip=$(juju status alertmanager/0 --format=json | \ 78 | jq -r ".applications.alertmanager.units.\"alertmanager/0\".address") 79 | 80 | curl -XPOST http://$alertmanager_ip:9093/api/v1/alerts -d "[{ 81 | \"status\": \"firing\", 82 | \"labels\": { 83 | \"alertname\": \"$name\", 84 | \"service\": \"my-service\", 85 | \"severity\":\"warning\", 86 | \"instance\": \"$name.example.net\" 87 | }, 88 | \"annotations\": { 89 | \"summary\": \"High latency is high!\" 90 | }, 91 | \"generatorURL\": \"http://prometheus.int.example.net\" 92 | }]" 93 | ``` 94 | 95 | The alert should then be listed, 96 | 97 | ```shell 98 | curl http://$alertmanager_ip:9093/api/v1/alerts 99 | ``` 100 | 101 | and visible on a karma dashboard, if configured. 102 | 103 | Relations between alertmanager and prometheus can be verified by 104 | [querying prometheus](https://prometheus.io/docs/prometheus/latest/querying/api/#alertmanagers) 105 | for active alertmanagers: 106 | 107 | ```shell 108 | curl -X GET "http://$prom_ip:9090/api/v1/alertmanagers" 109 | ``` 110 | 111 | ## Build charm 112 | 113 | Build the charm in this git repository using 114 | 115 | ```shell 116 | charmcraft pack 117 | ``` 118 | 119 | which will create a `*.charm` file you can deploy with: 120 | 121 | ```shell 122 | juju deploy ./alertmanager-k8s.charm \ 123 | --resource alertmanager-image=ubuntu/prometheus-alertmanager \ 124 | --config config_file='@path/to/alertmanager.yml' \ 125 | --config templates_file='@path/to/templates.tmpl' 126 | ``` 127 | 128 | 129 | ## Code overview 130 | - The main charm class is `AlertmanagerCharm`, which responds to config changes 131 | (via `ConfigChangedEvent`) and cluster changes (via `RelationJoinedEvent`, 132 | `RelationChangedEvent` and `RelationDepartedEvent`). 133 | - All lifecycle events call a common hook, `_common_exit_hook`, after executing 134 | their own business logic. This pattern simplifies state tracking and improves 135 | consistency. 136 | - On startup, the charm waits for `PebbleReadyEvent` and for an IP address to 137 | become available before starting the alertmanager service and declaring 138 | `ActiveStatus`. If the user-provided configuration contains a `templates` section, 139 | the charm goes into blocked state (templates must be provided via the `templates_file` option instead). 140 | 141 | ## Design choices 142 | - The `alertmanager.yml` config file is created in its entirety by the charm 143 | code on startup (the default `alertmanager.yml` is overwritten). This is done 144 | to maintain consistency across OCI images. 145 | - Hot reload via the alertmanager HTTP API is used whenever possible instead of 146 | service restart, to minimize downtime (see the example below).
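For reference, the same hot-reload endpoint used by the charm can also be hit manually (a sketch, assuming the default API port 9093 and the `$alertmanager_ip` variable from the manual-testing section above):

```shell
curl -X POST http://$alertmanager_ip:9093/-/reload
```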
147 | 148 | 149 | [Alertmanager API browser]: https://petstore.swagger.io/?url=https://raw.githubusercontent.com/prometheus/alertmanager/master/api/v2/openapi.yaml 150 | [gh:Prometheus operator]: https://github.com/canonical/prometheus-operator 151 | [Prometheus operator]: https://charmhub.io/prometheus-k8s 152 | [COS Lite]: https://charmhub.io/cos-lite 153 | [Mattermost channel]: https://chat.charmhub.io/charmhub/channels/observability 154 | [Discourse]: https://discourse.charmhub.io/tag/alertmanager 155 | [test harness]: https://ops.readthedocs.io/en/latest/#module-ops.testing 156 | [pytest-operator]: https://github.com/charmed-kubernetes/pytest-operator/blob/main/docs/reference.md 157 | [python-libjuju]: https://pythonlibjuju.readthedocs.io/en/latest/ 158 | -------------------------------------------------------------------------------- /tests/unit/test_charm.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright 2021 Canonical Ltd. 3 | # See LICENSE file for licensing details. 4 | 5 | import unittest 6 | from unittest.mock import patch 7 | 8 | import ops 9 | import yaml 10 | from helpers import tautology 11 | from ops.model import ActiveStatus, BlockedStatus 12 | from ops.testing import Harness 13 | 14 | from charm import Alertmanager, AlertmanagerCharm 15 | 16 | 17 | class TestWithInitialHooks(unittest.TestCase): 18 | container_name: str = "alertmanager" 19 | 20 | @patch.object(Alertmanager, "reload", tautology) 21 | @patch("charm.KubernetesServicePatch", lambda x, y: None) 22 | @patch("socket.getfqdn", new=lambda *args: "fqdn") 23 | def setUp(self, *unused): 24 | self.harness = Harness(AlertmanagerCharm) 25 | self.addCleanup(self.harness.cleanup) 26 | 27 | self.relation_id = self.harness.add_relation("alerting", "otherapp") 28 | self.harness.add_relation_unit(self.relation_id, "otherapp/0") 29 | self.harness.set_leader(True) 30 | 31 | self.harness.begin_with_initial_hooks() 32 | 33 | def test_num_peers(self): 34 | self.assertEqual(0, len(self.harness.charm.peer_relation.units)) # type: ignore 35 | 36 | def test_pebble_layer_added(self, *unused): 37 | self.harness.container_pebble_ready(self.container_name) 38 | plan = self.harness.get_container_pebble_plan(self.container_name) 39 | 40 | # Check we've got the plan as expected 41 | self.assertIsNotNone(plan.services) 42 | self.assertIsNotNone(service := plan.services.get(self.harness.charm._service_name)) 43 | self.assertIsNotNone(command := service.command) 44 | 45 | # Check command is as expected 46 | self.assertEqual(plan.services, self.harness.charm._alertmanager_layer().services) 47 | 48 | # Check command contains key arguments 49 | self.assertIn("--config.file", command) 50 | self.assertIn("--storage.path", command) 51 | self.assertIn("--web.listen-address", command) 52 | self.assertIn("--cluster.listen-address", command) 53 | 54 | # Check the service was started 55 | service = self.harness.model.unit.get_container("alertmanager").get_service("alertmanager") 56 | self.assertTrue(service.is_running()) 57 | 58 | def test_relation_data_provides_public_address(self): 59 | # to suppress mypy error: Item "None" of "Optional[Any]" has no attribute "get_relation" 60 | model = self.harness.charm.framework.model 61 | assert model is not None 62 | 63 | rel = model.get_relation("alerting", self.relation_id) 64 | expected_address = "fqdn:{}".format(self.harness.charm.alertmanager_provider.api_port) 65 | self.assertEqual({"public_address": expected_address}, 
rel.data[self.harness.charm.unit]) # type: ignore 66 | 67 | def test_topology_added_if_user_provided_config_without_group_by(self, *unused): 68 | self.harness.container_pebble_ready(self.container_name) 69 | 70 | new_config = yaml.dump({"not a real config": "but good enough for testing"}) 71 | self.harness.update_config({"config_file": new_config}) 72 | updated_config = yaml.safe_load( 73 | self.harness.charm.container.pull(self.harness.charm._config_path) 74 | ) 75 | 76 | self.assertEqual(updated_config["not a real config"], "but good enough for testing") 77 | self.assertListEqual( 78 | sorted(updated_config["route"]["group_by"]), 79 | sorted(["juju_model", "juju_application", "juju_model_uuid"]), 80 | ) 81 | 82 | def test_topology_added_if_user_provided_config_with_group_by(self, *unused): 83 | self.harness.container_pebble_ready(self.container_name) 84 | 85 | new_config = yaml.dump({"route": {"group_by": ["alertname", "juju_model"]}}) 86 | self.harness.update_config({"config_file": new_config}) 87 | updated_config = yaml.safe_load( 88 | self.harness.charm.container.pull(self.harness.charm._config_path) 89 | ) 90 | 91 | self.assertListEqual( 92 | sorted(updated_config["route"]["group_by"]), 93 | sorted(["alertname", "juju_model", "juju_application", "juju_model_uuid"]), 94 | ) 95 | 96 | def test_charm_blocks_if_user_provided_config_with_templates(self, *unused): 97 | self.harness.container_pebble_ready(self.container_name) 98 | 99 | new_config = yaml.dump({"templates": ["/what/ever/*.tmpl"]}) 100 | self.harness.update_config({"config_file": new_config}) 101 | self.assertIsInstance(self.harness.charm.unit.status, BlockedStatus) 102 | 103 | new_config = yaml.dump({}) 104 | self.harness.update_config({"config_file": new_config}) 105 | self.assertIsInstance(self.harness.charm.unit.status, ActiveStatus) 106 | 107 | def test_templates_section_added_if_user_provided_templates(self, *unused): 108 | self.harness.container_pebble_ready(self.container_name) 109 | 110 | templates = '{{ define "some.tmpl.variable" }}whatever it is{{ end}}' 111 | self.harness.update_config({"templates_file": templates}) 112 | updated_templates = self.harness.charm.container.pull(self.harness.charm._templates_path) 113 | self.assertEqual(templates, updated_templates.read()) 114 | 115 | updated_config = yaml.safe_load( 116 | self.harness.charm.container.pull(self.harness.charm._config_path) 117 | ) 118 | self.assertEqual(updated_config["templates"], [f"{self.harness.charm._templates_path}"]) 119 | 120 | 121 | class TestWithoutInitialHooks(unittest.TestCase): 122 | container_name: str = "alertmanager" 123 | 124 | @patch.object(Alertmanager, "reload", tautology) 125 | @patch("charm.KubernetesServicePatch", lambda x, y: None) 126 | def setUp(self, *unused): 127 | self.harness = Harness(AlertmanagerCharm) 128 | self.addCleanup(self.harness.cleanup) 129 | 130 | self.relation_id = self.harness.add_relation("alerting", "otherapp") 131 | self.harness.add_relation_unit(self.relation_id, "otherapp/0") 132 | self.harness.set_leader(True) 133 | 134 | self.harness.begin() 135 | self.harness.add_relation("replicas", "alertmanager") 136 | 137 | def test_unit_status_around_pebble_ready(self, *unused): 138 | # before pebble_ready, status should be "maintenance" 139 | self.assertIsInstance(self.harness.charm.unit.status, ops.model.MaintenanceStatus) 140 | 141 | # after pebble_ready, status should be "active" 142 | self.harness.container_pebble_ready(self.container_name) 143 | self.assertIsInstance(self.harness.charm.unit.status, 
ops.model.ActiveStatus) 144 | 145 | self.assertEqual(self.harness.model.unit.name, "alertmanager-k8s/0") 146 | -------------------------------------------------------------------------------- /tests/unit/test_consumer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright 2021 Canonical Ltd. 3 | # See LICENSE file for licensing details. 4 | 5 | import textwrap 6 | import unittest 7 | 8 | from charms.alertmanager_k8s.v0.alertmanager_dispatch import AlertmanagerConsumer 9 | from ops.charm import CharmBase 10 | from ops.framework import StoredState 11 | from ops.testing import Harness 12 | 13 | 14 | class DummyConsumerCharm(CharmBase): 15 | """Mimic bare functionality of AlertmanagerCharm needed to test the consumer.""" 16 | 17 | # define custom metadata - without this the harness would parse the metadata.yaml in this repo, 18 | # which would result in expressions like self.harness.model.app.name to return 19 | # "alertmanager-k8s", which is not what we want in a consumer test 20 | metadata_yaml = textwrap.dedent( 21 | """ 22 | name: DummyConsumerCharm 23 | containers: 24 | consumer-charm: 25 | resource: consumer-charm-image 26 | resources: 27 | consumer-charm-image: 28 | type: oci-image 29 | requires: 30 | alerting: 31 | interface: alertmanager_dispatch 32 | peers: 33 | replicas: 34 | interface: consumer_charm_replica 35 | """ 36 | ) 37 | _stored = StoredState() 38 | 39 | def __init__(self, *args, **kwargs): 40 | super().__init__(*args) 41 | # relation name must match metadata 42 | self.alertmanager_lib = AlertmanagerConsumer(self, relation_name="alerting") 43 | 44 | self.framework.observe( 45 | self.alertmanager_lib.on.cluster_changed, self._on_alertmanager_cluster_changed 46 | ) 47 | 48 | self._stored.set_default(alertmanagers=[], cluster_changed_emitted=0) 49 | 50 | def _on_alertmanager_cluster_changed(self, _): 51 | self._stored.cluster_changed_emitted += 1 52 | self._stored.alertmanagers = self.alertmanager_lib.get_cluster_info() 53 | 54 | 55 | class TestConsumer(unittest.TestCase): 56 | def setUp(self): 57 | self.harness = Harness(DummyConsumerCharm, meta=DummyConsumerCharm.metadata_yaml) 58 | self.addCleanup(self.harness.cleanup) 59 | self.harness.set_leader(True) 60 | self.harness.begin_with_initial_hooks() 61 | 62 | def _relate_to_alertmanager(self) -> int: 63 | """Create relation between 'this app' and a hypothetical (remote) alertmanager.""" 64 | rel_id = self.harness.add_relation(relation_name="alerting", remote_app="am") 65 | return rel_id 66 | 67 | def _add_alertmanager_units(self, rel_id: int, num_units: int, start_with=0): 68 | for i in range(start_with, start_with + num_units): 69 | remote_unit_name = f"am/{i}" 70 | self.harness.add_relation_unit(rel_id, remote_unit_name) 71 | self.harness.update_relation_data( 72 | rel_id, remote_unit_name, {"public_address": f"10.20.30.{i}"} 73 | ) 74 | 75 | return rel_id 76 | 77 | def test_cluster_updated_after_alertmanager_units_join(self): 78 | # before 79 | self.assertEqual([], self.harness.charm.alertmanager_lib.get_cluster_info()) 80 | num_events = self.harness.charm._stored.cluster_changed_emitted 81 | 82 | # add relation 83 | rel_id = self._relate_to_alertmanager() 84 | self._add_alertmanager_units(rel_id, num_units=2) 85 | 86 | # after 87 | self.assertGreater(self.harness.charm._stored.cluster_changed_emitted, num_events) 88 | self.assertListEqual( 89 | ["10.20.30.0", "10.20.30.1"], self.harness.charm.alertmanager_lib.get_cluster_info() 90 | ) 91 | 92 | 
num_events = self.harness.charm._stored.cluster_changed_emitted 93 | 94 | # add another unit 95 | self._add_alertmanager_units(rel_id, num_units=1, start_with=2) 96 | self.assertGreater(self.harness.charm._stored.cluster_changed_emitted, num_events) 97 | self.assertListEqual( 98 | ["10.20.30.0", "10.20.30.1", "10.20.30.2"], 99 | self.harness.charm.alertmanager_lib.get_cluster_info(), 100 | ) 101 | 102 | def test_cluster_updated_after_alertmanager_unit_leaves(self): 103 | num_events = self.harness.charm._stored.cluster_changed_emitted 104 | 105 | # add relation 106 | rel_id = self._relate_to_alertmanager() 107 | self._add_alertmanager_units(rel_id, num_units=4) 108 | self.assertGreater(self.harness.charm._stored.cluster_changed_emitted, num_events) 109 | before = self.harness.charm.alertmanager_lib.get_cluster_info() 110 | self.assertEqual(len(before), 4) 111 | 112 | num_events = self.harness.charm._stored.cluster_changed_emitted 113 | 114 | # remove alertmanager units 115 | self.harness.remove_relation_unit(rel_id, "am/3") 116 | self.harness.remove_relation_unit(rel_id, "am/2") 117 | self.assertGreater(self.harness.charm._stored.cluster_changed_emitted, num_events) 118 | after = self.harness.charm.alertmanager_lib.get_cluster_info() 119 | self.assertListEqual(after, ["10.20.30.0", "10.20.30.1"]) 120 | 121 | num_events = self.harness.charm._stored.cluster_changed_emitted 122 | 123 | # remove all remaining units 124 | self.harness.remove_relation_unit(rel_id, "am/1") 125 | self.harness.remove_relation_unit(rel_id, "am/0") 126 | self.assertGreater(self.harness.charm._stored.cluster_changed_emitted, num_events) 127 | after = self.harness.charm.alertmanager_lib.get_cluster_info() 128 | self.assertGreater(self.harness.charm._stored.cluster_changed_emitted, num_events) 129 | self.assertListEqual(after, []) 130 | 131 | def test_cluster_is_empty_after_relation_breaks(self): 132 | # add relation 133 | rel_id = self._relate_to_alertmanager() 134 | self._add_alertmanager_units(rel_id, num_units=4) 135 | before = self.harness.charm.alertmanager_lib.get_cluster_info() 136 | self.assertEqual(len(before), 4) 137 | 138 | num_events = self.harness.charm._stored.cluster_changed_emitted 139 | 140 | # remove relation 141 | self.harness.remove_relation(rel_id) 142 | after = self.harness.charm.alertmanager_lib.get_cluster_info() 143 | self.assertGreater(self.harness.charm._stored.cluster_changed_emitted, num_events) 144 | self.assertListEqual([], after) 145 | 146 | def test_relation_changed(self): 147 | # add relation 148 | rel_id = self._relate_to_alertmanager() 149 | self._add_alertmanager_units(rel_id, num_units=2) 150 | 151 | # update remote unit's relation data (emulates upgrade-charm) 152 | self.harness.update_relation_data(rel_id, "am/1", {"public_address": "90.80.70.60"}) 153 | self.assertListEqual( 154 | ["10.20.30.0", "90.80.70.60"], self.harness.charm.alertmanager_lib.get_cluster_info() 155 | ) 156 | -------------------------------------------------------------------------------- /src/alertmanager_client.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright 2021 Canonical Ltd. 3 | # See LICENSE file for licensing details. 
4 | 5 | """Client library for Alertmanager API.""" 6 | 7 | import json 8 | import logging 9 | import time 10 | import urllib.error 11 | import urllib.parse 12 | import urllib.request 13 | from typing import Optional 14 | 15 | import yaml 16 | 17 | logger = logging.getLogger(__name__) 18 | 19 | 20 | class AlertmanagerBadResponse(RuntimeError): 21 | """A catch-all exception type to indicate 'no reply', regardless of the reason.""" 22 | 23 | 24 | class Alertmanager: 25 | """Alertmanager HTTP API client.""" 26 | 27 | def __init__(self, address: str = "localhost", port: int = 9093, timeout=2.0): 28 | self.base_url = f"http://{address}:{port}/" 29 | self.timeout = timeout 30 | 31 | def reload(self) -> bool: 32 | """Send a POST request to hot-reload the config. 33 | 34 | This reduces downtime compared to restarting the service. 35 | 36 | Returns: 37 | True if reload succeeded (returned 200 OK); False otherwise. 38 | """ 39 | url = urllib.parse.urljoin(self.base_url, "/-/reload") 40 | # for an empty POST request, the `data` arg must be b"" to tell urlopen it's a POST 41 | if resp := self._open(url, data=b"", timeout=self.timeout): 42 | logger.warning("reload: POST returned a non-empty response: %s", resp) 43 | return False 44 | return True 45 | 46 | @staticmethod 47 | def _open(url: str, data: Optional[bytes], timeout: float) -> bytes: 48 | """Send a request using urlopen. 49 | 50 | Args: 51 | url: target url for the request 52 | data: bytes to send to target 53 | timeout: duration in seconds after which to return, regardless of the result 54 | 55 | Raises: 56 | AlertmanagerBadResponse: If no response or invalid response, regardless of the reason. 57 | """ 58 | for retry in reversed(range(3)): 59 | try: 60 | response = urllib.request.urlopen(url, data, timeout) 61 | if response.code == 200 and response.reason == "OK": 62 | return response.read() 63 | elif retry == 0: 64 | raise AlertmanagerBadResponse( 65 | f"Bad response (code={response.code}, reason={response.reason})" 66 | ) 67 | 68 | except (ValueError, urllib.error.HTTPError, urllib.error.URLError) as e: 69 | if retry == 0: 70 | raise AlertmanagerBadResponse("Bad response") from e 71 | 72 | time.sleep(0.2) 73 | 74 | assert False, "unreachable" # help mypy (https://github.com/python/mypy/issues/8964) 75 | 76 | def status(self) -> dict: 77 | """Obtain status information from the alertmanager server.
78 | 79 | Typical output: 80 | { 81 | "cluster": { 82 | "peers": [], 83 | "status": "disabled" 84 | }, 85 | "config": { 86 | "original": "global: [...]" 87 | }, 88 | "uptime": "2021-08-31T14:15:31.613Z", 89 | "versionInfo": { 90 | "branch": "HEAD", 91 | "buildDate": "20210324-17:46:50", 92 | "buildUser": "root@lgw01-amd64-031", 93 | "goVersion": "go1.14.15", 94 | "revision": "4c6c03ebfe21009c546e4d1e9b92c371d67c021d", 95 | "version": "0.21.0" 96 | } 97 | } 98 | """ 99 | url = urllib.parse.urljoin(self.base_url, "/api/v2/status") 100 | try: 101 | # the `data` arg must be None to tell urlopen it's a GET 102 | return json.loads(self._open(url, data=None, timeout=self.timeout)) 103 | except (TypeError, json.decoder.JSONDecodeError) as e: 104 | raise AlertmanagerBadResponse("Response is not a JSON string") from e 105 | 106 | @property 107 | def version(self) -> str: 108 | """Obtain version number from the alertmanager server.""" 109 | try: 110 | return self.status()["versionInfo"]["version"] 111 | except KeyError as e: 112 | raise AlertmanagerBadResponse("Unexpected response") from e 113 | 114 | def config(self) -> dict: 115 | """Obtain config from the alertmanager server. 116 | 117 | Typical output (here displayed in yaml format): 118 | global: 119 | resolve_timeout: 5m 120 | http_config: 121 | tls_config: 122 | insecure_skip_verify: true 123 | smtp_hello: localhost 124 | smtp_require_tls: true 125 | pagerduty_url: https://events.pagerduty.com/v2/enqueue 126 | opsgenie_api_url: https://api.opsgenie.com/ 127 | wechat_api_url: https://qyapi.weixin.qq.com/cgi-bin/ 128 | victorops_api_url: https://alert.victorops.com/integrations/generic/20131114/alert/ 129 | route: 130 | receiver: dummy 131 | group_by: 132 | - juju_application 133 | - juju_model 134 | - juju_model_uuid 135 | group_wait: 30s 136 | group_interval: 5m 137 | repeat_interval: 1h 138 | receivers: 139 | - name: dummy 140 | webhook_configs: 141 | - send_resolved: true 142 | http_config: 143 | tls_config: 144 | insecure_skip_verify: true 145 | url: http://127.0.0.1:5001/ 146 | max_alerts: 0 147 | templates: [] 148 | """ 149 | try: 150 | config = self.status()["config"]["original"] 151 | except KeyError as e: 152 | raise AlertmanagerBadResponse("Unexpected response") from e 153 | 154 | try: 155 | return yaml.safe_load(config) 156 | except yaml.YAMLError as e: 157 | raise AlertmanagerBadResponse("Response is not a YAML string") from e 158 | 159 | def _post( 160 | self, url: str, post_data: bytes, headers: dict = None, timeout: int = None 161 | ) -> bytes: 162 | """Make a HTTP POST request to Alertmanager. 163 | 164 | Args: 165 | url: string URL where POST request is sent. 166 | post_data: encoded string (bytes) of data to be posted. 167 | headers: dictionary containing HTTP headers to be used for POST request. 168 | timeout: numeric timeout value in seconds. 169 | 170 | Returns: 171 | urllib response object. 
172 | """ 173 | response = "".encode("utf-8") 174 | timeout = timeout or self.timeout 175 | request = urllib.request.Request(url, headers=headers or {}, data=post_data, method="POST") 176 | 177 | try: 178 | response = urllib.request.urlopen(request, timeout=timeout) 179 | except urllib.error.HTTPError as error: 180 | logger.debug( 181 | "Failed posting to %s, reason: %s", 182 | url, 183 | error.reason, 184 | ) 185 | except urllib.error.URLError as error: 186 | logger.debug("Invalid URL %s : %s", url, error) 187 | except TimeoutError: 188 | logger.debug("Request timeout during posting to URL %s", url) 189 | return response 190 | 191 | def set_alerts(self, alerts: list) -> bytes: 192 | """Send a set of new alerts to alertmanger. 193 | 194 | Args: 195 | alerts: a list of alerts to be set. Format of this list is 196 | described here https://prometheus.io/docs/alerting/latest/clients/. 197 | 198 | Returns: 199 | urllib response object. 200 | """ 201 | url = urllib.parse.urljoin(self.base_url, "/api/v1/alerts") 202 | headers = {"Content-Type": "application/json"} 203 | post_data = json.dumps(alerts).encode("utf-8") 204 | response = self._post(url, post_data, headers=headers) 205 | 206 | return response 207 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Alertmanager Operator (k8s) 2 | 3 | [![Test Suite](https://github.com/canonical/alertmanager-k8s-operator/actions/workflows/release-edge.yaml/badge.svg)](https://github.com/canonical/alertmanager-k8s-operator/actions/workflows/release-edge.yaml) 4 | ![Discourse status](https://img.shields.io/discourse/status?server=https%3A%2F%2Fdiscourse.charmhub.io&style=flat) 5 | 6 | This Charmed Operator handles instantiation, scaling, configuration, and Day 2 7 | operations specific to [Alertmanager]. 8 | 9 | This operator drives the Alertmanager application, and it can be composed with 10 | other operators to deliver a complex application or service, 11 | such as [COS Lite][COS Lite bundle]. 12 | 13 | Alertmanager receives alerts from supporting applications, such as 14 | [Prometheus][Prometheus operator] or [Loki][Loki operator], then deduplicates, 15 | groups and routes them to the configured receiver(s). 
16 | 17 | 18 | [Alertmanager]: https://prometheus.io/docs/alerting/latest/alertmanager/ 19 | [COS Lite bundle]: https://charmhub.io/cos-lite 20 | [Loki operator]: https://charmhub.io/loki-k8s 21 | [Prometheus operator]: https://charmhub.io/prometheus-k8s 22 | 23 | 24 | ## Getting started 25 | 26 | ### Basic deployment 27 | 28 | Once you have a controller and model ready, you can deploy alertmanager 29 | using the Juju CLI: 30 | 31 | ```shell 32 | juju deploy --channel=beta alertmanager-k8s 33 | ``` 34 | 35 | The available [channels](https://snapcraft.io/docs/channels) are listed at the top 36 | of [the page](https://charmhub.io/alertmanager-k8s) and can also be retrieved with 37 | Charmcraft CLI: 38 | 39 | ```shell 40 | $ charmcraft status alertmanager-k8s 41 | 42 | Track Base Channel Version Revision Resources 43 | latest ubuntu 20.04 (amd64) stable - - - 44 | candidate - - - 45 | beta 9 9 alertmanager-image (r1) 46 | edge 9 9 alertmanager-image (r1) 47 | ``` 48 | 49 | Once the Charmed Operator is deployed, the status can be checked by running: 50 | 51 | ```shell 52 | juju status --relations --storage --color 53 | ``` 54 | 55 | 56 | ### Configuration 57 | 58 | In order to have alerts dispatched to your receiver(s) of choice, 59 | a [configuration file](https://www.prometheus.io/docs/alerting/latest/configuration/) 60 | must be provided to Alertmanager using the 61 | [`config_file`](https://charmhub.io/alertmanager-k8s/configure#config_file) option: 62 | 63 | ```shell 64 | juju config alertmanager-k8s \ 65 | config_file='@path/to/alertmanager.yml' 66 | ``` 67 | 68 | Note that if you use templates, you should use the `templates_file` config option 69 | instead of having a `templates` section in your `yaml` configuration file. 70 | (This is a slight deviation from the official alertmanager config spec.) 71 | 72 | 73 | Use the [`templates_file`](https://charmhub.io/alertmanager-k8s/configure#templates_file) 74 | option to push templates that are being used by the configuration file: 75 | 76 | ```shell 77 | juju config alertmanager-k8s \ 78 | config_file='@path/to/alertmanager.yml' \ 79 | templates_file='@path/to/templates.tmpl' 80 | ``` 81 | 82 | All templates need to go into this single config option, instead of 83 | the 'templates' section of the main configuration file. The templates will be 84 | pushed to the workload container, and the configuration file will be updated 85 | accordingly. 86 | 87 | Refer to the 88 | [official templates documentation](https://prometheus.io/docs/alerting/latest/notification_examples/) 89 | for more details. 90 | 91 | 92 | To verify Alertmanager is using the expected configuration you can use the 93 | [`show-config`](https://charmhub.io/alertmanager-k8s/actions#show-config) action: 94 | 95 | ```shell 96 | juju run-action alertmanager-k8s/0 show-config --wait 97 | ``` 98 | 99 | 100 | ### Dashboard and HTTP API 101 | 102 | The Alertmanager dashboard and 103 | [HTTP API](https://www.prometheus.io/docs/alerting/latest/management_api/) 104 | can be accessed at the default port (9093) on the Alertmanager IP address, 105 | which is determinable with a `juju status` command. 
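For example, once you have an IP address (see the commands below), you can probe Alertmanager's management API endpoints (a sketch; `<alertmanager-ip>` is a placeholder):

```shell
curl http://<alertmanager-ip>:9093/-/healthy
curl http://<alertmanager-ip>:9093/-/ready
```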
106 | 107 | To obtain the load-balanced application IP, 108 | 109 | ```shell 110 | juju status alertmanager-k8s --format=json \ 111 | | jq -r '.applications."alertmanager-k8s".address' 112 | ``` 113 | 114 | Similarly, to obtain an individual unit's IP address: 115 | 116 | ```shell 117 | juju status alertmanager-k8s --format=json \ 118 | | jq -r '.applications."alertmanager-k8s".units."alertmanager-k8s/0".address' 119 | ``` 120 | 121 | 122 | ## Clustering 123 | 124 | ### Forming a cluster 125 | 126 | Alertmanager [supports clustering](https://www.prometheus.io/docs/alerting/latest/alertmanager/#high-availability) 127 | and all you need to do to create/update a cluster is to rescale the application to the desired number 128 | of units using `add-unit`: 129 | 130 | ```shell 131 | juju add-unit alertmanager-k8s 132 | ``` 133 | 134 | or using `scale-application`: 135 | 136 | ```shell 137 | juju scale-application alertmanager-k8s 3 138 | ``` 139 | 140 | Internally, HA is achieved by providing each Alertmanager instance with at least one IP address of another instance. The cluster then auto-updates with subsequent changes to the units present. 141 | 142 | ### Verification 143 | #### Pebble plan 144 | Cluster information is passed to Alertmanager via [`--cluster.peer` command line arguments](https://github.com/prometheus/alertmanager#high-availability). This can be verified by looking at the current pebble plan: 145 | 146 | ```shell 147 | $ juju exec --unit alertmanager-k8s/0 -- \ 148 | PEBBLE_SOCKET=/charm/containers/alertmanager/pebble.socket \ 149 | pebble plan 150 | 151 | services: 152 | alertmanager: 153 | summary: alertmanager service 154 | startup: enabled 155 | override: replace 156 | command: alertmanager --config.file=/etc/alertmanager/alertmanager.yml --storage.path=/alertmanager --web.listen-address=:9093 --cluster.listen-address=0.0.0.0:9094 --cluster.peer=10.1.179.220:9094 --cluster.peer=10.1.179.221:9094 157 | ``` 158 | #### HTTP API 159 | To manually verify a cluster is indeed formed, you can query the alertmanager HTTP API directly: 160 | 161 | ```shell 162 | $ curl -s $ALERTMANAGER_IP:9093/api/v1/status \ 163 | | jq '.data.clusterStatus.peers[].address' 164 | "10.1.179.220:9094" 165 | "10.1.179.221:9094" 166 | "10.1.179.217:9094" 167 | ``` 168 | 169 | 170 | ## OCI Images 171 | This charm is published on Charmhub with alertmanager images from 172 | [ubuntu/prometheus-alertmanager], however, it should also work with the 173 | official [quay.io/prometheus/alertmanager]. 174 | 175 | To try the charm with a different image you can use `juju refresh`. For example: 176 | 177 | ```shell 178 | juju refresh alertmanager-k8s \ 179 | --resource alertmanager-image=quay.io/prometheus/alertmanager 180 | ``` 181 | 182 | (Note: currently, refreshing to a different image only works when deploying from a local 183 | charm - [lp/1954462](https://bugs.launchpad.net/juju/+bug/1954462).) 184 | 185 | ### Resource revisions 186 | Workload images are archived on charmhub by revision number.
187 | 188 | | Resource | Revision | Image | 189 | |--------------------|:--------:|-------------------| 190 | | alertmanager-image | r1 | [0.21-20.04_beta] | 191 | 192 | You can use `charmcraft` to see the mapping between charm revisions and resource revisions: 193 | 194 | ```shell 195 | charmcraft status alertmanager-k8s 196 | ``` 197 | 198 | [ubuntu/prometheus-alertmanager]: https://hub.docker.com/r/ubuntu/prometheus-alertmanager 199 | [quay.io/prometheus/alertmanager]: https://quay.io/repository/prometheus/alertmanager?tab=tags 200 | [0.21-20.04_beta]: https://hub.docker.com/layers/ubuntu/prometheus-alertmanager/0.21-20.04_beta/images/sha256-1418c677768887c2c717d043c9cb8397a32552a61354cb98c25cef23eeeb2b3f?context=explore 201 | 202 | 203 | ## Official alertmanager documentation 204 | 205 | For further details about Alertmanager configuration and usage, please refer to 206 | the [official Alertmanager documentation](https://www.prometheus.io/docs/alerting/latest/overview/). 207 | 208 | 209 | ## Additional Information 210 | - [Logging, Monitoring, and Alerting](https://discourse.ubuntu.com/t/logging-monitoring-and-alerting/19151) (LMA) - 211 | a tutorial for running Prometheus, Grafana and Alertmanager with LXD. 212 | - [Alertmanager README](https://github.com/prometheus/alertmanager) 213 | - [PromCon 2018: Life of an Alert](https://youtube.com/watch?v=PUdjca23Qa4) 214 | -------------------------------------------------------------------------------- /lib/charms/alertmanager_k8s/v0/alertmanager_dispatch.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Canonical Ltd. 2 | # See LICENSE file for licensing details. 3 | 4 | """# Alertmanager library. 5 | 6 | This library is designed to be used by a charm consuming or providing the `alertmanager_dispatch` 7 | relation interface. 8 | 9 | This library is published as part of the 10 | [Alertmanager charm](https://charmhub.io/alertmanager-k8s). 11 | 12 | You can file bugs [here](https://github.com/canonical/alertmanager-operator/issues)! 13 | 14 | A typical example of including this library might be: 15 | 16 | ```python 17 | # ... 18 | from charms.alertmanager_k8s.v0.alertmanager_dispatch import AlertmanagerConsumer 19 | 20 | class SomeApplication(CharmBase): 21 | def __init__(self, *args): 22 | # ... 23 | self.alertmanager_consumer = AlertmanagerConsumer(self, relation_name="alertmanager") 24 | # ... 25 | ``` 26 | """ 27 | import logging 28 | import socket 29 | from typing import List 30 | 31 | import ops 32 | from ops.charm import CharmBase, RelationEvent, RelationJoinedEvent, RelationRole 33 | from ops.framework import EventBase, EventSource, Object, ObjectEvents 34 | from ops.model import Relation 35 | 36 | # The unique Charmhub library identifier, never change it 37 | LIBID = "37f1ca6f8fe84e3092ebbf6dc2885310" 38 | 39 | # Increment this major API version when introducing breaking changes 40 | LIBAPI = 0 41 | 42 | # Increment this PATCH version before using `charmcraft publish-lib` or reset 43 | # to 0 if you are raising the major API version 44 | LIBPATCH = 4 45 | 46 | # Set to match metadata.yaml 47 | INTERFACE_NAME = "alertmanager_dispatch" 48 | 49 | logger = logging.getLogger(__name__) 50 | 51 | 52 | class ClusterChanged(EventBase): 53 | """Event raised when an alertmanager cluster is changed. 54 | 55 | If an alertmanager unit is added to or removed from a relation, 56 | then a :class:`ClusterChanged` event should be emitted. 
57 | """ 58 | 59 | 60 | class AlertmanagerConsumerEvents(ObjectEvents): 61 | """Event descriptor for events raised by `AlertmanagerConsumer`.""" 62 | 63 | cluster_changed = EventSource(ClusterChanged) 64 | 65 | 66 | class RelationManagerBase(Object): 67 | """Base class that represents relation ends ("provides" and "requires"). 68 | 69 | :class:`RelationManagerBase` is used to create a relation manager. This is done by inheriting 70 | from :class:`RelationManagerBase` and customising the subclass as required. 71 | 72 | Attributes: 73 | name (str): consumer's relation name 74 | """ 75 | 76 | def __init__(self, charm: CharmBase, relation_name: str, relation_role: RelationRole): 77 | super().__init__(charm, relation_name) 78 | self.charm = charm 79 | self._validate_relation(relation_name, relation_role) 80 | self.name = relation_name 81 | 82 | def _validate_relation(self, relation_name: str, relation_role: RelationRole): 83 | try: 84 | if self.charm.meta.relations[relation_name].role != relation_role: 85 | raise ValueError( 86 | "Relation '{}' in the charm's metadata.yaml must be '{}' " 87 | "to be managed by this library, but instead it is '{}'".format( 88 | relation_name, 89 | relation_role, 90 | self.charm.meta.relations[relation_name].role, 91 | ) 92 | ) 93 | if self.charm.meta.relations[relation_name].interface_name != INTERFACE_NAME: 94 | raise ValueError( 95 | "Relation '{}' in the charm's metadata.yaml must use the '{}' interface " 96 | "to be managed by this library, but instead it is '{}'".format( 97 | relation_name, 98 | INTERFACE_NAME, 99 | self.charm.meta.relations[relation_name].interface_name, 100 | ) 101 | ) 102 | except KeyError: 103 | raise ValueError( 104 | "Relation '{}' is not in the charm's metadata.yaml".format(relation_name) 105 | ) 106 | 107 | 108 | class AlertmanagerConsumer(RelationManagerBase): 109 | """A "consumer" handler to be used by charms that relate to Alertmanager (the 'requires' side). 110 | 111 | To have your charm consume alertmanager cluster data, declare the interface's use in your 112 | charm's metadata.yaml file: 113 | 114 | ```yaml 115 | requires: 116 | alertmanager: 117 | interface: alertmanager_dispatch 118 | ``` 119 | 120 | A typical example of importing this library might be 121 | 122 | ```python 123 | from charms.alertmanager_k8s.v0.alertmanager_dispatch import AlertmanagerConsumer 124 | ``` 125 | 126 | In your charm's `__init__` method: 127 | 128 | ```python 129 | self.alertmanager_consumer = AlertmanagerConsumer(self, relation_name="alertmanager") 130 | ``` 131 | 132 | Every change in the alertmanager cluster emits a :class:`ClusterChanged` event that the 133 | consumer charm can register and handle, for example: 134 | 135 | ``` 136 | self.framework.observe(self.alertmanager_consumer.on.cluster_changed, 137 | self._on_alertmanager_cluster_changed) 138 | ``` 139 | 140 | The updated alertmanager cluster can then be obtained via the `get_cluster_info` method. 141 | 142 | This consumer library expects the consumer charm to observe the `cluster_changed` event.
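A minimal handler might look like this (a sketch only — what you do with the updated addresses is up to the consumer charm):

```python
def _on_alertmanager_cluster_changed(self, _):
    # get_cluster_info() returns a sorted list of alertmanager unit addresses
    alertmanagers = self.alertmanager_consumer.get_cluster_info()
    # ... reconfigure the workload with the updated addresses ...
```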
143 | 144 | Arguments: 145 | charm (CharmBase): consumer charm 146 | relation_name (str): from consumer's metadata.yaml 147 | 148 | Attributes: 149 | charm (CharmBase): consumer charm 150 | """ 151 | 152 | on = AlertmanagerConsumerEvents() 153 | 154 | def __init__(self, charm: CharmBase, relation_name: str = "alerting"): 155 | super().__init__(charm, relation_name, RelationRole.requires) 156 | 157 | self.framework.observe( 158 | self.charm.on[self.name].relation_changed, self._on_relation_changed 159 | ) 160 | self.framework.observe( 161 | self.charm.on[self.name].relation_departed, 162 | self._on_relation_departed, 163 | ) 164 | self.framework.observe(self.charm.on[self.name].relation_broken, self._on_relation_broken) 165 | 166 | def _on_relation_changed(self, event: ops.charm.RelationChangedEvent): 167 | """This hook notifies the charm that there may have been changes to the cluster.""" 168 | if event.unit: # event.unit may be `None` in the case of app data change 169 | # inform consumer about the change 170 | self.on.cluster_changed.emit() 171 | 172 | def get_cluster_info(self) -> List[str]: 173 | """Returns a list of ip addresses of all the alertmanager units.""" 174 | alertmanagers = [] # type: List[str] 175 | relation = self.charm.model.get_relation(self.name) 176 | if not relation: 177 | return alertmanagers 178 | for unit in relation.units: 179 | address = relation.data[unit].get("public_address") 180 | if address: 181 | alertmanagers.append(address) 182 | return sorted(alertmanagers) 183 | 184 | def _on_relation_departed(self, _): 185 | """This hook notifies the charm that there may have been changes to the cluster.""" 186 | self.on.cluster_changed.emit() 187 | 188 | def _on_relation_broken(self, _): 189 | """This hook notifies the charm that a relation has been completely removed.""" 190 | # inform consumer about the change 191 | self.on.cluster_changed.emit() 192 | 193 | 194 | class AlertmanagerProvider(RelationManagerBase): 195 | """A "provider" handler to be used by charms that relate to Alertmanager (the 'provides' side). 196 | 197 | To have your charm provide alertmanager cluster data, declare the interface's use in your 198 | charm's metadata.yaml file: 199 | 200 | ```yaml 201 | provides: 202 | alerting: 203 | interface: alertmanager_dispatch 204 | ``` 205 | 206 | A typical example of importing this library might be 207 | 208 | ```python 209 | from charms.alertmanager_k8s.v0.alertmanager_dispatch import AlertmanagerProvider 210 | ``` 211 | 212 | In your charm's `__init__` method: 213 | 214 | ```python 215 | self.alertmanager_provider = AlertmanagerProvider(self, self._relation_name, self._api_port) 216 | ``` 217 | 218 | Then inform consumers on any update to alertmanager cluster data via 219 | 220 | ```python 221 | self.alertmanager_provider.update_relation_data() 222 | ``` 223 | 224 | This provider auto-registers relation events on behalf of the main Alertmanager charm. 
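For example, a provider charm could refresh the data bags of all existing relations after an upgrade (a sketch — observing `upgrade_charm` for this purpose is only an illustration):

```python
def _on_upgrade_charm(self, _):
    # With no event argument, all existing "alerting" relations are updated.
    self.alertmanager_provider.update_relation_data()
```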
225 | 226 | Arguments: 227 | charm (CharmBase): consumer charm 228 | relation_name (str): relation name (not interface name) 229 | api_port (int): alertmanager server's api port; this is needed here to avoid accessing 230 | charm constructs directly 231 | 232 | Attributes: 233 | charm (CharmBase): the Alertmanager charm 234 | """ 235 | 236 | def __init__(self, charm, relation_name: str = "alerting", api_port: int = 9093): 237 | super().__init__(charm, relation_name, RelationRole.provides) 238 | 239 | self._api_port = api_port 240 | 241 | events = self.charm.on[self.name] 242 | 243 | # No need to observe `relation_departed` or `relation_broken`: data bags are auto-updated 244 | # so both events are address on the consumer side. 245 | self.framework.observe(events.relation_joined, self._on_relation_joined) 246 | 247 | @property 248 | def api_port(self): 249 | """Get the API port number to use for alertmanager.""" 250 | return self._api_port 251 | 252 | def _on_relation_joined(self, event: RelationJoinedEvent): 253 | """This hook stores the public address of the newly-joined "alerting" relation. 254 | 255 | This is needed for consumers such as prometheus, which should be aware of all alertmanager 256 | instances. 257 | """ 258 | self.update_relation_data(event) 259 | 260 | def _generate_relation_data(self, relation: Relation): 261 | """Helper function to generate relation data in the correct format.""" 262 | public_address = "{}:{}".format(socket.getfqdn(), self.api_port) 263 | return {"public_address": public_address} 264 | 265 | def update_relation_data(self, event: RelationEvent = None): 266 | """Helper function for updating relation data bags. 267 | 268 | This function can be used in two different ways: 269 | - update relation data bag of a given event (e.g. a newly joined relation); 270 | - update relation data for all relations 271 | 272 | Args: 273 | event: The event whose data bag needs to be updated. If it is None, update data bags of 274 | all relations. 275 | """ 276 | if event is None: 277 | # update all existing relation data 278 | # a single consumer charm's unit may be related to multiple providers 279 | if self.name in self.charm.model.relations: 280 | for relation in self.charm.model.relations[self.name]: 281 | # Sometimes (e.g. when an app is removed with `--force`), there is a dangling 282 | # relation, for which we get the following error: 283 | # ops.model.ModelError: b'ERROR relation 17 not found (not found)\n' 284 | # when trying to `network-get alerting`. 285 | relation.data[self.charm.unit].update(self._generate_relation_data(relation)) 286 | 287 | else: 288 | # update relation data only for the newly joined relation 289 | event.relation.data[self.charm.unit].update( 290 | self._generate_relation_data(event.relation) 291 | ) 292 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 
15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 
135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 
194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /lib/charms/observability_libs/v0/kubernetes_service_patch.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Canonical Ltd. 2 | # See LICENSE file for licensing details. 3 | 4 | """# KubernetesServicePatch Library. 5 | 6 | This library is designed to enable developers to more simply patch the Kubernetes Service created 7 | by Juju during the deployment of a sidecar charm. When sidecar charms are deployed, Juju creates a 8 | service named after the application in the namespace (named after the Juju model). This service by 9 | default contains a "placeholder" port, which is 65536/TCP. 10 | 11 | When modifying the default set of resources managed by Juju, one must consider the lifecycle of the 12 | charm. In this case, any modifications to the default service (created during deployment), will 13 | be overwritten during a charm upgrade. 14 | 15 | When initialised, this library binds a handler to the parent charm's `install` and `upgrade_charm` 16 | events which applies the patch to the cluster. This should ensure that the service ports are 17 | correct throughout the charm's life. 18 | 19 | The constructor simply takes a reference to the parent charm, and a list of tuples that each define 20 | a port for the service, where each tuple contains: 21 | 22 | - a name for the port 23 | - port for the service to listen on 24 | - optionally: a targetPort for the service (the port in the container!) 25 | - optionally: a nodePort for the service (for NodePort or LoadBalancer services only!) 26 | - optionally: a name of the service (in case service name needs to be patched as well) 27 | 28 | ## Getting Started 29 | 30 | To get started using the library, you just need to fetch the library using `charmcraft`. **Note 31 | that you also need to add `lightkube` and `lightkube-models` to your charm's `requirements.txt`.** 32 | 33 | ```shell 34 | cd some-charm 35 | charmcraft fetch-lib charms.observability_libs.v0.kubernetes_service_patch 36 | echo <<-EOF >> requirements.txt 37 | lightkube 38 | lightkube-models 39 | EOF 40 | ``` 41 | 42 | Then, to initialise the library: 43 | 44 | For ClusterIP services: 45 | ```python 46 | # ... 47 | from charms.observability_libs.v0.kubernetes_service_patch import KubernetesServicePatch 48 | 49 | class SomeCharm(CharmBase): 50 | def __init__(self, *args): 51 | # ... 52 | self.service_patcher = KubernetesServicePatch(self, [(f"{self.app.name}", 8080)]) 53 | # ... 54 | ``` 55 | 56 | For LoadBalancer/NodePort services: 57 | ```python 58 | # ... 59 | from charms.observability_libs.v0.kubernetes_service_patch import KubernetesServicePatch 60 | 61 | class SomeCharm(CharmBase): 62 | def __init__(self, *args): 63 | # ... 64 | self.service_patcher = KubernetesServicePatch( 65 | self, [(f"{self.app.name}", 443, 443, 30666)], "LoadBalancer" 66 | ) 67 | # ... 
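        # Note (a hedged aside): given the constructor signature defined further down
        # in this file, (charm, ports, service_name=None, service_type="ClusterIP", ...),
        # a bare third positional argument binds to `service_name`, so the service type
        # is more safely passed by keyword:
        #
        #   self.service_patcher = KubernetesServicePatch(
        #       self, [(f"{self.app.name}", 443, 443, 30666)], service_type="LoadBalancer"
        #   )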
68 | ``` 69 | 70 | Additionally, you may wish to use mocks in your charm's unit testing to ensure that the library 71 | does not try to make any API calls, or open any files during testing that are unlikely to be 72 | present, and could break your tests. The easiest way to do this is during your test `setUp`: 73 | 74 | ```python 75 | # ... 76 | 77 | @patch("charm.KubernetesServicePatch", lambda x, y: None) 78 | def setUp(self, *unused): 79 | self.harness = Harness(SomeCharm) 80 | # ... 81 | ``` 82 | """ 83 | 84 | import logging 85 | from types import MethodType 86 | from typing import Literal, Sequence, Tuple, Union 87 | 88 | from lightkube import ApiError, Client 89 | from lightkube.models.core_v1 import ServicePort, ServiceSpec 90 | from lightkube.models.meta_v1 import ObjectMeta 91 | from lightkube.resources.core_v1 import Service 92 | from lightkube.types import PatchType 93 | from ops.charm import CharmBase 94 | from ops.framework import Object 95 | 96 | logger = logging.getLogger(__name__) 97 | 98 | # The unique Charmhub library identifier, never change it 99 | LIBID = "0042f86d0a874435adef581806cddbbb" 100 | 101 | # Increment this major API version when introducing breaking changes 102 | LIBAPI = 0 103 | 104 | # Increment this PATCH version before using `charmcraft publish-lib` or reset 105 | # to 0 if you are raising the major API version 106 | LIBPATCH = 6 107 | 108 | PortDefinition = Union[Tuple[str, int], Tuple[str, int, int], Tuple[str, int, int, int]] 109 | ServiceType = Literal["ClusterIP", "LoadBalancer"] 110 | 111 | 112 | class KubernetesServicePatch(Object): 113 | """A utility for patching the Kubernetes service set up by Juju.""" 114 | 115 | def __init__( 116 | self, 117 | charm: CharmBase, 118 | ports: Sequence[PortDefinition], 119 | service_name: str = None, 120 | service_type: ServiceType = "ClusterIP", 121 | additional_labels: dict = None, 122 | additional_selectors: dict = None, 123 | additional_annotations: dict = None, 124 | ): 125 | """Constructor for KubernetesServicePatch. 126 | 127 | Args: 128 | charm: the charm that is instantiating the library. 129 | ports: a list of tuples (name, port, targetPort, nodePort) for every service port. 130 | service_name: allows setting custom name to the patched service. If none given, 131 | application name will be used. 132 | service_type: desired type of K8s service. Default value is in line with ServiceSpec's 133 | default value. 134 | additional_labels: Labels to be added to the kubernetes service (by default only 135 | "app.kubernetes.io/name" is set to the service name) 136 | additional_selectors: Selectors to be added to the kubernetes service (by default only 137 | "app.kubernetes.io/name" is set to the service name) 138 | additional_annotations: Annotations to be added to the kubernetes service. 
139 | """ 140 | super().__init__(charm, "kubernetes-service-patch") 141 | self.charm = charm 142 | self.service_name = service_name if service_name else self._app 143 | self.service = self._service_object( 144 | ports, 145 | service_name, 146 | service_type, 147 | additional_labels, 148 | additional_selectors, 149 | additional_annotations, 150 | ) 151 | 152 | # Make mypy type checking happy that self._patch is a method 153 | assert isinstance(self._patch, MethodType) 154 | # Ensure this patch is applied during the 'install' and 'upgrade-charm' events 155 | self.framework.observe(charm.on.install, self._patch) 156 | self.framework.observe(charm.on.upgrade_charm, self._patch) 157 | 158 | def _service_object( 159 | self, 160 | ports: Sequence[PortDefinition], 161 | service_name: str = None, 162 | service_type: ServiceType = "ClusterIP", 163 | additional_labels: dict = None, 164 | additional_selectors: dict = None, 165 | additional_annotations: dict = None, 166 | ) -> Service: 167 | """Creates a valid Service representation. 168 | 169 | Args: 170 | ports: a list of tuples of the form (name, port) or (name, port, targetPort) 171 | or (name, port, targetPort, nodePort) for every service port. If the 'targetPort' 172 | is omitted, it is assumed to be equal to 'port', with the exception of NodePort 173 | and LoadBalancer services, where all port numbers have to be specified. 174 | service_name: allows setting custom name to the patched service. If none given, 175 | application name will be used. 176 | service_type: desired type of K8s service. Default value is in line with ServiceSpec's 177 | default value. 178 | additional_labels: Labels to be added to the kubernetes service (by default only 179 | "app.kubernetes.io/name" is set to the service name) 180 | additional_selectors: Selectors to be added to the kubernetes service (by default only 181 | "app.kubernetes.io/name" is set to the service name) 182 | additional_annotations: Annotations to be added to the kubernetes service. 183 | 184 | Returns: 185 | Service: A valid representation of a Kubernetes Service with the correct ports. 186 | """ 187 | if not service_name: 188 | service_name = self._app 189 | labels = {"app.kubernetes.io/name": self._app} 190 | if additional_labels: 191 | labels.update(additional_labels) 192 | selector = {"app.kubernetes.io/name": self._app} 193 | if additional_selectors: 194 | selector.update(additional_selectors) 195 | return Service( 196 | apiVersion="v1", 197 | kind="Service", 198 | metadata=ObjectMeta( 199 | namespace=self._namespace, 200 | name=service_name, 201 | labels=labels, 202 | annotations=additional_annotations, # type: ignore[arg-type] 203 | ), 204 | spec=ServiceSpec( 205 | selector=selector, 206 | ports=[ 207 | ServicePort( 208 | name=p[0], 209 | port=p[1], 210 | targetPort=p[2] if len(p) > 2 else p[1], # type: ignore[misc] 211 | nodePort=p[3] if len(p) > 3 else None, # type: ignore[arg-type, misc] 212 | ) 213 | for p in ports 214 | ], 215 | type=service_type, 216 | ), 217 | ) 218 | 219 | def _patch(self, _) -> None: 220 | """Patch the Kubernetes service created by Juju to map the correct port. 221 | 222 | Raises: 223 | PatchFailed: if patching fails due to lack of permissions, or otherwise. 
224 | """ 225 | if not self.charm.unit.is_leader(): 226 | return 227 | 228 | client = Client() 229 | try: 230 | if self.service_name != self._app: 231 | self._delete_and_create_service(client) 232 | client.patch(Service, self.service_name, self.service, patch_type=PatchType.MERGE) 233 | except ApiError as e: 234 | if e.status.code == 403: 235 | logger.error("Kubernetes service patch failed: `juju trust` this application.") 236 | else: 237 | logger.error("Kubernetes service patch failed: %s", str(e)) 238 | else: 239 | logger.info("Kubernetes service '%s' patched successfully", self._app) 240 | 241 | def _delete_and_create_service(self, client: Client): 242 | service = client.get(Service, self._app, namespace=self._namespace) 243 | service.metadata.name = self.service_name # type: ignore[attr-defined] 244 | service.metadata.resourceVersion = service.metadata.uid = None # type: ignore[attr-defined] # noqa: E501 245 | client.delete(Service, self._app, namespace=self._namespace) 246 | client.create(service) 247 | 248 | def is_patched(self) -> bool: 249 | """Reports if the service patch has been applied. 250 | 251 | Returns: 252 | bool: A boolean indicating if the service patch has been applied. 253 | """ 254 | client = Client() 255 | # Get the relevant service from the cluster 256 | service = client.get(Service, name=self.service_name, namespace=self._namespace) 257 | # Construct a list of expected ports, should the patch be applied 258 | expected_ports = [(p.port, p.targetPort) for p in self.service.spec.ports] 259 | # Construct a list in the same manner, using the fetched service 260 | fetched_ports = [(p.port, p.targetPort) for p in service.spec.ports] # type: ignore[attr-defined] # noqa: E501 261 | return expected_ports == fetched_ports 262 | 263 | @property 264 | def _app(self) -> str: 265 | """Name of the current Juju application. 266 | 267 | Returns: 268 | str: A string containing the name of the current Juju application. 269 | """ 270 | return self.charm.app.name 271 | 272 | @property 273 | def _namespace(self) -> str: 274 | """The Kubernetes namespace we're running in. 275 | 276 | Returns: 277 | str: A string containing the name of the current Kubernetes namespace. 278 | """ 279 | with open("/var/run/secrets/kubernetes.io/serviceaccount/namespace", "r") as f: 280 | return f.read().strip() 281 | -------------------------------------------------------------------------------- /lib/charms/karma_k8s/v0/karma_dashboard.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Canonical Ltd. 2 | # See LICENSE file for licensing details. 3 | 4 | """# Karma library. 5 | 6 | This library is designed to be used by a charm consuming or providing the karma-dashboard relation. 7 | This library is published as part of the [Karma charm](https://charmhub.io/karma-k8s). 8 | 9 | You can file bugs [here](https://github.com/canonical/karma-operator/issues)! 10 | 11 | A typical example of including this library might be: 12 | 13 | ```python 14 | # ... 15 | from charms.karma_k8s.v0.karma_dashboard import KarmaConsumer 16 | 17 | class SomeApplication(CharmBase): 18 | def __init__(self, *args): 19 | # ... 20 | self.karma_consumer = KarmaConsumer(self, "dashboard") 21 | # ... 
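        # A typical next step (sketch): watch for changes in the set of related
        # alertmanager servers, e.g. to regenerate the Karma configuration.
        self.framework.observe(
            self.karma_consumer.on.alertmanager_config_changed,
            self._on_alertmanager_config_changed,
        )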
22 | ``` 23 | """ 24 | 25 | import logging 26 | from typing import Dict, List, Optional 27 | 28 | import ops.charm 29 | from ops.charm import CharmBase, RelationJoinedEvent, RelationRole 30 | from ops.framework import EventBase, EventSource, Object, ObjectEvents, StoredState 31 | 32 | # The unique Charmhub library identifier, never change it 33 | LIBID = "98f9dc00f7ff4b1197895886bdd92037" 34 | 35 | # Increment this major API version when introducing breaking changes 36 | LIBAPI = 0 37 | 38 | # Increment this PATCH version before using `charmcraft publish-lib` or reset 39 | # to 0 if you are raising the major API version 40 | LIBPATCH = 3 41 | 42 | # Set to match metadata.yaml 43 | INTERFACE_NAME = "karma_dashboard" 44 | 45 | logger = logging.getLogger(__name__) 46 | 47 | 48 | class KarmaAlertmanagerConfig: 49 | """A helper class for alertmanager server configuration for Karma. 50 | 51 | Refer to the Karma documentation for full details: 52 | https://github.com/prymitive/karma/blob/main/docs/CONFIGURATION.md#alertmanagers 53 | """ 54 | 55 | required_fields = {"name", "uri"} 56 | optional_fields = {"cluster"} 57 | _supported_fields = required_fields | optional_fields 58 | 59 | @staticmethod 60 | def is_valid(config: Dict[str, str]) -> bool: 61 | """Validate alertmanager server configuration for Karma. 62 | 63 | Args: 64 | config: target configuration to be validated. 65 | 66 | Returns: 67 | True if all required keys are present and all remaining keys are supported optional 68 | fields; False otherwise. 69 | """ 70 | all_required = all(key in config for key in KarmaAlertmanagerConfig.required_fields) 71 | all_supported = all(key in KarmaAlertmanagerConfig._supported_fields for key in config) 72 | return all_required and all_supported 73 | 74 | @staticmethod 75 | def from_dict(data: Dict[str, str]) -> Dict[str, str]: 76 | """Generate alertmanager server configuration from the given dict. 77 | 78 | Configuration is constructed by creating a subset of the provided dictionary that contains 79 | only the supported fields. 80 | 81 | Args: 82 | data: a dict that may contain alertmanager server configuration for Karma. 83 | 84 | Returns: 85 | A subset of `data` that contains all the supported fields found in `data`, if the 86 | resulting subset makes a valid configuration; False otherwise. 87 | """ 88 | config = {k: data[k] for k in data if k in KarmaAlertmanagerConfig.required_fields} 89 | optional_config = { 90 | k: data[k] for k in data if data[k] and k in KarmaAlertmanagerConfig.optional_fields 91 | } 92 | config.update(optional_config) 93 | return config if KarmaAlertmanagerConfig.is_valid(config) else {} 94 | 95 | @staticmethod 96 | def build(name: str, url: str, *, cluster=None) -> Dict[str, str]: 97 | """Build alertmanager server configuration for Karma. 98 | 99 | Args: 100 | name: name for the alertmanager unit. 101 | url: url of the alertmanager api server (including scheme and port) 102 | cluster: name of a cluster to which the alertmanager unit belongs to (optional) 103 | 104 | Returns: 105 | Alertmanager server configuration for Karma. 106 | """ 107 | return KarmaAlertmanagerConfig.from_dict({"name": name, "uri": url, "cluster": cluster}) 108 | 109 | 110 | class KarmaAlertmanagerConfigChanged(EventBase): 111 | """Event raised when karma configuration is changed. 112 | 113 | If an alertmanager unit is added to or removed from a relation, 114 | then a :class:`KarmaAlertmanagerConfigChanged` should be emitted. 
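    For illustration, a hedged sketch of the kind of server configuration whose
    addition or removal would trigger this event (all values are invented):

    ```python
    config = KarmaAlertmanagerConfig.build(
        "alertmanager-k8s/0", "http://am.example.com:9093", cluster="some-model_alertmanager-k8s"
    )
    # config == {"name": "alertmanager-k8s/0",
    #            "uri": "http://am.example.com:9093",
    #            "cluster": "some-model_alertmanager-k8s"}
    assert KarmaAlertmanagerConfig.is_valid(config)
    ```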
115 | """ 116 | 117 | 118 | class KarmaConsumerEvents(ObjectEvents): 119 | """Event descriptor for events raised by `AlertmanagerConsumer`.""" 120 | 121 | alertmanager_config_changed = EventSource(KarmaAlertmanagerConfigChanged) 122 | 123 | 124 | class RelationManagerBase(Object): 125 | """Base class that represents relation ends ("provides" and "requires"). 126 | 127 | :class:`RelationManagerBase` is used to create a relation manager. This is done by inheriting 128 | from :class:`RelationManagerBase` and customising the sub class as required. 129 | 130 | Attributes: 131 | name (str): consumer's relation name 132 | """ 133 | 134 | def __init__(self, charm: CharmBase, relation_name, relation_role: RelationRole): 135 | super().__init__(charm, relation_name) 136 | self.charm = charm 137 | self._validate_relation(relation_name, relation_role) 138 | self.name = relation_name 139 | 140 | def _validate_relation(self, relation_name: str, relation_role: RelationRole): 141 | try: 142 | if self.charm.meta.relations[relation_name].role != relation_role: 143 | raise ValueError( 144 | "Relation '{}' in the charm's metadata.yaml must be '{}' " 145 | "to be managed by this library, but instead it is '{}'".format( 146 | relation_name, 147 | relation_role, 148 | self.charm.meta.relations[relation_name].role, 149 | ) 150 | ) 151 | if self.charm.meta.relations[relation_name].interface_name != INTERFACE_NAME: 152 | raise ValueError( 153 | "Relation '{}' in the charm's metadata.yaml must use the '{}' interface " 154 | "to be managed by this library, but instead it is '{}'".format( 155 | relation_name, 156 | INTERFACE_NAME, 157 | self.charm.meta.relations[relation_name].interface_name, 158 | ) 159 | ) 160 | except KeyError: 161 | raise ValueError( 162 | "Relation '{}' is not in the charm's metadata.yaml".format(relation_name) 163 | ) 164 | 165 | 166 | class KarmaConsumer(RelationManagerBase): 167 | """A "consumer" handler to be used by the Karma charm (the 'requires' side). 168 | 169 | This library offers the interface needed in order to forward Alertmanager URLs and associated 170 | information to the Karma application. 171 | 172 | To have your charm provide URLs to Karma, declare the interface's use in your charm's 173 | metadata.yaml file: 174 | 175 | ```yaml 176 | provides: 177 | karma-dashboard: 178 | interface: karma_dashboard 179 | ``` 180 | 181 | A typical example of importing this library might be 182 | 183 | ```python 184 | from charms.alertmanager_karma.v0.karma_dashboard import KarmaConsumer 185 | ``` 186 | 187 | In your charm's `__init__` method: 188 | 189 | ```python 190 | self.karma_consumer = KarmaConsumer(self, "dashboard") 191 | ``` 192 | 193 | The consumer charm is expected to observe and respond to the 194 | :class:`KarmaAlertmanagerConfigChanged` event, for example: 195 | 196 | ```python 197 | self.framework.observe( 198 | self.karma_consumer.on.alertmanager_config_changed, self._on_alertmanager_config_changed 199 | ) 200 | ``` 201 | 202 | This consumer observes relation joined, changed and departed events on behalf of the charm. 
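    A minimal handler might look like the following sketch; it relies only on the
    accessors described below, and `_update_config` stands in for whatever the Karma
    charm does to render and reload its configuration (hypothetical helper):

    ```python
    def _on_alertmanager_config_changed(self, _):
        if not self.karma_consumer.config_valid:
            # BlockedStatus comes from ops.model
            self.unit.status = BlockedStatus("waiting for at least one alertmanager server")
            return
        servers = self.karma_consumer.get_alertmanager_servers()
        self._update_config(servers)  # hypothetical charm helper
    ```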
203 | 204 | From charm code you can then obtain the list of proxied alertmanagers via: 205 | 206 | ```python 207 | alertmanagers = self.karma_consumer.get_alertmanager_servers() 208 | ``` 209 | 210 | Arguments: 211 | charm (CharmBase): consumer charm 212 | name (str): from consumer's metadata.yaml 213 | 214 | Attributes: 215 | relation_charm (CharmBase): consumer charm 216 | """ 217 | 218 | on = KarmaConsumerEvents() 219 | 220 | def __init__(self, charm, relation_name: str = "karma-dashboard"): 221 | super().__init__(charm, relation_name, RelationRole.requires) 222 | self.charm = charm 223 | 224 | events = self.charm.on[self.name] 225 | self.framework.observe(events.relation_changed, self._on_relation_changed) 226 | self.framework.observe(events.relation_departed, self._on_relation_departed) 227 | 228 | def get_alertmanager_servers(self) -> List[Dict[str, str]]: 229 | """Return configuration data for all related alertmanager servers. 230 | 231 | The exact spec is described in the Karma project documentation 232 | https://github.com/prymitive/karma/blob/main/docs/CONFIGURATION.md#alertmanagers 233 | Every item in the returned list represents an item under the "servers" yaml section. 234 | 235 | Returns: 236 | List of server configurations, in the format prescribed by the Karma project 237 | """ 238 | servers = [] 239 | 240 | logger.debug("relations for %s: %s", self.name, self.charm.model.relations[self.name]) 241 | for relation in self.charm.model.relations[self.name]: 242 | # get data from related application 243 | for key in relation.data: 244 | if key is not self.charm.unit and isinstance(key, ops.charm.model.Unit): 245 | data = relation.data[key] 246 | config = KarmaAlertmanagerConfig.from_dict(data) 247 | if config and config not in servers: 248 | servers.append(config) 249 | 250 | return servers # TODO sorted 251 | 252 | def _on_relation_changed(self, _): 253 | """Event handler for RelationChangedEvent.""" 254 | self.on.alertmanager_config_changed.emit() 255 | 256 | def _on_relation_departed(self, _): 257 | """Hook is called when a unit leaves, but another unit may still be present.""" 258 | # At this point the unit data bag of the departing unit is gone from relation data 259 | self.on.alertmanager_config_changed.emit() 260 | 261 | @property 262 | def config_valid(self) -> bool: 263 | """Check if the current configuration is valid. 264 | 265 | Returns: 266 | True if the currently stored configuration for an alertmanager target is valid; False 267 | otherwise. 268 | """ 269 | # karma will fail starting without alertmanager server(s), which would cause pebble to 270 | # error out. 271 | 272 | # check that there is at least one alertmanager server configured 273 | servers = self.get_alertmanager_servers() 274 | return len(servers) > 0 275 | 276 | 277 | class KarmaProvider(RelationManagerBase): 278 | """A "provider" handler to be used by charms that relate to Karma (the 'provides' side). 279 | 280 | This library offers the interface needed in order to provide Alertmanager URLs and associated 281 | information to the Karma application. 
282 | 283 | To have your charm provide URLs to Karma, declare the interface's use in your charm's 284 | metadata.yaml file: 285 | 286 | ```yaml 287 | provides: 288 | karma-dashboard: 289 | interface: karma_dashboard 290 | ``` 291 | 292 | A typical example of importing this library might be 293 | 294 | ```python 295 | from charms.karma_k8s.v0.karma_dashboard import KarmaProvider 296 | ``` 297 | 298 | In your charm's `__init__` method: 299 | 300 | ```python 301 | self.karma_provider = KarmaProvider(self, "karma-dashboard") 302 | ``` 303 | 304 | The provider charm is expected to set the target URL via the consumer library, for example in 305 | config-changed: 306 | 307 | self.karma_provider.target = "http://whatever:9093" 308 | 309 | The provider charm can then obtain the configured IP address, for example: 310 | 311 | self.unit.status = ActiveStatus("Proxying {}".format(self.karma_provider.target)) 312 | 313 | Arguments: 314 | charm (CharmBase): consumer charm 315 | relation_name (str): relation name from consumer's metadata.yaml 316 | 317 | Attributes: 318 | charm (CharmBase): consumer charm 319 | """ 320 | 321 | _stored = StoredState() 322 | 323 | def __init__(self, charm, relation_name: str = "dashboard"): 324 | super().__init__(charm, relation_name, RelationRole.provides) 325 | self.charm = charm 326 | 327 | # StoredState is used for holding the target URL. 328 | # It is needed here because the target URL may be set by the consumer before any 329 | # "karma-dashboard" relation is joined, in which case there are no relation unit data bags 330 | # available for storing the target URL. 331 | self._stored.set_default(config={}) 332 | 333 | events = self.charm.on[self.name] 334 | self.framework.observe(events.relation_joined, self._on_relation_joined) 335 | 336 | def _on_relation_joined(self, event: RelationJoinedEvent): 337 | self._update_relation_data(event) 338 | 339 | @property 340 | def config_valid(self) -> bool: 341 | """Check if the current configuration is valid. 342 | 343 | Returns: 344 | True if the currently stored configuration for an alertmanager target is valid; False 345 | otherwise. 346 | """ 347 | return KarmaAlertmanagerConfig.is_valid(self._stored.config) 348 | 349 | @property 350 | def target(self) -> Optional[str]: 351 | """str: Alertmanager URL to be used by Karma.""" 352 | return self._stored.config.get("uri", None) 353 | 354 | @target.setter 355 | def target(self, url: str) -> None: 356 | """Configure an alertmanager target server to be used by Karma. 357 | 358 | Apart from the server's URL, the server configuration is determined from the juju topology. 359 | 360 | Args: 361 | url: Complete URL (scheme and port) of the target alertmanager server. 362 | 363 | Returns: 364 | None. 365 | """ 366 | name = self.charm.unit.name 367 | cluster = "{}_{}".format(self.charm.model.name, self.charm.app.name) 368 | config = KarmaAlertmanagerConfig.build(name, url, cluster=cluster) 369 | if not config: 370 | logger.warning("Invalid config: {%s, %s}", name, url) 371 | return 372 | 373 | self._stored.config.update(config) 374 | 375 | # target changed - must update all relation data 376 | self._update_relation_data() 377 | 378 | def _update_relation_data(self, event: RelationJoinedEvent = None): 379 | """Helper function for updating relation data bags. 380 | 381 | This function can be used in two different ways: 382 | - update relation data bag of a given event (e.g. 
a newly joined relation); 383 | - update relation data for all relations 384 | 385 | Args: 386 | event: The event whose data bag needs to be updated. If it is None, update data bags of 387 | all relations. 388 | """ 389 | if event is None: 390 | # update all existing relation data 391 | # a single consumer charm's unit may be related to multiple karma dashboards 392 | if self.name in self.charm.model.relations: 393 | for relation in self.charm.model.relations[self.name]: 394 | relation.data[self.charm.unit].update(self._stored.config) 395 | else: 396 | # update relation data only for the newly joined relation 397 | event.relation.data[self.charm.unit].update(self._stored.config) 398 | -------------------------------------------------------------------------------- /src/charm.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright 2021 Canonical Ltd. 3 | # See LICENSE file for licensing details. 4 | 5 | """A Juju charm for alertmanager.""" 6 | 7 | import hashlib 8 | import logging 9 | import socket 10 | from typing import List, Optional, cast 11 | 12 | import yaml 13 | from charms.alertmanager_k8s.v0.alertmanager_dispatch import AlertmanagerProvider 14 | from charms.grafana_k8s.v0.grafana_dashboard import GrafanaDashboardProvider 15 | from charms.grafana_k8s.v0.grafana_source import GrafanaSourceProvider 16 | from charms.karma_k8s.v0.karma_dashboard import KarmaProvider 17 | from charms.observability_libs.v0.kubernetes_service_patch import KubernetesServicePatch 18 | from charms.prometheus_k8s.v0.prometheus_scrape import MetricsEndpointProvider 19 | from ops.charm import ActionEvent, CharmBase 20 | from ops.framework import StoredState 21 | from ops.main import main 22 | from ops.model import ActiveStatus, BlockedStatus, MaintenanceStatus, Relation 23 | from ops.pebble import Layer, PathError, ProtocolError 24 | 25 | from alertmanager_client import Alertmanager, AlertmanagerBadResponse 26 | 27 | logger = logging.getLogger(__name__) 28 | 29 | 30 | def sha256(hashable) -> str: 31 | """Use instead of the builtin hash() for repeatable values.""" 32 | if isinstance(hashable, str): 33 | hashable = hashable.encode("utf-8") 34 | return hashlib.sha256(hashable).hexdigest() 35 | 36 | 37 | class ConfigUpdateFailure(RuntimeError): 38 | """Custom exception for failed config updates.""" 39 | 40 | 41 | class AlertmanagerCharm(CharmBase): 42 | """A Juju charm for alertmanager. 
43 | 44 | Attributes: 45 | api: an API client instance for communicating with the alertmanager workload 46 | server 47 | """ 48 | 49 | # Container name is automatically determined from charm name 50 | # Layer name is used for the layer label argument in container.add_layer 51 | # Service name matches charm name for consistency 52 | _container_name = _layer_name = _service_name = "alertmanager" 53 | _relation_name = "alerting" 54 | _peer_relation_name = "replicas" # must match metadata.yaml peer role name 55 | _api_port = 9093 # port to listen on for the web interface and API 56 | _ha_port = 9094 # port for HA-communication between multiple instances of alertmanager 57 | 58 | # path, inside the workload container, to the alertmanager and amtool configuration files 59 | # the amalgamated templates file goes in the same folder as the main configuration file 60 | _config_path = "/etc/alertmanager/alertmanager.yml" 61 | _templates_path = "/etc/alertmanager/templates.tmpl" 62 | _amtool_config_path = "/etc/amtool/config.yml" 63 | 64 | # path, inside the workload container for alertmanager data, e.g. 'nflogs', 'silences'. 65 | _storage_path = "/alertmanager" 66 | 67 | _stored = StoredState() 68 | 69 | def __init__(self, *args): 70 | super().__init__(*args) 71 | self._stored.set_default(config_hash=None, launched_with_peers=False) 72 | self.api = Alertmanager(port=self._api_port) 73 | 74 | self.alertmanager_provider = AlertmanagerProvider( 75 | self, self._relation_name, self._api_port 76 | ) 77 | self.grafana_dashboard_provider = GrafanaDashboardProvider(charm=self) 78 | self.grafana_source_provider = GrafanaSourceProvider( 79 | charm=self, 80 | source_type="alertmanager", 81 | source_url=self.api_address, 82 | ) 83 | self.karma_provider = KarmaProvider(self, "karma-dashboard") 84 | 85 | self.service_patcher = KubernetesServicePatch( 86 | self, 87 | [ 88 | (f"{self.app.name}", self._api_port, self._api_port), 89 | (f"{self.app.name}-ha", self._ha_port, self._ha_port), 90 | ], 91 | ) 92 | 93 | # Self-monitoring 94 | self._scraping = MetricsEndpointProvider( 95 | self, 96 | relation_name="self-metrics-endpoint", 97 | jobs=[{"static_configs": [{"targets": [f"*:{self._api_port}"]}]}], 98 | ) 99 | 100 | self.container = self.unit.get_container(self._container_name) 101 | 102 | # Core lifecycle events 103 | self.framework.observe(self.on.config_changed, self._on_config_changed) 104 | self.framework.observe(self.on.alertmanager_pebble_ready, self._on_pebble_ready) 105 | self.framework.observe(self.on.start, self._on_start) 106 | self.framework.observe(self.on.update_status, self._on_update_status) 107 | self.framework.observe(self.on.upgrade_charm, self._on_upgrade_charm) 108 | 109 | # Peer relation events 110 | self.framework.observe( 111 | self.on[self._peer_relation_name].relation_joined, self._on_peer_relation_joined 112 | ) 113 | self.framework.observe( 114 | self.on[self._peer_relation_name].relation_changed, self._on_peer_relation_changed 115 | ) 116 | 117 | # Action events 118 | self.framework.observe(self.on.show_config_action, self._on_show_config_action) 119 | 120 | def _on_show_config_action(self, event: ActionEvent): 121 | """Hook for the show-config action.""" 122 | event.log(f"Fetching {self._config_path}") 123 | if not self.container.can_connect(): 124 | event.fail("Container not ready") 125 | 126 | try: 127 | content = self.container.pull(self._config_path) 128 | # juju requires keys to be lowercase alphanumeric (can't use self._config_path) 129 | event.set_results({"path": 
self._config_path, "content": content.read()}) 130 | except (ProtocolError, PathError) as e: 131 | event.fail(str(e)) 132 | 133 | @property 134 | def api_port(self) -> int: 135 | """Get the API port number to use for alertmanager (default: 9093).""" 136 | return self._api_port 137 | 138 | @property 139 | def peer_relation(self) -> Optional["Relation"]: 140 | """Helper function for obtaining the peer relation object. 141 | 142 | Returns: peer relation object 143 | (NOTE: would return None if called too early, e.g. during install). 144 | """ 145 | return self.model.get_relation(self._peer_relation_name) 146 | 147 | def _alertmanager_layer(self) -> Layer: 148 | """Returns Pebble configuration layer for alertmanager.""" 149 | 150 | def _command(): 151 | """Returns full command line to start alertmanager.""" 152 | peer_addresses = self._get_peer_addresses() 153 | 154 | # cluster listen address - empty string disables HA mode 155 | listen_address_arg = "" if len(peer_addresses) == 0 else f"0.0.0.0:{self._ha_port}" 156 | 157 | # The chosen port in the cluster.listen-address flag is the port that needs to be 158 | # specified in the cluster.peer flag of the other peers. 159 | # Assuming all replicas use the same port. 160 | # Sorting for repeatability in comparing between service layers. 161 | peer_cmd_args = " ".join( 162 | sorted([f"--cluster.peer={address}" for address in peer_addresses]) 163 | ) 164 | return ( 165 | f"alertmanager " 166 | f"--config.file={self._config_path} " 167 | f"--storage.path={self._storage_path} " 168 | f"--web.listen-address=:{self._api_port} " 169 | f"--cluster.listen-address={listen_address_arg} " 170 | f"{peer_cmd_args}" 171 | ) 172 | 173 | return Layer( 174 | { 175 | "summary": "alertmanager layer", 176 | "description": "pebble config layer for alertmanager", 177 | "services": { 178 | self._service_name: { 179 | "override": "replace", 180 | "summary": "alertmanager service", 181 | "command": _command(), 182 | "startup": "enabled", 183 | } 184 | }, 185 | } 186 | ) 187 | 188 | def _restart_service(self) -> bool: 189 | """Helper function for restarting the underlying service. 190 | 191 | Returns: 192 | True if restart succeeded; False otherwise. 193 | """ 194 | logger.info("Restarting service %s", self._service_name) 195 | 196 | if not self.container.can_connect(): 197 | logger.error("Cannot (re)start service: container is not ready.") 198 | return False 199 | 200 | # Check if service exists, to avoid ModelError from being raised when the service does 201 | # not exist, 202 | if not self.container.get_plan().services.get(self._service_name): 203 | logger.error("Cannot (re)start service: service does not (yet) exist.") 204 | return False 205 | 206 | self.container.restart(self._service_name) 207 | 208 | # Update "launched with peers" flag. 209 | # The service should be restarted when peers joined if this is False. 210 | plan = self.container.get_plan() 211 | service = plan.services.get(self._service_name) 212 | self._stored.launched_with_peers = "--cluster.peer" in service.command 213 | 214 | return True 215 | 216 | def _update_layer(self, restart: bool) -> bool: 217 | """Update service layer to reflect changes in peers (replicas). 218 | 219 | Args: 220 | restart: a flag indicating if the service should be restarted if a change was detected. 
221 | 222 | Returns: 223 | True if anything changed; False otherwise 224 | """ 225 | overlay = self._alertmanager_layer() 226 | plan = self.container.get_plan() 227 | 228 | if self._service_name not in plan.services or overlay.services != plan.services: 229 | self.container.add_layer(self._layer_name, overlay, combine=True) 230 | 231 | if restart: 232 | self._restart_service() 233 | 234 | return True 235 | 236 | return False 237 | 238 | @property 239 | def _default_config(self) -> dict: 240 | return { 241 | "global": {"http_config": {"tls_config": {"insecure_skip_verify": True}}}, 242 | "route": { 243 | "group_wait": "30s", 244 | "group_interval": "5m", 245 | "repeat_interval": "1h", 246 | "receiver": "dummy", 247 | }, 248 | "receivers": [ 249 | {"name": "dummy", "webhook_configs": [{"url": "http://127.0.0.1:5001/"}]} 250 | ], 251 | } 252 | 253 | def _update_config(self) -> None: 254 | """Update alertmanager.yml config file to reflect changes in configuration. 255 | 256 | After pushing a new config, a hot-reload is attempted. If hot-reload fails, the service is 257 | restarted. 258 | 259 | Raises: 260 | ConfigUpdateFailure, if failed to update configuration file. 261 | """ 262 | # update amtool config file 263 | amtool_config = yaml.safe_dump({"alertmanager.url": f"http://localhost:{self.api_port}"}) 264 | self.container.push(self._amtool_config_path, amtool_config, make_dirs=True) 265 | 266 | # if no config provided, use default config with a dummy receiver 267 | config = yaml.safe_load(self.config["config_file"]) or self._default_config 268 | 269 | if config.get("templates", []): 270 | logger.error( 271 | "alertmanager config file must not have a 'templates' section; " 272 | "use the 'templates' config option instead." 273 | ) 274 | raise ConfigUpdateFailure( 275 | "Invalid config file: use charm's 'templates' config option instead" 276 | ) 277 | 278 | # add templates, if any 279 | if templates := self.config["templates_file"]: 280 | config["templates"] = [f"{self._templates_path}"] 281 | self.container.push(self._templates_path, templates, make_dirs=True) 282 | 283 | # add juju topology to "group_by" 284 | route = cast(dict, config.get("route", {})) 285 | route["group_by"] = list( 286 | set(route.get("group_by", [])).union( 287 | ["juju_application", "juju_model", "juju_model_uuid"] 288 | ) 289 | ) 290 | config["route"] = route 291 | 292 | config_yaml = yaml.safe_dump(config) 293 | config_hash = sha256(config_yaml) 294 | 295 | if config_hash == self._stored.config_hash: 296 | logger.debug("no change in config") 297 | return 298 | 299 | logger.debug("config changed") 300 | self._push_config_and_reload(config_yaml) 301 | self._stored.config_hash = config_hash 302 | 303 | def _push_config_and_reload(self, config_yaml): 304 | """Push config into workload container, and trigger a hot-reload (or service restart). 305 | 306 | Args: 307 | config_yaml: contents of the new config file. 308 | 309 | Raises: 310 | ConfigUpdateFailure, if config update fails. 311 | """ 312 | self.container.push(self._config_path, config_yaml, make_dirs=True) 313 | 314 | # Obtain a "before" snapshot of the config from the server. 
315 | # This is different from `config` above because alertmanager adds in a bunch of details 316 | # such as: 317 | # 318 | # smtp_hello: localhost 319 | # smtp_require_tls: true 320 | # pagerduty_url: https://events.pagerduty.com/v2/enqueue 321 | # opsgenie_api_url: https://api.opsgenie.com/ 322 | # wechat_api_url: https://qyapi.weixin.qq.com/cgi-bin/ 323 | # victorops_api_url: https://alert.victorops.com/integrations/generic/20131114/alert/ 324 | # 325 | # The snapshot is needed to determine if reloading took place. 326 | try: 327 | config_from_server_before = self.api.config() 328 | except AlertmanagerBadResponse: 329 | config_from_server_before = None 330 | 331 | # Send an HTTP POST to alertmanager to hot-reload the config. 332 | # This reduces down-time compared to restarting the service. 333 | try: 334 | self.api.reload() 335 | except AlertmanagerBadResponse as e: 336 | logger.warning("config reload via HTTP POST failed: %s", str(e)) 337 | # hot-reload failed so attempting a service restart 338 | if not self._restart_service(): 339 | raise ConfigUpdateFailure( 340 | "Is config valid? hot reload and service restart failed." 341 | ) 342 | 343 | # Obtain an "after" snapshot of the config from the server. 344 | try: 345 | config_from_server_after = self.api.config() 346 | except AlertmanagerBadResponse: 347 | config_from_server_after = None 348 | 349 | if config_from_server_before is None or config_from_server_after is None: 350 | logger.warning("cannot determine if reload succeeded") 351 | elif config_from_server_before == config_from_server_after: 352 | logger.warning("config remained the same after a reload") 353 | 354 | @property 355 | def api_address(self): 356 | """Returns the API address (including scheme and port) of the alertmanager server.""" 357 | return f"http://{socket.getfqdn()}:{self.api_port}" 358 | 359 | def _common_exit_hook(self) -> None: 360 | """Event processing hook that is common to all events to ensure idempotency.""" 361 | if not self.container.can_connect(): 362 | self.unit.status = MaintenanceStatus("Waiting for pod startup to complete") 363 | return 364 | 365 | # In the case of a single unit deployment, no 'RelationJoined' event is emitted, so 366 | # setting IP here. 367 | # Store private address in unit's peer relation data bucket. This is still needed because 368 | # the "private-address" field in the data bag is being populated incorrectly. 369 | # Also, ip address may still be None even after RelationJoinedEvent, for which 370 | # "ops.model.RelationDataError: relation data values must be strings" would be emitted. 
371 | if self.peer_relation: 372 | self.peer_relation.data[self.unit]["private_address"] = socket.getfqdn() 373 | 374 | self.alertmanager_provider.update_relation_data() 375 | if karma_address := self.api_address: 376 | self.karma_provider.target = karma_address 377 | 378 | # Update pebble layer 379 | layer_changed = self._update_layer(restart=False) 380 | 381 | service_running = ( 382 | service := self.container.get_service(self._service_name) 383 | ) and service.is_running() 384 | 385 | num_peers = len(rel.units) if (rel := self.peer_relation) else 0 386 | 387 | if layer_changed and ( 388 | not service_running or (num_peers > 0 and not self._stored.launched_with_peers) 389 | ): 390 | self._restart_service() 391 | 392 | # Update config file 393 | try: 394 | self._update_config() 395 | except ConfigUpdateFailure as e: 396 | self.unit.status = BlockedStatus(str(e)) 397 | return 398 | 399 | self.unit.status = ActiveStatus() 400 | 401 | def _on_pebble_ready(self, _): 402 | """Event handler for PebbleReadyEvent.""" 403 | self._common_exit_hook() 404 | 405 | def _on_config_changed(self, _): 406 | """Event handler for ConfigChangedEvent.""" 407 | self._common_exit_hook() 408 | 409 | def _on_start(self, _): 410 | """Event handler for StartEvent. 411 | 412 | With Juju 2.9.5 encountered a scenario in which pebble_ready and config_changed fired, 413 | but IP address was not available and the status was stuck on "Waiting for IP address". 414 | Adding this hook reduce the likelihood of that scenario. 415 | """ 416 | self._common_exit_hook() 417 | 418 | def _on_peer_relation_joined(self, _): 419 | """Event handler for replica's RelationChangedEvent.""" 420 | self._common_exit_hook() 421 | 422 | def _on_peer_relation_changed(self, _): 423 | """Event handler for replica's RelationChangedEvent. 424 | 425 | `relation_changed` is needed in addition to `relation_joined` because when a second unit 426 | joins, the first unit must be restarted and provided with the second unit's IP address. 427 | when the first unit sees "joined", it is not guaranteed that the second unit already has 428 | an IP address. 429 | """ 430 | self._common_exit_hook() 431 | 432 | def _on_update_status(self, _): 433 | """Event handler for UpdateStatusEvent. 434 | 435 | Logs list of peers, uptime and version info. 436 | """ 437 | try: 438 | status = self.api.status() 439 | logger.info( 440 | "alertmanager %s is up and running (uptime: %s); " 441 | "cluster mode: %s, with %d peers", 442 | status["versionInfo"]["version"], 443 | status["uptime"], 444 | status["cluster"]["status"], 445 | len(status["cluster"]["peers"]), 446 | ) 447 | except AlertmanagerBadResponse as e: 448 | logger.error("Failed to obtain status: %s", str(e)) 449 | 450 | # Calling the common hook to make sure a single unit set its IP in case all events fired 451 | # before an IP address was ready, leaving UpdateStatue as the last resort. 452 | self._common_exit_hook() 453 | 454 | def _on_upgrade_charm(self, _): 455 | """Event handler for replica's UpgradeCharmEvent.""" 456 | # update config hash 457 | self._stored.config_hash = ( 458 | "" 459 | if not self.container.can_connect() 460 | else sha256(yaml.safe_dump(yaml.safe_load(self.container.pull(self._config_path)))) 461 | ) 462 | 463 | # After upgrade (refresh), the unit ip address is not guaranteed to remain the same, and 464 | # the config may need update. Calling the common hook to update. 
465 | self._common_exit_hook() 466 | 467 | def _get_peer_addresses(self) -> List[str]: 468 | """Create a list of HA addresses of all peer units (all units excluding current). 469 | 470 | The returned addresses include the HA port number but do not include scheme (http). 471 | If a unit does not have an address, it will be omitted from the list. 472 | """ 473 | addresses = [] 474 | if pr := self.peer_relation: 475 | addresses = [ 476 | f"{address}:{self._ha_port}" 477 | for unit in pr.units # pr.units only holds peers (self.unit is not included) 478 | if (address := pr.data[unit].get("private_address")) 479 | ] 480 | 481 | return addresses 482 | 483 | 484 | if __name__ == "__main__": 485 | main(AlertmanagerCharm, use_juju_for_storage=True) 486 | -------------------------------------------------------------------------------- /lib/charms/grafana_k8s/v0/grafana_source.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Canonical Ltd. 2 | # See LICENSE file for licensing details. 3 | 4 | """## Overview. 5 | 6 | This document explains how to integrate with the Grafana charm 7 | for the purpose of providing a datasource which can be used by 8 | Grafana dashboards. It also explains the structure of the data 9 | expected by the `grafana-source` interface, and may provide a 10 | mechanism or reference point for providing a compatible interface 11 | or library by providing a definitive reference guide to the 12 | structure of relation data which is shared between the Grafana 13 | charm and any charm providing datasource information. 14 | 15 | ## Provider Library Usage 16 | 17 | The Grafana charm interacts with its datasources using its charm 18 | library. The goal of this library is to be as simple to use as 19 | possible, and instantiation of the class with or without changing 20 | the default arguments provides a complete use case. For the simplest 21 | use case of a Prometheus (or Prometheus-compatible) datasource 22 | provider in a charm which `provides: grafana-source`, creation of a 23 | `GrafanaSourceProvider` object with the default arguments is sufficient. 24 | 25 | The default arguments are: 26 | 27 | `charm`: `self` from the charm instantiating this library 28 | `source_type`: None 29 | `source_port`: None 30 | `source_url`: None 31 | `relation_name`: grafana-source 32 | `refresh_event`: A `PebbleReady` event from `charm`, used to refresh 33 | the IP address sent to Grafana on a charm lifecycle event or 34 | pod restart 35 | 36 | The value of `source_url` should be a fully-resolvable URL for a valid Grafana 37 | source, e.g., `http://example.com/api` or similar. 38 | 39 | If your configuration requires any changes from these defaults, they 40 | may be set from the class constructor. It may be instantiated as 41 | follows: 42 | 43 | from charms.grafana_k8s.v0.grafana_source import GrafanaSourceProvider 44 | 45 | class FooCharm: 46 | def __init__(self, *args): 47 | super().__init__(*args, **kwargs) 48 | ... 49 | self.grafana_source_provider = GrafanaSourceProvider( 50 | self, source_type="prometheus", source_port="9090" 51 | ) 52 | ... 53 | 54 | The first argument (`self`) should be a reference to the parent (datasource) 55 | charm, as this charm's model will be used for relation data, IP addresses, 56 | and lifecycle events. 
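If the datasource exposes a complete URL rather than just a port, or if its address
should be refreshed on a workload restart, the remaining constructor arguments can be
used instead. A hedged sketch (the pebble-ready event name is illustrative):

    self.grafana_source_provider = GrafanaSourceProvider(
        self,
        source_type="alertmanager",
        source_url="http://some-host.example.com:9093",
        refresh_event=self.on.workload_pebble_ready,
    )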
57 | 58 | An instantiated `GrafanaSourceProvider` will ensure that each unit of its 59 | parent charm is added as a datasource in the Grafana configuration once a 60 | relation is established, using the [Grafana datasource provisioning]( 61 | https://grafana.com/docs/grafana/latest/administration/provisioning/#data-sources) 62 | specification via YAML files. 63 | 64 | This information is added to the relation data for the charms as serialized JSON 65 | from a dict, with a structure of: 66 | ``` 67 | { 68 | "application": { 69 | "model": charm.model.name, # from `charm` in the constructor 70 | "model_uuid": charm.model.uuid, 71 | "application": charm.model.app.name, 72 | "type": source_type, 73 | }, 74 | "unit/0": { 75 | "uri": {ip_address}:{port}{path} # `ip_address` is derived at runtime, `port` from the constructor, 76 | # and `path` from the constructor, if specified 77 | }, 78 | ``` 79 | 80 | This is ingested by :class:`GrafanaSourceConsumer`, and is sufficient for configuration. 81 | 82 | 83 | ## Consumer Library Usage 84 | 85 | The `GrafanaSourceConsumer` object may be used by Grafana 86 | charms to manage relations with available datasources. For this 87 | purpose, a charm consuming Grafana datasource information should do 88 | the following things: 89 | 90 | 1. Instantiate the `GrafanaSourceConsumer` object by providing it a 91 | reference to the parent (Grafana) charm and, optionally, the name of 92 | the relation that the Grafana charm uses to interact with datasources. 93 | This relation must confirm to the `grafana-source` interface. 94 | 95 | For example a Grafana charm may instantiate the 96 | `GrafanaSourceConsumer` in its constructor as follows 97 | 98 | from charms.grafana_k8s.v0.grafana_source import GrafanaSourceConsumer 99 | 100 | def __init__(self, *args): 101 | super().__init__(*args) 102 | ... 103 | self.grafana_source_consumer = GrafanaSourceConsumer(self) 104 | ... 105 | 106 | 2. A Grafana charm also needs to listen to the 107 | `GrafanaSourceEvents` events emitted by the `GrafanaSourceConsumer` 108 | by adding itself as an observer for these events: 109 | 110 | self.framework.observe( 111 | self.grafana_source_consumer.on.sources_changed, 112 | self._on_sources_changed, 113 | ) 114 | self.framework.observe( 115 | self.grafana_source_consumer.on.sources_to_delete_changed, 116 | self._on_sources_to_delete_change, 117 | ) 118 | 119 | The reason for two separate events is that Grafana keeps track of 120 | removed datasources in its [datasource provisioning]( 121 | https://grafana.com/docs/grafana/latest/administration/provisioning/#data-sources). 122 | 123 | If your charm is merely implementing a `grafana-source`-compatible API, 124 | and is does not follow exactly the same semantics as Grafana, observing these 125 | events may not be needed. 
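For completeness, a hedged sketch of what those handlers might do in the Grafana
charm; `_configure_sources` is a hypothetical helper that stands in for re-rendering
the datasource provisioning YAML and reloading the workload:

    def _on_sources_changed(self, event):
        # Newly related datasources should be (re)provisioned.
        self._configure_sources(event)

    def _on_sources_to_delete_change(self, event):
        # Removed datasources are recorded so Grafana can be told to forget them
        # (via a "deleteDatasources" provisioning entry).
        self._configure_sources(event)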
126 | """ 127 | 128 | import json 129 | import logging 130 | import re 131 | import socket 132 | from typing import Any, Dict, List, Optional, Union 133 | 134 | from ops.charm import ( 135 | CharmBase, 136 | CharmEvents, 137 | RelationChangedEvent, 138 | RelationDepartedEvent, 139 | RelationEvent, 140 | RelationJoinedEvent, 141 | RelationRole, 142 | ) 143 | from ops.framework import ( 144 | BoundEvent, 145 | EventBase, 146 | EventSource, 147 | Object, 148 | ObjectEvents, 149 | StoredDict, 150 | StoredList, 151 | StoredState, 152 | ) 153 | from ops.model import Relation 154 | 155 | # The unique Charmhub library identifier, never change it 156 | LIBID = "974705adb86f40228298156e34b460dc" 157 | 158 | # Increment this major API version when introducing breaking changes 159 | LIBAPI = 0 160 | 161 | # Increment this PATCH version before using `charmcraft publish-lib` or reset 162 | # to 0 if you are raising the major API version 163 | LIBPATCH = 11 164 | 165 | logger = logging.getLogger(__name__) 166 | 167 | DEFAULT_RELATION_NAME = "grafana-source" 168 | DEFAULT_PEER_NAME = "grafana" 169 | RELATION_INTERFACE_NAME = "grafana_datasource" 170 | 171 | 172 | def _type_convert_stored(obj): 173 | """Convert Stored* to their appropriate types, recursively.""" 174 | if isinstance(obj, StoredList): 175 | return list(map(_type_convert_stored, obj)) 176 | elif isinstance(obj, StoredDict): 177 | rdict = {} 178 | for k in obj.keys(): 179 | rdict[k] = _type_convert_stored(obj[k]) 180 | return rdict 181 | else: 182 | return obj 183 | 184 | 185 | class RelationNotFoundError(Exception): 186 | """Raised if there is no relation with the given name.""" 187 | 188 | def __init__(self, relation_name: str): 189 | self._relation_name = relation_name 190 | self.message = "No relation named '{}' found".format(relation_name) 191 | 192 | super().__init__(self.message) 193 | 194 | 195 | class RelationInterfaceMismatchError(Exception): 196 | """Raised if the relation with the given name has a different interface.""" 197 | 198 | def __init__( 199 | self, 200 | relation_name: str, 201 | expected_relation_interface: str, 202 | actual_relation_interface: str, 203 | ): 204 | self._relation_name = relation_name 205 | self.expected_relation_interface = expected_relation_interface 206 | self.actual_relation_interface = actual_relation_interface 207 | self.message = ( 208 | "The '{}' relation has '{}' as " 209 | "interface rather than the expected '{}'".format( 210 | relation_name, actual_relation_interface, expected_relation_interface 211 | ) 212 | ) 213 | 214 | super().__init__(self.message) 215 | 216 | 217 | class RelationRoleMismatchError(Exception): 218 | """Raised if the relation with the given name has a different direction.""" 219 | 220 | def __init__( 221 | self, 222 | relation_name: str, 223 | expected_relation_role: RelationRole, 224 | actual_relation_role: RelationRole, 225 | ): 226 | self._relation_name = relation_name 227 | self.expected_relation_interface = expected_relation_role 228 | self.actual_relation_role = actual_relation_role 229 | self.message = "The '{}' relation has role '{}' rather than the expected '{}'".format( 230 | relation_name, repr(actual_relation_role), repr(expected_relation_role) 231 | ) 232 | 233 | super().__init__(self.message) 234 | 235 | 236 | def _validate_relation_by_interface_and_direction( 237 | charm: CharmBase, 238 | relation_name: str, 239 | expected_relation_interface: str, 240 | expected_relation_role: RelationRole, 241 | ) -> None: 242 | """Verifies that a relation has the necessary 
characteristics.
243 | 
244 |     Verifies that the `relation_name` provided: (1) exists in metadata.yaml,
245 |     (2) declares as interface the interface name passed as `relation_interface`
246 |     and (3) has the right "direction", i.e., it is a relation that `charm`
247 |     provides or requires.
248 | 
249 |     Args:
250 |         charm: a `CharmBase` object to scan for the matching relation.
251 |         relation_name: the name of the relation to be verified.
252 |         expected_relation_interface: the interface name to be matched by the
253 |             relation named `relation_name`.
254 |         expected_relation_role: whether the `relation_name` must be either
255 |             provided or required by `charm`.
256 |     """
257 |     if relation_name not in charm.meta.relations:
258 |         raise RelationNotFoundError(relation_name)
259 | 
260 |     relation = charm.meta.relations[relation_name]
261 | 
262 |     actual_relation_interface = relation.interface_name
263 |     if actual_relation_interface != expected_relation_interface:
264 |         raise RelationInterfaceMismatchError(
265 |             relation_name, expected_relation_interface, actual_relation_interface
266 |         )
267 | 
268 |     if expected_relation_role == RelationRole.provides:
269 |         if relation_name not in charm.meta.provides:
270 |             raise RelationRoleMismatchError(
271 |                 relation_name, RelationRole.provides, RelationRole.requires
272 |             )
273 |     elif expected_relation_role == RelationRole.requires:
274 |         if relation_name not in charm.meta.requires:
275 |             raise RelationRoleMismatchError(
276 |                 relation_name, RelationRole.requires, RelationRole.provides
277 |             )
278 |     else:
279 |         raise Exception("Unexpected RelationDirection: {}".format(expected_relation_role))
280 | 
281 | 
282 | class SourceFieldsMissingError(Exception):
283 |     """An exception to indicate that there are missing fields in a Grafana datasource definition."""
284 | 
285 |     pass
286 | 
287 | 
288 | class GrafanaSourcesChanged(EventBase):
289 |     """Event emitted when Grafana sources change."""
290 | 
291 |     def __init__(self, handle, data=None):
292 |         super().__init__(handle)
293 |         self.data = data
294 | 
295 |     def snapshot(self) -> Dict:
296 |         """Save grafana source information."""
297 |         return {"data": self.data}
298 | 
299 |     def restore(self, snapshot) -> None:
300 |         """Restore grafana source information."""
301 |         self.data = snapshot["data"]
302 | 
303 | 
304 | class GrafanaSourceEvents(ObjectEvents):
305 |     """Events raised by :class:`GrafanaSourceEvents`."""
306 | 
307 |     # We are emitting multiple events for the same thing due to the way Grafana provisions
308 |     # datasources. There is no "convenient" way to tell Grafana to remove them outside of
309 |     # setting a separate "deleteDatasources" key in the configuration file to tell Grafana
310 |     # to forget about them, and the reasons why sources_to_delete -> deleteDatasources
311 |     # would be emitted is intrinsically linked to the sources themselves.
312 |     sources_changed = EventSource(GrafanaSourcesChanged)
313 |     sources_to_delete_changed = EventSource(GrafanaSourcesChanged)
314 | 
315 | 
316 | class GrafanaSourceProvider(Object):
317 |     """A provider object for Grafana datasources."""
318 | 
319 |     def __init__(
320 |         self,
321 |         charm: CharmBase,
322 |         source_type: str,
323 |         source_port: Optional[str] = "",
324 |         source_url: Optional[str] = "",
325 |         refresh_event: Optional[BoundEvent] = None,
326 |         relation_name: str = DEFAULT_RELATION_NAME,
327 |         extra_fields: Optional[dict] = None,
328 |     ) -> None:
329 |         """Construct a Grafana charm client.
330 | 
331 |         The :class:`GrafanaSourceProvider` object provides an interface
332 |         to Grafana. This interface supports providing additional
333 |         sources for Grafana to monitor. For example, if a charm
334 |         exposes some metrics which are consumable by an ingestor
335 |         (such as Prometheus), then an additional source can be added
336 |         by instantiating a :class:`GrafanaSourceProvider` object
337 |         as follows:
338 | 
339 |             self.grafana_source_provider = GrafanaSourceProvider(
340 |                 self,
341 |                 source_type="prometheus",
342 |                 source_port="9090",
343 |             )
344 | 
345 |         Args:
346 |             charm: a :class:`CharmBase` object which manages this
347 |                 :class:`GrafanaSourceProvider` object. Generally this is
348 |                 `self` in the instantiating class.
349 |             source_type: an optional (default `prometheus`) source type
350 |                 required for Grafana configuration. The value must match
351 |                 the DataSource type from the Grafana perspective.
352 |             source_port: an optional (default `9090`) source port
353 |                 required for Grafana configuration.
354 |             source_url: an optional source URL which can be used, for example, if
355 |                 ingress for a source is enabled, or a URL path to the API consumed
356 |                 by the datasource must be specified for another reason. If set,
357 |                 'source_port' will not be used.
358 |             relation_name: string name of the relation that provides the
359 |                 Grafana source service. It is strongly advised not to change
360 |                 the default, so that people deploying your charm will have a
361 |                 consistent experience with all other charms that provide
362 |                 Grafana datasources.
363 |             refresh_event: a :class:`CharmEvents` event on which the IP
364 |                 address should be refreshed in case of pod or
365 |                 machine/VM restart.
366 |             extra_fields: a dict which is used for additional information required
367 |                 for some datasources in the `jsonData` field
368 |         """
369 |         _validate_relation_by_interface_and_direction(
370 |             charm, relation_name, RELATION_INTERFACE_NAME, RelationRole.provides
371 |         )
372 | 
373 |         super().__init__(charm, relation_name)
374 |         self._charm = charm
375 |         self._relation_name = relation_name
376 |         events = self._charm.on[relation_name]
377 | 
378 |         self._source_type = source_type
379 |         if source_type == "alertmanager":
380 |             if not extra_fields:
381 |                 extra_fields = {"implementation": "prometheus"}
382 |             elif not extra_fields.get("implementation", None):
383 |                 extra_fields["implementation"] = "prometheus"
384 | 
385 |         self._extra_fields = extra_fields
386 | 
387 |         if not refresh_event:
388 |             if len(self._charm.meta.containers) == 1:
389 |                 container = list(self._charm.meta.containers.values())[0]
390 |                 refresh_event = self._charm.on[container.name.replace("-", "_")].pebble_ready
391 | 
392 |         if source_port and source_url:
393 |             logger.warning(
394 |                 "Both `source_port` and `source_url` were specified! Using "
395 |                 "`source_url` as the address."
396 |             )
397 | 
398 |         if source_url and not re.match(r"^\w+://", source_url):
399 |             logger.warning(
400 |                 "'source_url' should start with a scheme, such as "
401 |                 "'http://'. Assuming 'http://' since none is present."
402 |             )
403 |             source_url = "http://{}".format(source_url)
404 | 
405 |         self._source_port = source_port
406 |         self._source_url = source_url
407 | 
408 |         self.framework.observe(events.relation_joined, self._set_sources_from_event)
409 |         if refresh_event:
410 |             self.framework.observe(refresh_event, self._set_unit_details)
411 | 
412 |     def update_source(self, source_url: Optional[str] = ""):
413 |         """Trigger the update of relation data."""
414 |         if source_url:
415 |             self._source_url = source_url
416 | 
417 |         rel = self._charm.model.get_relation(self._relation_name)
418 | 
419 |         if not rel:
420 |             return
421 | 
422 |         self._set_sources(rel)
423 | 
424 |     def _set_sources_from_event(self, event: RelationJoinedEvent) -> None:
425 |         """Get a `Relation` object from the event to pass on."""
426 |         self._set_sources(event.relation)
427 | 
428 |     def _set_sources(self, rel: Relation):
429 |         """Inform the consumer about the source configuration."""
430 |         self._set_unit_details(rel)
431 | 
432 |         if not self._charm.unit.is_leader():
433 |             return
434 | 
435 |         logger.debug("Setting Grafana data sources: %s", self._scrape_data)
436 |         rel.data[self._charm.app]["grafana_source_data"] = json.dumps(self._scrape_data)
437 | 
438 |     @property
439 |     def _scrape_data(self) -> Dict:
440 |         """Generate source metadata.
441 | 
442 |         Returns:
443 |             Source configuration data for Grafana.
444 |         """
445 |         data = {
446 |             "model": str(self._charm.model.name),
447 |             "model_uuid": str(self._charm.model.uuid),
448 |             "application": str(self._charm.model.app.name),
449 |             "type": self._source_type,
450 |             "extra_fields": self._extra_fields,
451 |         }
452 |         return data
453 | 
454 |     def _set_unit_details(self, _: Union[BoundEvent, RelationEvent, Relation]):
455 |         """Set unit host details.
456 | 
457 |         Each time a provider charm container is restarted it updates its own host address in the
458 |         unit relation data for the Grafana consumer.
459 |         """
460 |         for relation in self._charm.model.relations[self._relation_name]:
461 |             url = self._source_url or "{}:{}".format(socket.getfqdn(), self._source_port)
462 |             relation.data[self._charm.unit]["grafana_source_host"] = url
463 | 
464 | 
465 | class GrafanaSourceConsumer(Object):
466 |     """A consumer object for working with Grafana datasources."""
467 | 
468 |     on = GrafanaSourceEvents()
469 |     _stored = StoredState()
470 | 
471 |     def __init__(
472 |         self,
473 |         charm: CharmBase,
474 |         relation_name: str = DEFAULT_RELATION_NAME,
475 |     ) -> None:
476 |         """A Grafana-based monitoring service consumer, i.e., the charm that uses a datasource.
477 | 
478 |         Args:
479 |             charm: a :class:`CharmBase` instance that manages this
480 |                 instance of the Grafana source service.
481 |             relation_name: string name of the relation that provides the
482 |                 Grafana source service. It is strongly advised not to change
483 |                 the default, so that people deploying your charm will have a
484 |                 consistent experience with all other charms that provide
485 |                 Grafana datasources.
486 |         """
487 |         _validate_relation_by_interface_and_direction(
488 |             charm, relation_name, RELATION_INTERFACE_NAME, RelationRole.requires
489 |         )
490 | 
491 |         super().__init__(charm, relation_name)
492 |         self._relation_name = relation_name
493 |         self._charm = charm
494 |         events = self._charm.on[relation_name]
495 | 
496 |         # We're stuck with this forever now so upgrades work, or until such point as we can
497 |         # break compatibility
498 |         self._stored.set_default(
499 |             sources=dict(),
500 |             sources_to_delete=set(),
501 |         )
502 | 
503 |         self.framework.observe(events.relation_changed, self._on_grafana_source_relation_changed)
504 |         self.framework.observe(events.relation_departed, self._on_grafana_source_relation_departed)
505 |         self.framework.observe(
506 |             self._charm.on[DEFAULT_PEER_NAME].relation_changed,
507 |             self._on_grafana_peer_changed,
508 |         )
509 | 
510 |     def _on_grafana_source_relation_changed(self, event: CharmEvents) -> None:
511 |         """Handle relation changes in related providers.
512 | 
513 |         If there are changes in relations between Grafana source consumers
514 |         and providers, this event handler (if the unit is the leader) will
515 |         get data from the incoming grafana-source relation, make the relation
516 |         data available in the app's datastore object, and notify observers
517 |         through a :class:`GrafanaSourcesChanged` event. This data is set using
518 |         Juju application topology.
519 | 
520 |         The Grafana charm can then respond to the event to update its
521 |         configuration.
522 |         """
523 |         if self._charm.unit.is_leader():
524 |             sources = {}
525 | 
526 |             for rel in self._charm.model.relations[self._relation_name]:
527 |                 source = self._get_source_config(rel)
528 |                 if source:
529 |                     sources[rel.id] = source
530 | 
531 |             self.set_peer_data("sources", sources)
532 | 
533 |         self.on.sources_changed.emit()
534 | 
535 |     def _on_grafana_peer_changed(self, _: RelationChangedEvent) -> None:
536 |         """Emit source events on peer events so secondary charm data updates."""
537 |         if self._charm.unit.is_leader():
538 |             return
539 |         self.on.sources_changed.emit()
540 |         self.on.sources_to_delete_changed.emit()
541 | 
542 |     def _get_source_config(self, rel: Relation):
543 |         """Generate configuration from data stored in relation data by providers."""
544 |         source_data = json.loads(rel.data[rel.app].get("grafana_source_data", "{}"))  # type: ignore
545 |         if not source_data:
546 |             return
547 | 
548 |         data = []
549 | 
550 |         sources_to_delete = self.get_peer_data("sources_to_delete")
551 |         for unit_name, host_addr in self._relation_hosts(rel).items():
552 |             unique_source_name = "juju_{}_{}_{}_{}".format(
553 |                 source_data["model"],
554 |                 source_data["model_uuid"],
555 |                 source_data["application"],
556 |                 unit_name.split("/")[1],
557 |             )
558 | 
559 |             host = (
560 |                 "http://{}".format(host_addr) if not re.match(r"^\w+://", host_addr) else host_addr
561 |             )
562 | 
563 |             host_data = {
564 |                 "unit": unit_name,
565 |                 "source_name": unique_source_name,
566 |                 "source_type": source_data["type"],
567 |                 "url": host,
568 |             }
569 |             if source_data.get("extra_fields", None):
570 |                 host_data["extra_fields"] = source_data.get("extra_fields")
571 | 
572 |             if host_data["source_name"] in sources_to_delete:
573 |                 sources_to_delete.remove(host_data["source_name"])
574 | 
575 |             data.append(host_data)
576 |         self.set_peer_data("sources_to_delete", list(sources_to_delete))
577 |         return data
578 | 
579 |     def _relation_hosts(self, rel: Relation) -> Dict:
580 |         """Fetch host names and addresses of all provider units for a single relation.
581 | 582 | Args: 583 | rel: An `ops.model.Relation` object for which the host name to 584 | address mapping is required. 585 | 586 | Returns: 587 | A dictionary that maps unit names to unit addresses for 588 | the specified relation. 589 | """ 590 | hosts = {} 591 | for unit in rel.units: 592 | host_address = rel.data[unit].get("grafana_source_host") 593 | if not host_address: 594 | continue 595 | hosts[unit.name] = host_address 596 | return hosts 597 | 598 | def _on_grafana_source_relation_departed(self, event: RelationDepartedEvent) -> None: 599 | """Update job config when providers depart. 600 | 601 | When a Grafana source provider departs, the configuration 602 | for that provider is removed from the list of sources jobs, 603 | added to a list of sources to remove, and other providers 604 | are informed through a :class:`GrafanaSourcesChanged` event. 605 | """ 606 | removed_source = False 607 | if self._charm.unit.is_leader(): 608 | removed_source = self._remove_source_from_datastore(event) 609 | 610 | if removed_source: 611 | self.on.sources_to_delete_changed.emit() 612 | 613 | def _remove_source_from_datastore(self, event: RelationDepartedEvent) -> bool: 614 | """Remove the grafana-source from the datastore. 615 | 616 | Add the name to the list of sources to remove when a relation is broken. 617 | 618 | Returns a boolean indicating whether an event should be emitted. 619 | """ 620 | rel_id = event.relation.id 621 | logger.debug("Removing all data for relation: {}".format(rel_id)) 622 | 623 | stored_sources = self.get_peer_data("sources") 624 | 625 | removed_source = stored_sources.pop(str(rel_id), None) 626 | if removed_source: 627 | if event.unit: 628 | # Remove one unit only 629 | dead_unit = [s for s in removed_source if s["unit"] == event.unit.name][0] 630 | self._remove_source(dead_unit["source_name"]) 631 | 632 | # Re-update the list of stored sources 633 | stored_sources[rel_id] = [ 634 | dict(s) for s in removed_source if s["unit"] != event.unit.name 635 | ] 636 | else: 637 | for host in removed_source: 638 | self._remove_source(host["source_name"]) 639 | 640 | self.set_peer_data("sources", stored_sources) 641 | return True 642 | return False 643 | 644 | def _remove_source(self, source_name: str) -> None: 645 | """Remove a datasource by name.""" 646 | sources_to_delete = self.get_peer_data("sources_to_delete") 647 | if source_name not in sources_to_delete: 648 | sources_to_delete.append(source_name) 649 | self.set_peer_data("sources_to_delete", sources_to_delete) 650 | 651 | def upgrade_keys(self) -> None: 652 | """On upgrade, ensure stored data maintains compatibility.""" 653 | # self._stored.sources may have hyphens instead of underscores in key names. 654 | # Make sure they reconcile. 
655 |         self._set_default_data()
656 |         sources = _type_convert_stored(self._stored.sources)
657 |         for rel_id in sources.keys():
658 |             for i in range(len(sources[rel_id])):
659 |                 sources[rel_id][i].update(
660 |                     {k.replace("-", "_"): v for k, v in sources[rel_id][i].items()}
661 |                 )
662 | 
663 |         # If there's stored data, merge it and purge it
664 |         if self._stored.sources:
665 |             self._stored.sources = {}
666 |             peer_sources = self.get_peer_data("sources")
667 |             sources.update(peer_sources)
668 |             self.set_peer_data("sources", sources)
669 | 
670 |         if self._stored.sources_to_delete:
671 |             old_sources_to_delete = _type_convert_stored(self._stored.sources_to_delete)
672 |             self._stored.sources_to_delete = set()
673 |             peer_sources_to_delete = set(self.get_peer_data("sources_to_delete"))
674 |             sources_to_delete = set.union(old_sources_to_delete, peer_sources_to_delete)
675 |             self.set_peer_data("sources_to_delete", list(sources_to_delete))  # sets are not JSON-serializable
676 | 
677 |     @property
678 |     def sources(self) -> List[dict]:
679 |         """Returns an array of sources the source_consumer knows about."""
680 |         sources = []
681 |         stored_sources = self.get_peer_data("sources")
682 |         for source in stored_sources.values():
683 |             sources.extend([host for host in _type_convert_stored(source)])
684 | 
685 |         return sources
686 | 
687 |     @property
688 |     def sources_to_delete(self) -> List[str]:
689 |         """Returns an array of source names which have been removed."""
690 |         return self.get_peer_data("sources_to_delete")
691 | 
692 |     def _set_default_data(self) -> None:
693 |         """Set defaults if they are not in peer relation data."""
694 |         data = {"sources": {}, "sources_to_delete": []}  # type: ignore
695 |         for k, v in data.items():
696 |             if not self.get_peer_data(k):
697 |                 self.set_peer_data(k, v)
698 | 
699 |     def set_peer_data(self, key: str, data: Any) -> None:
700 |         """Put information into the peer data bucket instead of `StoredState`."""
701 |         self._charm.peers.data[self._charm.app][key] = json.dumps(data)  # type: ignore
702 | 
703 |     def get_peer_data(self, key: str) -> Any:
704 |         """Retrieve information from the peer data bucket instead of `StoredState`."""
705 |         data = self._charm.peers.data[self._charm.app].get(key, "")  # type: ignore
706 |         return json.loads(data) if data else {}
707 | 
--------------------------------------------------------------------------------
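To round out the picture from the provider side, the following is a minimal, hypothetical sketch of how a datasource charm could wire up `GrafanaSourceProvider` from this library; the charm class, port, source type, and `external-url` option are illustrative assumptions rather than the wiring used by this repository's `src/charm.py`:

```
# Illustrative only: assumes metadata.yaml declares
#   provides:
#     grafana-source:
#       interface: grafana_datasource
# The port, source type, and "external-url" option are assumptions.
from charms.grafana_k8s.v0.grafana_source import GrafanaSourceProvider
from ops.charm import CharmBase
from ops.main import main


class ExampleDatasourceCharm(CharmBase):
    def __init__(self, *args):
        super().__init__(*args)
        # With no refresh_event given, the library falls back to the single
        # container's pebble-ready event to re-advertise the unit address.
        self.grafana_source_provider = GrafanaSourceProvider(
            self,
            source_type="alertmanager",  # must match a Grafana datasource type
            source_port="9093",
        )
        self.framework.observe(self.on.config_changed, self._on_config_changed)

    def _on_config_changed(self, _event):
        # If an externally reachable URL is configured (e.g. behind ingress),
        # push it to Grafana instead of the unit's FQDN and port.
        external_url = self.config.get("external-url", "")
        if external_url:
            self.grafana_source_provider.update_source(source_url=external_url)


if __name__ == "__main__":
    main(ExampleDatasourceCharm)
```

Note that if both `source_port` and `source_url` end up set, the library logs a warning and prefers `source_url` as the advertised address.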