├── .jujuignore ├── .gitignore ├── config.yaml ├── src ├── errors.py ├── loki_alert_rules │ ├── grafana_agent_high_rate.rule │ └── high_error_rate.rule ├── state.py ├── k8s_charm.py ├── machine_charm.py ├── apr.py ├── charm.py └── apr_charm.py ├── requirements.txt ├── charmcraft.yaml ├── README.md ├── machine_metadata.yaml ├── k8s_metadata.yaml ├── _workload └── apr.py ├── pyproject.toml ├── tox.ini ├── LICENSE └── lib └── charms └── loki_k8s └── v0 └── loki_push_api.py /.jujuignore: -------------------------------------------------------------------------------- 1 | /venv 2 | *.py[cod] 3 | *.charm 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | venv/ 2 | build/ 3 | *.charm 4 | .coverage 5 | __pycache__/ 6 | *.py[cod] 7 | .tox 8 | .idea/ 9 | tests/integration/*-tester/lib/ 10 | metadata.yaml 11 | -------------------------------------------------------------------------------- /config.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Canonical Ltd. 2 | # See LICENSE file for licensing details. 3 | 4 | options: 5 | sides: 6 | description: | 7 | Number of sides of the dice. 8 | type: integer 9 | default: 6 10 | -------------------------------------------------------------------------------- /src/errors.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Canonical Ltd. 2 | # See LICENSE file for licensing details. 
3 | 4 | 5 | class AprError(RuntimeError): 6 | """Base class for all errors raised by apr and charms.""" 7 | 8 | 9 | class AprServiceError(AprError): 10 | """Base class for errors where the apr service is involved.""" 11 | -------------------------------------------------------------------------------- /src/loki_alert_rules/grafana_agent_high_rate.rule: -------------------------------------------------------------------------------- 1 | groups: 2 | - name: apr-operator-high-log-volume 3 | rules: 4 | - alert: HighLogVolume 5 | expr: | 6 | count_over_time(({%%juju_topology%%})[30s]) > 100 7 | labels: 8 | severity: high 9 | annotations: 10 | summary: Log rate is too high! 11 | -------------------------------------------------------------------------------- /src/loki_alert_rules/high_error_rate.rule: -------------------------------------------------------------------------------- 1 | alert: HostHighLogErrorRate 2 | expr: count_over_time({job="varlogs"} |= "error" [1h]) > 100 3 | for: 0m 4 | labels: 5 | severity: warning 6 | annotations: 7 | summary: High error rate in logs (instance {{ $labels.instance }}) 8 | description: "High error rate in logs\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" 9 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Canonical Ltd. 2 | # See LICENSE file for licensing details. 
3 | 4 | # FIXME: Packing the charm with 2.2.0+139.gd011d92 will not include dependencies in PYDEPS key: 5 | # https://chat.charmhub.io/charmhub/pl/wngp665ycjnb78ar9ojrfhxjkr 6 | # That's why we are including cosl here until the bug in charmcraft is solved 7 | cosl 8 | ops 9 | pydantic 10 | requests 11 | kubernetes 12 | lightkube 13 | lightkube-models 14 | -------------------------------------------------------------------------------- /src/state.py: -------------------------------------------------------------------------------- 1 | import dataclasses 2 | 3 | 4 | class AprStateBackend: 5 | @staticmethod 6 | def temperature_in_delft(): 7 | # todo: curl "https://api.open-meteo.com/v1/forecast?latitude=52.52&longitude=13.41¤t_weather=true&hourly=temperature_2m,relativehumidity_2m,windspeed_10m" 8 | return 4.2 # brr 9 | 10 | 11 | @dataclasses.dataclass 12 | class AprState: 13 | is_up: bool 14 | temperature_in_delft: int 15 | -------------------------------------------------------------------------------- /charmcraft.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Canonical Ltd. 2 | # See LICENSE file for licensing details. 3 | 4 | type: charm 5 | bases: 6 | - build-on: 7 | - name: "ubuntu" 8 | channel: "22.04" 9 | architectures: ["amd64"] 10 | run-on: 11 | - name: "ubuntu" 12 | channel: "22.04" 13 | architectures: ["amd64"] 14 | - name: "ubuntu" 15 | channel: "20.04" 16 | architectures: ["amd64"] 17 | parts: 18 | charm: 19 | build-packages: 20 | - git 21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Apr Operator 2 | This charm deploys and operates `apr`, a tiny dice-rolling fastapi server. 3 | 4 | It's obviously not about the workload. 
5 | 6 | This is a 'demo' charm repo for playing around with ideas on how to structure complex, large, real charms, pushing the boundaries of what patterns and best practices to adopt when setting off on some new large project. 7 | 8 | It also tangentially investigates how to structure a universal charm: one that works on machine and kubernetes substrates. -------------------------------------------------------------------------------- /machine_metadata.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Canonical Ltd. 2 | # See LICENSE file for licensing details. 3 | 4 | name: apr 5 | 6 | description: Apr operator. 7 | summary: This is a apr operator. 8 | maintainers: 9 | - Pietro Pasotti 10 | 11 | website: https://charmhub.io/apr-operator 12 | 13 | #subordinate: false 14 | series: 15 | - jammy 16 | - focal 17 | 18 | requires: 19 | # juju-info: 20 | # interface: juju-info 21 | # scope: container 22 | 23 | logging-consumer: 24 | interface: loki_push_api 25 | 26 | peers: 27 | cluster: 28 | interface: apr_replicas 29 | -------------------------------------------------------------------------------- /src/k8s_charm.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # Copyright 2022 Canonical Ltd. 4 | # See LICENSE file for licensing details. 
5 | 6 | """A juju charm for Grafana Agent on Kubernetes.""" 7 | import logging 8 | 9 | from apr_charm import AprCharm 10 | 11 | logger = logging.getLogger(__name__) 12 | 13 | APR_CONFIG = "/apr.cfg" 14 | APR_LOGS = "/apr.log" 15 | 16 | 17 | class AprK8sCharm(AprCharm): 18 | """Kubernetes version of the Apr charm.""" 19 | 20 | def __init__(self, *args): 21 | super().__init__(*args) 22 | 23 | def install(self) -> None: 24 | """Install the apr software.""" 25 | 26 | def start(self) -> None: 27 | """Start the apr software.""" 28 | -------------------------------------------------------------------------------- /src/machine_charm.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # Copyright 2022 Canonical Ltd. 4 | # See LICENSE file for licensing details. 5 | 6 | """A juju charm for Grafana Agent on Kubernetes.""" 7 | import logging 8 | 9 | from apr_charm import AprCharm 10 | 11 | logger = logging.getLogger(__name__) 12 | 13 | APR_CONFIG = "/var/apr/apr.cfg" 14 | APR_LOGS = "/log/apr.log" 15 | 16 | 17 | class AprMachineCharm(AprCharm): 18 | """Machine version of the Apr charm.""" 19 | 20 | def __init__(self, *args): 21 | super().__init__(*args) 22 | 23 | def install(self) -> None: 24 | """Install the apr software.""" 25 | 26 | def start(self) -> None: 27 | """Start the apr software.""" 28 | -------------------------------------------------------------------------------- /k8s_metadata.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Canonical Ltd. 2 | # See LICENSE file for licensing details. 3 | 4 | # Copyright 2021 Canonical Ltd. 5 | # See LICENSE file for licensing details. 6 | 7 | name: apr 8 | 9 | description: Apr operator. 10 | summary: This is a apr operator. 
11 | maintainers: 12 | - Pietro Pasotti 13 | 14 | website: https://charmhub.io/apr-operator 15 | 16 | #subordinate: false 17 | series: 18 | - jammy 19 | - focal 20 | 21 | containers: 22 | agent: 23 | resource: image 24 | mounts: 25 | - storage: data 26 | location: /tmp/agent/data 27 | 28 | resources: 29 | image: 30 | type: oci-image 31 | upstream-source: python:slim-buster 32 | description: OCI image for Apr Operator 33 | 34 | storage: 35 | data: 36 | type: filesystem 37 | 38 | requires: 39 | # juju-info: 40 | # interface: juju-info 41 | # scope: container 42 | 43 | logging-consumer: 44 | interface: loki_push_api 45 | 46 | peers: 47 | cluster: 48 | interface: apr_replicas 49 | -------------------------------------------------------------------------------- /_workload/apr.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import os 4 | import random 5 | from pathlib import Path 6 | from typing import Dict 7 | 8 | from fastapi import FastAPI 9 | from uvicorn import run 10 | 11 | APR_CONFIG = Path(os.getenv('APR_CONFIG')) 12 | APR_LOGS = Path(os.getenv('APR_LOGS')) 13 | 14 | logger = logging.getLogger(__file__) 15 | logger.addHandler(logging.FileHandler(APR_LOGS)) 16 | logger.setLevel('INFO') 17 | 18 | 19 | def serve(cfg: Dict): 20 | app = FastAPI() 21 | 22 | @app.get("/") 23 | async def root(): 24 | sides = cfg['sides'] 25 | roll = random.randint(0, sides) 26 | logger.debug(f'rolled {roll}') 27 | return {"roll": str(roll)} 28 | 29 | logger.info('Ready...') 30 | run(app, host="0.0.0.0") 31 | 32 | 33 | def apr(): 34 | cfg = APR_CONFIG 35 | if not cfg.exists(): 36 | raise RuntimeError('config not found, cannot apr.') 37 | parsed_cfg = json.loads(cfg.read_text()) 38 | logger.debug(f'found cfg at {cfg}') 39 | serve(parsed_cfg) 40 | 41 | 42 | if __name__ == '__main__': 43 | apr() 44 | -------------------------------------------------------------------------------- /src/apr.py: 
# Copyright 2022 Canonical Ltd.
# See LICENSE file for licensing details.

"""Workload abstraction layer for apr operator."""

import logging

import pydantic

from src.state import AprState, AprStateBackend

logger = logging.getLogger(__name__)


class AprConfig(pydantic.BaseModel):
    """Validated apr workload configuration."""

    # Number of die faces; restricted to 'civilised' die sizes below.
    sides: int

    @pydantic.validator("sides")
    def _validate_sides(cls, value):  # noqa: N805 -- pydantic passes the class
        """Accept only well-known die sizes; return the validated value."""
        # BUG FIX: pydantic validators are implicit classmethods (the first
        # argument is the class, not an instance) and MUST return the value.
        # The original declared `self` and returned None, silently coercing
        # every validated `sides` to None.
        if value not in (2, 4, 6, 8, 10, 12, 20, 30, 40, 100):
            raise ValueError(f"How unplatonic: {value}.")  # Barbaric.
        return value


class Apr:
    """Apr software facade."""

    def __init__(self, sides: int, is_up: bool):
        """Capture the desired die size and snapshot current workload state."""
        self._sides = sides
        backend = AprStateBackend()
        self._state = AprState(is_up=is_up, temperature_in_delft=backend.temperature_in_delft())

    def _generate_config(self):
        """Render the raw config dict fed to AprConfig."""
        return {"sides": self._sides}

    @property
    def config(self) -> AprConfig:
        """Apr configuration object."""
        return AprConfig(**self._generate_config())

# -------- src/charm.py (continues on the next line) --------
#!/usr/bin/env python3
# Copyright 2022 Canonical Ltd.
# See LICENSE file for licensing details.
import shlex
from logging import getLogger
from subprocess import PIPE, run
from typing import Literal

logger = getLogger(__file__)


def get_substrate() -> Literal["k8s", "machine"]:
    """Are we on kubernetes or on machine?

    PID 1 is ``pebble`` inside a sidecar (k8s) charm container and ``systemd``
    on a machine; the first line of /proc/1/sched starts with PID 1's name.
    """
    # BUG FIX: the original called run(*shlex.split("cat /proc/1/sched | head -n 1")),
    # which (a) unpacked the argv into unrelated positional parameters of
    # subprocess.run and (b) used a shell pipe without shell=True, so "|" and
    # "head" would have been passed to cat as file names. Run cat alone and
    # take the first line in Python instead.
    proc = run(shlex.split("cat /proc/1/sched"), text=True, stdout=PIPE)
    out = proc.stdout.split("\n", 1)[0]
    substrate: Literal["k8s", "machine"]
    if out.startswith("pebble"):
        substrate = "k8s"
    elif out.startswith("systemd"):
        substrate = "machine"
    else:
        # Guard the split: out may be empty if /proc/1/sched is unreadable.
        prefix = out.split()[0] if out.split() else "<empty>"
        logger.error(f"unknown substrate prefix: {prefix}; guessing machine.")
        substrate = "machine"

    logger.info(f"detected substrate: {substrate}")
    return substrate


if __name__ == "__main__":
    from ops.main import main

    _sub = get_substrate()
    if _sub == "machine":
        from machine_charm import AprMachineCharm

        logger.info("starting up machine charm")
        main(AprMachineCharm)
    elif _sub == "k8s":
        from k8s_charm import AprK8sCharm

        logger.info("starting up k8s charm")
        main(AprK8sCharm)
    else:
        # BUG FIX: the original raised TypeError(_sub) unconditionally after
        # the k8s branch; only an unrecognized substrate should be an error.
        raise TypeError(_sub)
3 | 4 | [project] 5 | name = "apr-operator" 6 | dependencies = [ 7 | "ops>=2.0", 8 | "pydantic", 9 | "requests", 10 | "kubernetes", 11 | "lightkube", 12 | "lightkube-models", 13 | ] 14 | version = "0.1" 15 | 16 | # Build tools configuration 17 | # required for tox to work with isolated_build 18 | [build-system] 19 | requires = [ 20 | "setuptools >= 35.0.2", 21 | "setuptools_scm >= 2.0.0, <3" 22 | ] 23 | 24 | build-backend = "setuptools.build_meta" 25 | 26 | # Formatting tools configuration 27 | [tool.black] 28 | line-length = 99 29 | target-version = ["py38"] 30 | 31 | [tool.isort] 32 | profile = "black" 33 | 34 | # Linting tools configuration 35 | [tool.flake8] 36 | max-line-length = 99 37 | max-doc-length = 99 38 | max-complexity = 10 39 | exclude = [".git", "__pycache__", ".tox", "build", "dist", "*.egg_info", "venv"] 40 | select = ["E", "W", "F", "C", "N", "R", "D", "H"] 41 | # Ignore W503, E501 because using black creates errors with this 42 | # Ignore D107 Missing docstring in __init__ 43 | ignore = ["W503", "E501", "D107"] 44 | # D100, D101, D102, D103: Ignore missing docstrings in tests 45 | per-file-ignores = ["tests/*:D100,D101,D102,D103,C901", "lib/*:C901"] 46 | docstring-convention = "google" 47 | # Check for properly formatted copyright header in each file 48 | copyright-check = "True" 49 | copyright-author = "Canonical Ltd." 
50 | copyright-regexp = "Copyright\\s\\d{4}([-,]\\d{4})*\\s+%(author)s" 51 | 52 | # Checking tools configuration 53 | [tool.coverage.run] 54 | branch = true 55 | 56 | [tool.coverage.report] 57 | show_missing = true 58 | 59 | [tool.pytest.ini_options] 60 | minversion = "6.0" 61 | log_cli_level = "INFO" 62 | asyncio_mode = "auto" 63 | 64 | [tool.pyright] 65 | pythonVersion = "3.8" 66 | pythonPlatform = "All" 67 | typeCheckingMode = "basic" # we'll work up to strict 68 | reportIncompatibleMethodOverride = false 69 | reportImportCycles = false 70 | -------------------------------------------------------------------------------- /src/apr_charm.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Canonical Ltd. 2 | # See LICENSE file for licensing details. 3 | 4 | """Common logic for both k8s and machine charms for Grafana Agent.""" 5 | import logging 6 | from contextlib import contextmanager 7 | from typing import Any, Dict, Tuple, Type 8 | 9 | from ops.charm import CharmBase 10 | from ops.framework import Framework 11 | from ops.model import ActiveStatus, ErrorStatus, MaintenanceStatus 12 | from requests.packages.urllib3.util import Retry # type: ignore 13 | 14 | from src.apr import Apr 15 | 16 | logger = logging.getLogger(__name__) 17 | 18 | 19 | class AprCharm(CharmBase): 20 | """Apr charm shared logic.""" 21 | 22 | def __new__(cls, *args: Any, **kwargs: Dict[Any, Any]): 23 | """Forbid the usage of Apr directly.""" 24 | if cls is AprCharm: 25 | raise TypeError("This is a base class and cannot be instantiated directly.") 26 | return super().__new__(cls) 27 | 28 | def __init__(self, framework: Framework): 29 | super().__init__(framework) 30 | self.framework.observe(self.on.install, self._on_install) 31 | self.framework.observe(self.on.start, self._on_start) 32 | self._apr = None 33 | 34 | @property 35 | def apr(self): 36 | if not self._apr: 37 | self._apr = Apr( 38 | sides=int(self.config.get("sides")), 39 | 
is_up=self._is_apr_up(), 40 | ) 41 | return self._apr 42 | 43 | @contextmanager 44 | def safe_yield( 45 | self, maintenance_message: str, catch: Tuple[Type[Exception]] = (Exception,) 46 | ) -> bool: 47 | self.unit.status = MaintenanceStatus(maintenance_message) 48 | try: 49 | yield 50 | except catch as e: 51 | self.unit.status = ErrorStatus(str(e)) 52 | self.unit.status = ActiveStatus() 53 | 54 | def _on_install(self, _event) -> None: 55 | """Install the apr software.""" 56 | with self.safe_yield("Installing apr software"): 57 | self.install() 58 | 59 | def _on_start(self, _event) -> None: 60 | """Start the apr software.""" 61 | with self.safe_yield("Starting apr software"): 62 | self.start() 63 | 64 | def __notimpl__(self): 65 | raise NotImplementedError() 66 | 67 | _is_apr_up = __notimpl__ 68 | start = __notimpl__ 69 | install = __notimpl__ 70 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Canonical Ltd. 2 | # See LICENSE file for licensing details. 
3 | [tox] 4 | isolated_build = True 5 | skip_missing_interpreters = True 6 | envlist = lint, static-{charm,lib}, unit-{k8s,machine}, scenario 7 | 8 | [vars] 9 | src_path = {toxinidir}/src 10 | tst_path = {toxinidir}/tests 11 | all_path = {[vars]src_path} {[vars]tst_path} 12 | 13 | [testenv] 14 | basepython = python3 15 | setenv = 16 | PYTHONPATH = {toxinidir}:{toxinidir}/lib:{[vars]src_path} 17 | PYTHONBREAKPOINT=ipdb.set_trace 18 | 19 | [testenv:fmt] 20 | skip_install=True 21 | description = Apply coding style standards to code 22 | deps = 23 | black 24 | isort 25 | commands = 26 | isort {[vars]all_path} 27 | black {[vars]all_path} 28 | 29 | [testenv:pack-k8s] 30 | skip_install=True 31 | description = Pack as k8s charm 32 | allowlist_externals = 33 | cp 34 | charmcraft 35 | commands = 36 | cp {toxinidir}/k8s_metadata.yaml {toxinidir}/metadata.yaml 37 | charmcraft pack 38 | rm {toxinidir}/metadata.yaml 39 | 40 | [testenv:pack-machine] 41 | skip_install=True 42 | description = Pack as machine charm 43 | allowlist_externals = 44 | cp 45 | charmcraft 46 | commands = 47 | cp {toxinidir}/machine_metadata.yaml {toxinidir}/metadata.yaml 48 | charmcraft pack 49 | rm {toxinidir}/metadata.yaml 50 | 51 | [testenv:lint] 52 | skip_install=True 53 | description = Check code against coding style standards 54 | deps = 55 | black 56 | flake8 < 5 57 | flake8-docstrings 58 | flake8-copyright 59 | flake8-builtins 60 | pyproject-flake8 61 | pep8-naming 62 | isort 63 | codespell 64 | commands = 65 | codespell . 
--skip .git --skip .tox --skip build --skip lib --skip venv --skip .mypy_cache --skip *.svg 66 | 67 | # pflake8 wrapper supports config from pyproject.toml 68 | pflake8 {[vars]all_path} 69 | isort --check-only --diff {[vars]all_path} 70 | black --check --diff {[vars]all_path} 71 | 72 | [testenv:static-{charm,lib}] 73 | skip_install=True 74 | description = Run static analysis checks 75 | deps = 76 | pyright 77 | -r {toxinidir}/requirements.txt 78 | lib: ops 79 | commands = 80 | pyright {[vars]src_path} 81 | 82 | [testenv:unit] 83 | description = Run charm unit tests 84 | deps = 85 | pytest 86 | pytest-subtests 87 | coverage[toml] 88 | deepdiff 89 | fs 90 | toml 91 | responses 92 | allowlist_externals = cp 93 | commands = 94 | # render as k8s 95 | cp k8s_metadata.yaml metadata.yaml 96 | 97 | coverage run \ 98 | --source={[vars]src_path} \ 99 | -m pytest -v --tb native --log-cli-level=INFO -s {posargs} {[vars]tst_path}/unit/k8s 100 | coverage report -m 101 | 102 | 103 | [testenv:integration] 104 | skip_install=True 105 | description = Run integration tests 106 | deps = 107 | aiohttp 108 | asyncstdlib 109 | # Libjuju needs to track the juju version 110 | juju ~= 3.0.0 111 | pytest 112 | prometheus-api-client 113 | pytest-operator 114 | allowlist_externals = cp 115 | commands = 116 | # use a better solution when we actually have machine code 117 | cp k8s_metadata.yaml metadata.yaml 118 | pytest -v --tb native --log-cli-level=INFO -s {posargs} {[vars]tst_path}/integration 119 | 120 | [testenv:scenario] 121 | description = Run scenario tests 122 | deps = 123 | pytest 124 | ops-scenario @ git+https://github.com/canonical/ops-scenario@relation-unit-and-app 125 | commands = 126 | pytest -v --tb native --log-cli-level=INFO -s {posargs} {[vars]tst_path}/scenario 127 | 128 | [testenv:check] 129 | skip_install=True 130 | depends = 131 | lint 132 | static 133 | 134 | [coverage:run] 135 | relative_files = True 136 | 
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 
39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /lib/charms/loki_k8s/v0/loki_push_api.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright 2021 Canonical Ltd. 3 | # See LICENSE file for licensing details. 4 | # 5 | # Learn more at: https://juju.is/docs/sdk 6 | 7 | r"""## Overview. 8 | 9 | This document explains how to use the two principal objects this library provides: 10 | 11 | - `LokiPushApiProvider`: This object is meant to be used by any Charmed Operator that needs to 12 | implement the provider side of the `loki_push_api` relation interface. For instance, a Loki charm. 13 | The provider side of the relation represents the server side, to which logs are being pushed. 14 | 15 | - `LokiPushApiConsumer`: This object is meant to be used by any Charmed Operator that needs to 16 | send log to Loki by implementing the consumer side of the `loki_push_api` relation interface. 17 | For instance, a Promtail or Grafana agent charm which needs to send logs to Loki. 
18 | 19 | - `LogProxyConsumer`: This object can be used by any Charmed Operator which needs to 20 | send telemetry, such as logs, to Loki through a Log Proxy by implementing the consumer side of the 21 | `loki_push_api` relation interface. 22 | 23 | Filtering logs in Loki is largely performed on the basis of labels. In the Juju ecosystem, Juju 24 | topology labels are used to uniquely identify the workload which generates telemetry like logs. 25 | 26 | In order to be able to control the labels on the logs pushed this object adds a Pebble layer 27 | that runs Promtail in the workload container, injecting Juju topology labels into the 28 | logs on the fly. 29 | 30 | ## LokiPushApiProvider Library Usage 31 | 32 | This object may be used by any Charmed Operator which implements the `loki_push_api` interface. 33 | For instance, Loki or Grafana Agent. 34 | 35 | For this purposes a charm needs to instantiate the `LokiPushApiProvider` object with one mandatory 36 | and three optional arguments. 37 | 38 | - `charm`: A reference to the parent (Loki) charm. 39 | 40 | - `relation_name`: The name of the relation that the charm uses to interact 41 | with its clients, which implement `LokiPushApiConsumer` or `LogProxyConsumer`. 42 | 43 | If provided, this relation name must match a provided relation in metadata.yaml with the 44 | `loki_push_api` interface. 45 | 46 | Typically `LokiPushApiConsumer` use "logging" as a relation_name and `LogProxyConsumer` use 47 | "log_proxy". 48 | 49 | The default value of this arguments is "logging". 50 | 51 | An example of this in a `metadata.yaml` file should have the following section: 52 | 53 | ```yaml 54 | provides: 55 | logging: 56 | interface: loki_push_api 57 | ``` 58 | 59 | For example, a Loki charm may instantiate the `LokiPushApiProvider` in its constructor as 60 | follows: 61 | 62 | from charms.loki_k8s.v0.loki_push_api import LokiPushApiProvider 63 | from loki_server import LokiServer 64 | ... 
65 | 66 | class LokiOperatorCharm(CharmBase): 67 | ... 68 | 69 | def __init__(self, *args): 70 | super().__init__(*args) 71 | ... 72 | self._loki_ready() 73 | ... 74 | 75 | def _loki_ready(self): 76 | try: 77 | version = self._loki_server.version 78 | self.loki_provider = LokiPushApiProvider(self) 79 | logger.debug("Loki Provider is available. Loki version: %s", version) 80 | except LokiServerNotReadyError as e: 81 | self.unit.status = MaintenanceStatus(str(e)) 82 | except LokiServerError as e: 83 | self.unit.status = BlockedStatus(str(e)) 84 | 85 | - `port`: Loki Push Api endpoint port. Default value: 3100. 86 | - `rules_dir`: Directory to store alert rules. Default value: "/loki/rules". 87 | 88 | 89 | The `LokiPushApiProvider` object has several responsibilities: 90 | 91 | 1. Set the URL of the Loki Push API in the relation application data bag; the URL 92 | must be unique to all instances (e.g. using a load balancer). 93 | 94 | 2. Set the Promtail binary URL (`promtail_binary_zip_url`) so clients that use 95 | `LogProxyConsumer` object can downloaded and configure it. 96 | 97 | 3. Process the metadata of the consumer application, provided via the 98 | "metadata" field of the consumer data bag, which are used to annotate the 99 | alert rules (see next point). An example for "metadata" is the following: 100 | 101 | {'model': 'loki', 102 | 'model_uuid': '0b7d1071-ded2-4bf5-80a3-10a81aeb1386', 103 | 'application': 'promtail-k8s' 104 | } 105 | 106 | 4. 
Process alert rules set into the relation by the `LokiPushApiConsumer` 107 | objects, e.g.: 108 | 109 | '{ 110 | "groups": [{ 111 | "name": "loki_0b7d1071-ded2-4bf5-80a3-10a81aeb1386_promtail-k8s_alerts", 112 | "rules": [{ 113 | "alert": "HighPercentageError", 114 | "expr": "sum(rate({app=\\"foo\\", env=\\"production\\"} |= \\"error\\" [5m])) 115 | by (job) \\n /\\nsum(rate({app=\\"foo\\", env=\\"production\\"}[5m])) 116 | by (job)\\n > 0.05 117 | \\n", "for": "10m", 118 | "labels": { 119 | "severity": "page", 120 | "juju_model": "loki", 121 | "juju_model_uuid": "0b7d1071-ded2-4bf5-80a3-10a81aeb1386", 122 | "juju_application": "promtail-k8s" 123 | }, 124 | "annotations": { 125 | "summary": "High request latency" 126 | } 127 | }] 128 | }] 129 | }' 130 | 131 | 132 | Once these alert rules are sent over relation data, the `LokiPushApiProvider` object 133 | stores these files in the directory `/loki/rules` inside the Loki charm container. After 134 | storing alert rules files, the object will check alert rules by querying Loki API 135 | endpoint: [`loki/api/v1/rules`](https://grafana.com/docs/loki/latest/api/#list-rule-groups). 136 | If there are changes in the alert rules a `loki_push_api_alert_rules_changed` event will 137 | be emitted with details about the `RelationEvent` which triggered it. 138 | 139 | This events should be observed in the charm that uses `LokiPushApiProvider`: 140 | 141 | ```python 142 | def __init__(self, *args): 143 | super().__init__(*args) 144 | ... 145 | self.loki_provider = LokiPushApiProvider(self) 146 | self.framework.observe( 147 | self.loki_provider.on.loki_push_api_alert_rules_changed, 148 | self._loki_push_api_alert_rules_changed, 149 | ) 150 | ``` 151 | 152 | 153 | ## LokiPushApiConsumer Library Usage 154 | 155 | This Loki charm interacts with its clients using the Loki charm library. Charms 156 | seeking to send log to Loki, must do so using the `LokiPushApiConsumer` object from 157 | this charm library. 
158 | 159 | > **NOTE**: `LokiPushApiConsumer` also depends on an additional charm library. 160 | > 161 | > Ensure sure you `charmcraft fetch-lib charms.observability_libs.v0.juju_topology` 162 | > when using this library. 163 | 164 | For the simplest use cases, using the `LokiPushApiConsumer` object only requires 165 | instantiating it, typically in the constructor of your charm (the one which 166 | sends logs). 167 | 168 | ```python 169 | from charms.loki_k8s.v0.loki_push_api import LokiPushApiConsumer 170 | 171 | class LokiClientCharm(CharmBase): 172 | 173 | def __init__(self, *args): 174 | super().__init__(*args) 175 | ... 176 | self._loki_consumer = LokiPushApiConsumer(self) 177 | ``` 178 | 179 | The `LokiPushApiConsumer` constructor requires two things: 180 | 181 | - A reference to the parent (LokiClientCharm) charm. 182 | 183 | - Optionally, the name of the relation that the Loki charm uses to interact 184 | with its clients. If provided, this relation name must match a required 185 | relation in metadata.yaml with the `loki_push_api` interface. 186 | 187 | This argument is not required if your metadata.yaml has precisely one 188 | required relation in metadata.yaml with the `loki_push_api` interface, as the 189 | lib will automatically resolve the relation name inspecting the using the 190 | meta information of the charm 191 | 192 | Any time the relation between a Loki provider charm and a Loki consumer charm is 193 | established, a `LokiPushApiEndpointJoined` event is fired. 
In the consumer side 194 | is it possible to observe this event with: 195 | 196 | ```python 197 | 198 | self.framework.observe( 199 | self._loki_consumer.on.loki_push_api_endpoint_joined, 200 | self._on_loki_push_api_endpoint_joined, 201 | ) 202 | ``` 203 | 204 | Any time there are departures in relations between the consumer charm and Loki 205 | the consumer charm is informed, through a `LokiPushApiEndpointDeparted` event, for instance: 206 | 207 | ```python 208 | self.framework.observe( 209 | self._loki_consumer.on.loki_push_api_endpoint_departed, 210 | self._on_loki_push_api_endpoint_departed, 211 | ) 212 | ``` 213 | 214 | The consumer charm can then choose to update its configuration in both situations. 215 | 216 | Note that LokiPushApiConsumer does not add any labels automatically on its own. In 217 | order to better integrate with the Canonical Observability Stack, you may want to configure your 218 | software to add Juju topology labels. The 219 | [observability-libs](https://charmhub.io/observability-libs) library can be used to get topology 220 | labels in charm code. See :func:`LogProxyConsumer._scrape_configs` for an example of how 221 | to do this with promtail. 222 | 223 | ## LogProxyConsumer Library Usage 224 | 225 | Let's say that we have a workload charm that produces logs and we need to send those logs to a 226 | workload implementing the `loki_push_api` interface, such as `Loki` or `Grafana Agent`. 227 | 228 | Adopting this object in a Charmed Operator consist of two steps: 229 | 230 | 1. Use the `LogProxyConsumer` class by instanting it in the `__init__` method of the charmed 231 | operator. There are two ways to get logs in to promtail. You can give it a list of files to read 232 | or you can write to it using the syslog protocol. 233 | 234 | For example: 235 | 236 | ```python 237 | from charms.loki_k8s.v0.loki_push_api import LogProxyConsumer 238 | 239 | ... 240 | 241 | def __init__(self, *args): 242 | ... 
243 | self._log_proxy = LogProxyConsumer( 244 | charm=self, log_files=LOG_FILES, container_name=PEER, enable_syslog=True 245 | ) 246 | 247 | self.framework.observe( 248 | self._log_proxy.on.promtail_digest_error, 249 | self._promtail_error, 250 | ) 251 | 252 | def _promtail_error(self, event): 253 | logger.error(event.message) 254 | self.unit.status = BlockedStatus(event.message) 255 | ``` 256 | 257 | Any time the relation between a provider charm and a LogProxy consumer charm is 258 | established, a `LogProxyEndpointJoined` event is fired. In the consumer side is it 259 | possible to observe this event with: 260 | 261 | ```python 262 | 263 | self.framework.observe( 264 | self._log_proxy.on.log_proxy_endpoint_joined, 265 | self._on_log_proxy_endpoint_joined, 266 | ) 267 | ``` 268 | 269 | Any time there are departures in relations between the consumer charm and the provider 270 | the consumer charm is informed, through a `LogProxyEndpointDeparted` event, for instance: 271 | 272 | ```python 273 | self.framework.observe( 274 | self._log_proxy.on.log_proxy_endpoint_departed, 275 | self._on_log_proxy_endpoint_departed, 276 | ) 277 | ``` 278 | 279 | The consumer charm can then choose to update its configuration in both situations. 280 | 281 | Note that: 282 | 283 | - `LOG_FILES` is a `list` containing the log files we want to send to `Loki` or 284 | `Grafana Agent`, for instance: 285 | 286 | ```python 287 | LOG_FILES = [ 288 | "/var/log/apache2/access.log", 289 | "/var/log/alternatives.log", 290 | ] 291 | ``` 292 | 293 | - `container_name` is the name of the container in which the application is running. 294 | If in the Pod there is only one container, this argument can be omitted. 295 | 296 | - You can configure your syslog software using `localhost` as the address and the method 297 | `LogProxyConsumer.syslog_port` to get the port, or, alternatively, if you are using rsyslog 298 | you may use the method `LogProxyConsumer.rsyslog_config()`. 299 | 300 | 2. 
Modify the `metadata.yaml` file to add: 301 | 302 | - The `log-proxy` relation in the `requires` section: 303 | ```yaml 304 | requires: 305 | log-proxy: 306 | interface: loki_push_api 307 | optional: true 308 | ``` 309 | 310 | Once the library is implemented in a Charmed Operator and a relation is established with 311 | the charm that implements the `loki_push_api` interface, the library will inject a 312 | Pebble layer that runs Promtail in the workload container to send logs. 313 | 314 | By default, the promtail binary injected into the container will be downloaded from the internet. 315 | If, for any reason, the container has limited network access, you may allow charm administrators 316 | to provide their own promtail binary at runtime by adding the following snippet to your charm 317 | metadata: 318 | 319 | ```yaml 320 | resources: 321 | promtail-bin: 322 | type: file 323 | description: Promtail binary for logging 324 | filename: promtail-linux 325 | ``` 326 | 327 | Which would then allow operators to deploy the charm this way: 328 | 329 | ``` 330 | juju deploy \ 331 | ./your_charm.charm \ 332 | --resource promtail-bin=/tmp/promtail-linux-amd64 333 | ``` 334 | 335 | If a different resource name is used, it can be specified with the `promtail_resource_name` 336 | argument to the `LogProxyConsumer` constructor. 337 | 338 | The object can emit a `PromtailDigestError` event: 339 | 340 | - Promtail binary cannot be downloaded. 341 | - The sha256 sum mismatch for promtail binary. 342 | 343 | The object can raise a `ContainerNotFoundError` event: 344 | 345 | - No `container_name` parameter has been specified and the Pod has more than 1 container. 
346 | 347 | These can be monitored via the PromtailDigestError events: 348 | 349 | ```python 350 | self.framework.observe( 351 | self._loki_consumer.on.promtail_digest_error, 352 | self._promtail_error, 353 | ) 354 | 355 | def _promtail_error(self, event): 356 | logger.error(event.message) 357 | self.unit.status = BlockedStatus(event.message) 358 | 359 | ``` 360 | 361 | ## Alerting Rules 362 | 363 | This charm library also supports gathering alerting rules from all related Loki client 364 | charms and enabling corresponding alerts within the Loki charm. Alert rules are 365 | automatically gathered by `LokiPushApiConsumer` object from a directory conventionally 366 | named `loki_alert_rules`. 367 | 368 | This directory must reside at the top level in the `src` folder of the 369 | consumer charm. Each file in this directory is assumed to be a single alert rule 370 | in YAML format. The file name must have the `.rule` extension. 371 | The format of this alert rule conforms to the 372 | [Loki docs](https://grafana.com/docs/loki/latest/rules/#alerting-rules). 373 | 374 | An example of the contents of one such file is shown below. 375 | 376 | ```yaml 377 | alert: HighPercentageError 378 | expr: | 379 | sum(rate({%%juju_topology%%} |= "error" [5m])) by (job) 380 | / 381 | sum(rate({%%juju_topology%%}[5m])) by (job) 382 | > 0.05 383 | for: 10m 384 | labels: 385 | severity: page 386 | annotations: 387 | summary: High request latency 388 | 389 | ``` 390 | 391 | It is **critical** to use the `%%juju_topology%%` filter in the expression for the alert 392 | rule shown above. This filter is a stub that is automatically replaced by the 393 | `LokiPushApiConsumer` following Loki Client's Juju topology (application, model and its 394 | UUID). Such a topology filter is essential to ensure that alert rules submitted by one 395 | provider charm generate alerts only for that same charm. 396 | 397 | The Loki charm may be related to multiple Loki client charms.
Without this filter, 398 | rules submitted by one provider charm will also result in corresponding alerts for other 399 | provider charms. Hence every alert rule expression must include such a topology filter stub. 400 | 401 | Gathering alert rules and generating rule files within the Loki charm is easily done using 402 | the `alerts()` method of `LokiPushApiProvider`. Alerts generated by Loki will automatically 403 | include Juju topology labels in the alerts. These labels indicate the source of the alert. 404 | 405 | The following labels are automatically added to every alert: 406 | 407 | - `juju_model` 408 | - `juju_model_uuid` 409 | - `juju_application` 410 | 411 | 412 | If an alert rules file does not contain the keys `alert` or `expr`, or there is no alert 413 | rules file in `alert_rules_path`, a `loki_push_api_alert_rules_error` event is emitted. 414 | 415 | To handle these situations the event must be observed in the `LokiClientCharm` charm.py file: 416 | 417 | ```python 418 | class LokiClientCharm(CharmBase): 419 | 420 | def __init__(self, *args): 421 | super().__init__(*args) 422 | ... 423 | self._loki_consumer = LokiPushApiConsumer(self) 424 | 425 | self.framework.observe( 426 | self._loki_consumer.on.loki_push_api_alert_rules_error, 427 | self._alert_rules_error 428 | ) 429 | 430 | def _alert_rules_error(self, event): 431 | self.unit.status = BlockedStatus(event.message) 432 | ``` 433 | 434 | ## Relation Data 435 | 436 | The Loki charm uses both application and unit relation data to obtain information regarding 437 | Loki Push API and alert rules. 438 | 439 | Units of the consumer charm send their alert rules over app relation data using the `alert_rules` 440 | key.
441 | """ 442 | 443 | import json 444 | import logging 445 | import os 446 | import platform 447 | import re 448 | import socket 449 | import subprocess 450 | import tempfile 451 | import typing 452 | from copy import deepcopy 453 | from gzip import GzipFile 454 | from hashlib import sha256 455 | from io import BytesIO 456 | from pathlib import Path 457 | from typing import Any, Dict, List, Optional, Tuple, Union, cast 458 | from urllib import request 459 | from urllib.error import HTTPError 460 | 461 | import yaml 462 | from charms.observability_libs.v0.juju_topology import JujuTopology 463 | from ops.charm import ( 464 | CharmBase, 465 | HookEvent, 466 | RelationBrokenEvent, 467 | RelationCreatedEvent, 468 | RelationDepartedEvent, 469 | RelationEvent, 470 | RelationJoinedEvent, 471 | RelationRole, 472 | WorkloadEvent, 473 | ) 474 | from ops.framework import EventBase, EventSource, Object, ObjectEvents 475 | from ops.model import Container, ModelError, Relation 476 | from ops.pebble import APIError, ChangeError, PathError, ProtocolError 477 | 478 | # The unique Charmhub library identifier, never change it 479 | LIBID = "bf76f23cdd03464b877c52bd1d2f563e" 480 | 481 | # Increment this major API version when introducing breaking changes 482 | LIBAPI = 0 483 | 484 | # Increment this PATCH version before using `charmcraft publish-lib` or reset 485 | # to 0 if you are raising the major API version 486 | LIBPATCH = 17 487 | 488 | logger = logging.getLogger(__name__) 489 | 490 | RELATION_INTERFACE_NAME = "loki_push_api" 491 | DEFAULT_RELATION_NAME = "logging" 492 | DEFAULT_ALERT_RULES_RELATIVE_PATH = "./src/loki_alert_rules" 493 | DEFAULT_LOG_PROXY_RELATION_NAME = "log-proxy" 494 | 495 | PROMTAIL_BASE_URL = "https://github.com/canonical/loki-k8s-operator/releases/download" 496 | # To update Promtail version you only need to change the PROMTAIL_VERSION and 497 | # update all sha256 sums in PROMTAIL_BINARIES. 
To support a new architecture 498 | # you only need to add a new key value pair for the architecture in PROMTAIL_BINARIES. 499 | PROMTAIL_VERSION = "v2.5.0" 500 | PROMTAIL_BINARIES = { 501 | "amd64": { 502 | "filename": "promtail-static-amd64", 503 | "zipsha": "543e333b0184e14015a42c3c9e9e66d2464aaa66eca48b29e185a6a18f67ab6d", 504 | "binsha": "17e2e271e65f793a9fbe81eab887b941e9d680abe82d5a0602888c50f5e0cac9", 505 | }, 506 | } 507 | 508 | # Paths in `charm` container 509 | BINARY_DIR = "/tmp" 510 | 511 | # Paths in `workload` container 512 | WORKLOAD_BINARY_DIR = "/opt/promtail" 513 | WORKLOAD_CONFIG_DIR = "/etc/promtail" 514 | WORKLOAD_CONFIG_FILE_NAME = "promtail_config.yaml" 515 | WORKLOAD_CONFIG_PATH = "{}/{}".format(WORKLOAD_CONFIG_DIR, WORKLOAD_CONFIG_FILE_NAME) 516 | WORKLOAD_POSITIONS_PATH = "{}/positions.yaml".format(WORKLOAD_BINARY_DIR) 517 | WORKLOAD_SERVICE_NAME = "promtail" 518 | 519 | HTTP_LISTEN_PORT = 9080 520 | GRPC_LISTEN_PORT = 9095 521 | 522 | 523 | class RelationNotFoundError(ValueError): 524 | """Raised if there is no relation with the given name.""" 525 | 526 | def __init__(self, relation_name: str): 527 | self.relation_name = relation_name 528 | self.message = "No relation named '{}' found".format(relation_name) 529 | 530 | super().__init__(self.message) 531 | 532 | 533 | class RelationInterfaceMismatchError(Exception): 534 | """Raised if the relation with the given name has a different interface.""" 535 | 536 | def __init__( 537 | self, 538 | relation_name: str, 539 | expected_relation_interface: str, 540 | actual_relation_interface: str, 541 | ): 542 | self.relation_name = relation_name 543 | self.expected_relation_interface = expected_relation_interface 544 | self.actual_relation_interface = actual_relation_interface 545 | self.message = ( 546 | "The '{}' relation has '{}' as interface rather than the expected '{}'".format( 547 | relation_name, actual_relation_interface, expected_relation_interface 548 | ) 549 | ) 550 | 
super().__init__(self.message) 551 | 552 | 553 | class RelationRoleMismatchError(Exception): 554 | """Raised if the relation with the given name has a different direction.""" 555 | 556 | def __init__( 557 | self, 558 | relation_name: str, 559 | expected_relation_role: RelationRole, 560 | actual_relation_role: RelationRole, 561 | ): 562 | self.relation_name = relation_name 563 | self.expected_relation_interface = expected_relation_role 564 | self.actual_relation_role = actual_relation_role 565 | self.message = "The '{}' relation has role '{}' rather than the expected '{}'".format( 566 | relation_name, repr(actual_relation_role), repr(expected_relation_role) 567 | ) 568 | super().__init__(self.message) 569 | 570 | 571 | def _validate_relation_by_interface_and_direction( 572 | charm: CharmBase, 573 | relation_name: str, 574 | expected_relation_interface: str, 575 | expected_relation_role: RelationRole, 576 | ): 577 | """Verifies that a relation has the necessary characteristics. 578 | 579 | Verifies that the `relation_name` provided: (1) exists in metadata.yaml, 580 | (2) declares as interface the interface name passed as `relation_interface` 581 | and (3) has the right "direction", i.e., it is a relation that `charm` 582 | provides or requires. 583 | 584 | Args: 585 | charm: a `CharmBase` object to scan for the matching relation. 586 | relation_name: the name of the relation to be verified. 587 | expected_relation_interface: the interface name to be matched by the 588 | relation named `relation_name`. 589 | expected_relation_role: whether the `relation_name` must be either 590 | provided or required by `charm`. 591 | 592 | Raises: 593 | RelationNotFoundError: If there is no relation in the charm's metadata.yaml 594 | with the same name as provided via `relation_name` argument. 
595 | RelationInterfaceMismatchError: The relation with the same name as provided 596 | via `relation_name` argument does not have the same relation interface 597 | as specified via the `expected_relation_interface` argument. 598 | RelationRoleMismatchError: If the relation with the same name as provided 599 | via `relation_name` argument does not have the same role as specified 600 | via the `expected_relation_role` argument. 601 | """ 602 | if relation_name not in charm.meta.relations: 603 | raise RelationNotFoundError(relation_name) 604 | 605 | relation = charm.meta.relations[relation_name] 606 | 607 | actual_relation_interface = relation.interface_name 608 | if actual_relation_interface != expected_relation_interface: 609 | raise RelationInterfaceMismatchError( 610 | relation_name, expected_relation_interface, actual_relation_interface 611 | ) 612 | 613 | if expected_relation_role == RelationRole.provides: 614 | if relation_name not in charm.meta.provides: 615 | raise RelationRoleMismatchError( 616 | relation_name, RelationRole.provides, RelationRole.requires 617 | ) 618 | elif expected_relation_role == RelationRole.requires: 619 | if relation_name not in charm.meta.requires: 620 | raise RelationRoleMismatchError( 621 | relation_name, RelationRole.requires, RelationRole.provides 622 | ) 623 | else: 624 | raise Exception("Unexpected RelationDirection: {}".format(expected_relation_role)) 625 | 626 | 627 | class InvalidAlertRulePathError(Exception): 628 | """Raised if the alert rules folder cannot be found or is otherwise invalid.""" 629 | 630 | def __init__( 631 | self, 632 | alert_rules_absolute_path: Path, 633 | message: str, 634 | ): 635 | self.alert_rules_absolute_path = alert_rules_absolute_path 636 | self.message = message 637 | 638 | super().__init__(self.message) 639 | 640 | 641 | def _is_official_alert_rule_format(rules_dict: dict) -> bool: 642 | """Are alert rules in the upstream format as supported by Loki. 
643 | 644 | Alert rules in dictionary format are in "official" form if they 645 | contain a "groups" key, since this implies they contain a list of 646 | alert rule groups. 647 | 648 | Args: 649 | rules_dict: a set of alert rules in Python dictionary format 650 | 651 | Returns: 652 | True if alert rules are in official Loki file format. 653 | """ 654 | return "groups" in rules_dict 655 | 656 | 657 | def _is_single_alert_rule_format(rules_dict: dict) -> bool: 658 | """Are alert rules in single rule format. 659 | 660 | The Loki charm library supports reading of alert rules in a 661 | custom format that consists of a single alert rule per file. This 662 | does not conform to the official Loki alert rule file format 663 | which requires that each alert rules file consists of a list of 664 | alert rule groups and each group consists of a list of alert 665 | rules. 666 | 667 | Alert rules in dictionary form are considered to be in single rule 668 | format if in the least it contains two keys corresponding to the 669 | alert rule name and alert expression. 670 | 671 | Returns: 672 | True if alert rule is in single rule file format. 673 | """ 674 | # one alert rule per file 675 | return set(rules_dict) >= {"alert", "expr"} 676 | 677 | 678 | class AlertRules: 679 | """Utility class for amalgamating Loki alert rule files and injecting juju topology. 680 | 681 | An `AlertRules` object supports aggregating alert rules from files and directories in both 682 | official and single rule file formats using the `add_path()` method. All the alert rules 683 | read are annotated with Juju topology labels and amalgamated into a single data structure 684 | in the form of a Python dictionary using the `as_dict()` method. Such a dictionary can be 685 | easily dumped into JSON format and exchanged over relation data. The dictionary can also 686 | be dumped into YAML format and written directly into an alert rules file that is read by 687 | Loki. 
Note that multiple `AlertRules` objects must not be written into the same file, 688 | since Loki allows only a single list of alert rule groups per alert rules file. 689 | 690 | The official Loki format is a YAML file conforming to the Loki documentation 691 | (https://grafana.com/docs/loki/latest/api/#list-rule-groups). 692 | The custom single rule format is a subsection of the official YAML, having a single alert 693 | rule, effectively "one alert per file". 694 | """ 695 | 696 | # This class uses the following terminology for the various parts of a rule file: 697 | # - alert rules file: the entire groups[] yaml, including the "groups:" key. 698 | # - alert groups (plural): the list of groups[] (a list, i.e. no "groups:" key) - it is a list 699 | # of dictionaries that have the "name" and "rules" keys. 700 | # - alert group (singular): a single dictionary that has the "name" and "rules" keys. 701 | # - alert rules (plural): all the alerts in a given alert group - a list of dictionaries with 702 | # the "alert" and "expr" keys. 703 | # - alert rule (singular): a single dictionary that has the "alert" and "expr" keys. 704 | 705 | def __init__(self, topology: Optional[JujuTopology] = None): 706 | """Build and alert rule object. 707 | 708 | Args: 709 | topology: a `JujuTopology` instance that is used to annotate all alert rules. 710 | """ 711 | self.topology = topology 712 | self.tool = CosTool(None) 713 | self.alert_groups = [] # type: List[dict] 714 | 715 | def _from_file(self, root_path: Path, file_path: Path) -> List[dict]: 716 | """Read a rules file from path, injecting juju topology. 717 | 718 | Args: 719 | root_path: full path to the root rules folder (used only for generating group name) 720 | file_path: full path to a *.rule file. 721 | 722 | Returns: 723 | A list of dictionaries representing the rules file, if file is valid (the structure is 724 | formed by `yaml.safe_load` of the file); an empty list otherwise. 
725 | """ 726 | with file_path.open() as rf: 727 | # Load a list of rules from file then add labels and filters 728 | try: 729 | rule_file = yaml.safe_load(rf) or {} 730 | 731 | except Exception as e: 732 | logger.error("Failed to read alert rules from %s: %s", file_path.name, e) 733 | return [] 734 | 735 | if _is_official_alert_rule_format(rule_file): 736 | alert_groups = rule_file["groups"] 737 | elif _is_single_alert_rule_format(rule_file): 738 | # convert to list of alert groups 739 | # group name is made up from the file name 740 | alert_groups = [{"name": file_path.stem, "rules": [rule_file]}] 741 | else: 742 | # invalid/unsupported 743 | reason = "file is empty" if not rule_file else "unexpected file structure" 744 | logger.error("Invalid rules file (%s): %s", reason, file_path.name) 745 | return [] 746 | 747 | # update rules with additional metadata 748 | for alert_group in alert_groups: 749 | # update group name with topology and sub-path 750 | alert_group["name"] = self._group_name( 751 | str(root_path), 752 | str(file_path), 753 | alert_group["name"], 754 | ) 755 | 756 | # add "juju_" topology labels 757 | for alert_rule in alert_group["rules"]: 758 | if "labels" not in alert_rule: 759 | alert_rule["labels"] = {} 760 | 761 | if self.topology: 762 | alert_rule["labels"].update(self.topology.label_matcher_dict) 763 | # insert juju topology filters into a prometheus alert rule 764 | # logql doesn't like empty matchers, so add a job matcher which hits 765 | # any string as a "wildcard" which the topology labels will 766 | # filter down 767 | alert_rule["expr"] = self.tool.inject_label_matchers( 768 | re.sub(r"%%juju_topology%%", r'job=~".+"', alert_rule["expr"]), 769 | self.topology.label_matcher_dict, 770 | ) 771 | 772 | return alert_groups 773 | 774 | def _group_name( 775 | self, 776 | root_path: typing.Union[Path, str], 777 | file_path: typing.Union[Path, str], 778 | group_name: str, 779 | ) -> str: 780 | """Generate group name from path and topology. 

        The group name is made up of the relative path between the root dir_path,
        the file path, and topology identifier.

        Args:
            root_path: path to the root rules dir.
            file_path: path to rule file.
            group_name: original group name to keep as part of the new augmented group name

        Returns:
            New group name, augmented by juju topology and relative path.
        """
        file_path = Path(file_path) if not isinstance(file_path, Path) else file_path
        root_path = Path(root_path) if not isinstance(root_path, Path) else root_path
        rel_path = file_path.parent.relative_to(root_path.as_posix())

        # We should account for both absolute paths and Windows paths. Convert it to a POSIX
        # string, strip off any leading /, then join it

        path_str = ""
        if not rel_path == Path("."):
            # Get rid of leading / and optionally drive letters so they don't muck up
            # the template later, since Path.parts returns them. The 'if relpath.is_absolute ...'
            # isn't even needed since re.sub doesn't throw exceptions if it doesn't match, so it's
            # optional, but it makes it clear what we're doing.

            # Note that Path doesn't actually care whether the path is valid just to instantiate
            # the object, so we can happily strip that stuff out to make templating nicer
            rel_path = Path(
                re.sub(r"^([A-Za-z]+:)?/", "", rel_path.as_posix())
                if rel_path.is_absolute()
                else str(rel_path)
            )

            # Get rid of relative path characters in the middle which both os.path and pathlib
            # leave hanging around. We could use path.resolve(), but that would lead to very
            # long template strings when rules come from pods and/or other deeply nested charm
            # paths
            path_str = "_".join(filter(lambda x: x not in ["..", "/"], rel_path.parts))

        # Generate group name:
        #  - name, from juju topology
        #  - suffix, from the relative path of the rule file;
        group_name_parts = [self.topology.identifier] if self.topology else []
        group_name_parts.extend([path_str, group_name, "alerts"])
        # filter to remove empty strings
        return "_".join(filter(lambda x: x, group_name_parts))

    @classmethod
    def _multi_suffix_glob(
        cls, dir_path: Path, suffixes: List[str], recursive: bool = True
    ) -> list:
        """Helper function for getting all files in a directory that have a matching suffix.

        Args:
            dir_path: path to the directory to glob from.
            suffixes: list of suffixes to include in the glob (items should begin with a period).
            recursive: a flag indicating whether a glob is recursive (nested) or not.

        Returns:
            List of files in `dir_path` that have one of the suffixes specified in `suffixes`.
        """
        all_files_in_dir = dir_path.glob("**/*" if recursive else "*")
        return list(filter(lambda f: f.is_file() and f.suffix in suffixes, all_files_in_dir))

    def _from_dir(self, dir_path: Path, recursive: bool) -> List[dict]:
        """Read all rule files in a directory.

        All rules from files for the same directory are loaded into a single
        group. The generated name of this group includes juju topology.
        By default, only the top directory is scanned; for nested scanning, pass `recursive=True`.

        Args:
            dir_path: directory containing *.rule files (alert rules without groups).
            recursive: flag indicating whether to scan for rule files recursively.

        Returns:
            a list of dictionaries representing prometheus alert rule groups, each dictionary
            representing an alert group (structure determined by `yaml.safe_load`).
        """
        alert_groups = []  # type: List[dict]

        # Gather all alerts into a list of groups
        for file_path in self._multi_suffix_glob(dir_path, [".rule", ".rules"], recursive):
            alert_groups_from_file = self._from_file(dir_path, file_path)
            if alert_groups_from_file:
                logger.debug("Reading alert rule from %s", file_path)
                alert_groups.extend(alert_groups_from_file)

        return alert_groups

    def add_path(self, path: str, *, recursive: bool = False):
        """Add rules from a dir path.

        All rules from files are aggregated into a data structure representing a single rule file.
        All group names are augmented with juju topology.

        Args:
            path: either a rules file or a dir of rules files.
            recursive: whether to read files recursively or not (no impact if `path` is a file).

        Raises:
            InvalidAlertRulePathError: if the provided path is invalid.
        """
        path = Path(path)  # type: Path
        if path.is_dir():
            self.alert_groups.extend(self._from_dir(path, recursive))
        elif path.is_file():
            self.alert_groups.extend(self._from_file(path.parent, path))
        else:
            logger.debug("The alerts file does not exist: %s", path)

    def as_dict(self) -> dict:
        """Return standard alert rules file in dict representation.

        Returns:
            a dictionary containing a single list of alert rule groups.
            The list of alert rule groups is provided as value of the
            "groups" dictionary key.
        """
        return {"groups": self.alert_groups} if self.alert_groups else {}


def _resolve_dir_against_charm_path(charm: CharmBase, *path_elements: str) -> str:
    """Resolve the provided path items against the directory of the main file.

    Look up the directory of the `main.py` file being executed. This is normally
    going to be the charm.py file of the charm including this library. Then, resolve
    the provided path elements and, if the result path exists and is a directory,
    return its absolute path; otherwise, raise an exception.

    Raises:
        InvalidAlertRulePathError, if the path does not exist or is not a directory.
    """
    charm_dir = Path(str(charm.charm_dir))
    if not charm_dir.exists() or not charm_dir.is_dir():
        # Operator Framework does not currently expose a robust
        # way to determine the top level charm source directory
        # that is consistent across deployed charms and unit tests
        # Hence for unit tests the current working directory is used
        # TODO: update this logic when the following ticket is resolved
        # https://github.com/canonical/operator/issues/643
        charm_dir = Path(os.getcwd())

    alerts_dir_path = charm_dir.absolute().joinpath(*path_elements)

    if not alerts_dir_path.exists():
        raise InvalidAlertRulePathError(alerts_dir_path, "directory does not exist")
    if not alerts_dir_path.is_dir():
        raise InvalidAlertRulePathError(alerts_dir_path, "is not a directory")

    return str(alerts_dir_path)


class NoRelationWithInterfaceFoundError(Exception):
    """No relations with the given interface are found in the charm meta."""

    def __init__(self, charm: CharmBase, relation_interface: Optional[str] = None):
        self.charm = charm
        self.relation_interface = relation_interface
        self.message = (
            "No relations with interface '{}' found in the meta of the '{}' charm".format(
                relation_interface, charm.meta.name
            )
        )

        super().__init__(self.message)


class MultipleRelationsWithInterfaceFoundError(Exception):
    """Multiple relations with the given interface are found in the charm meta."""

    def __init__(self, charm: CharmBase, relation_interface: str, relations: list):
        self.charm = charm
        self.relation_interface = relation_interface
        self.relations = relations
        self.message = (
            "Multiple relations with interface '{}' found in the meta of the '{}' charm.".format(
                relation_interface, charm.meta.name
            )
        )
        super().__init__(self.message)


class LokiPushApiEndpointDeparted(EventBase):
    """Event emitted when Loki departed."""


class LokiPushApiEndpointJoined(EventBase):
    """Event emitted when Loki joined."""


class LokiPushApiAlertRulesChanged(EventBase):
    """Event emitted if there is a change in the alert rules."""

    def __init__(self, handle, relation, relation_id, app=None, unit=None):
        """Pretend we are almost like a RelationEvent.

        Fields to serialize:
            {
                "relation_name": <name of the relation>,
                "relation_id": <id of the relation>,
                "app_name": <name of the remote app>,
                "unit_name": <name of the remote unit>
            }

        In this way, we can transparently use `RelationEvent.snapshot()` to pass
        it back if we need to log it.
        """
        super().__init__(handle)
        self.relation = relation
        self.relation_id = relation_id
        self.app = app
        self.unit = unit

    def snapshot(self) -> Dict:
        """Save event information."""
        snapshot = {"relation_name": self.relation.name, "relation_id": self.relation.id}
        if self.app:
            snapshot["app_name"] = self.app.name
        if self.unit:
            snapshot["unit_name"] = self.unit.name
        return snapshot

    def restore(self, snapshot: dict):
        """Restore event information."""
        self.relation = self.framework.model.get_relation(
            snapshot["relation_name"], snapshot["relation_id"]
        )
        app_name = snapshot.get("app_name")
        if app_name:
            self.app = self.framework.model.get_app(app_name)
        else:
            self.app = None
        unit_name = snapshot.get("unit_name")
        if unit_name:
            self.unit = self.framework.model.get_unit(unit_name)
        else:
            self.unit = None


class InvalidAlertRuleEvent(EventBase):
    """Event emitted when alert rule files are not parsable.

    Enables us to set a clear status on the provider.
    """

    def __init__(self, handle, errors: str = "", valid: bool = False):
        super().__init__(handle)
        self.errors = errors
        self.valid = valid

    def snapshot(self) -> Dict:
        """Save alert rule information."""
        return {
            "valid": self.valid,
            "errors": self.errors,
        }

    def restore(self, snapshot):
        """Restore alert rule information."""
        self.valid = snapshot["valid"]
        self.errors = snapshot["errors"]


class LokiPushApiEvents(ObjectEvents):
    """Event descriptor for events raised by `LokiPushApiProvider`."""

    loki_push_api_endpoint_departed = EventSource(LokiPushApiEndpointDeparted)
    loki_push_api_endpoint_joined = EventSource(LokiPushApiEndpointJoined)
    loki_push_api_alert_rules_changed = EventSource(LokiPushApiAlertRulesChanged)
    alert_rule_status_changed = EventSource(InvalidAlertRuleEvent)


class LokiPushApiProvider(Object):
    """A LokiPushApiProvider class."""

    on = LokiPushApiEvents()

    def __init__(
        self,
        charm,
        relation_name: str = DEFAULT_RELATION_NAME,
        *,
        port: Union[str, int] = 3100,
        scheme: str = "http",
        address: str = "localhost",
        path: str = "loki/api/v1/push",
    ):
        """A Loki service provider.

        Args:
            charm: a `CharmBase` instance that manages this
                instance of the Loki service.
            relation_name: an optional string name of the relation between `charm`
                and the Loki charmed service. The default is "logging".
                It is strongly advised not to change the default, so that people
                deploying your charm will have a consistent experience with all
                other charms that consume metrics endpoints.

        Raises:
            RelationNotFoundError: If there is no relation in the charm's metadata.yaml
                with the same name as provided via `relation_name` argument.
            RelationInterfaceMismatchError: The relation with the same name as provided
                via `relation_name` argument does not have the `loki_push_api` relation
                interface.
            RelationRoleMismatchError: If the relation with the same name as provided
                via `relation_name` argument does not have the `RelationRole.requires`
                role.
        """
        _validate_relation_by_interface_and_direction(
            charm, relation_name, RELATION_INTERFACE_NAME, RelationRole.provides
        )
        super().__init__(charm, relation_name)
        self._charm = charm
        self._relation_name = relation_name
        self._tool = CosTool(self)
        self.port = int(port)
        self.scheme = scheme
        self.address = address
        self.path = path

        events = self._charm.on[relation_name]
        self.framework.observe(self._charm.on.upgrade_charm, self._on_lifecycle_event)
        self.framework.observe(events.relation_joined, self._on_logging_relation_joined)
        self.framework.observe(events.relation_changed, self._on_logging_relation_changed)
        self.framework.observe(events.relation_departed, self._on_logging_relation_departed)
        self.framework.observe(events.relation_broken, self._on_logging_relation_broken)

    def _on_lifecycle_event(self, _):
        # Upgrade event or other charm-level event
        should_update = False
        for relation in self._charm.model.relations[self._relation_name]:
            # Don't accidentally flip a True result back.
            should_update = should_update or self._process_logging_relation_changed(relation)
        if should_update:
            # We don't have a RelationEvent, so build it up by hand
            first_rel = self._charm.model.relations[self._relation_name][0]
            self.on.loki_push_api_alert_rules_changed.emit(
                relation=first_rel,
                relation_id=first_rel.id,
            )

    def _on_logging_relation_joined(self, event: RelationJoinedEvent):
        """Set basic data on relation joins.

        Set the promtail binary URL location, which will not change, and anything
        else which may be required, but is static.

        Args:
            event: a `CharmEvent` in response to which the consumer
                charm must set its relation data.
        """
        if self._charm.unit.is_leader():
            event.relation.data[self._charm.app].update(self._promtail_binary_url)
            logger.debug("Saved promtail binary url: %s", self._promtail_binary_url)

    def _on_logging_relation_changed(self, event: HookEvent):
        """Handle changes in related consumers.

        Anytime there are changes in the relation between Loki
        and its consumers charms.

        Args:
            event: a `CharmEvent` in response to which the consumer
                charm must update its relation data.
        """
        should_update = self._process_logging_relation_changed(event.relation)
        if should_update:
            self.on.loki_push_api_alert_rules_changed.emit(
                relation=event.relation,
                relation_id=event.relation.id,
                app=self._charm.app,
                unit=self._charm.unit,
            )

    def _on_logging_relation_broken(self, event: RelationBrokenEvent):
        """Removes alert rules files when consumer charms left the relation with Loki.

        Args:
            event: a `CharmEvent` in response to which the Loki
                charm must update its relation data.
1163 | """ 1164 | self.on.loki_push_api_alert_rules_changed.emit( 1165 | relation=event.relation, 1166 | relation_id=event.relation.id, 1167 | app=self._charm.app, 1168 | unit=self._charm.unit, 1169 | ) 1170 | 1171 | def _on_logging_relation_departed(self, event: RelationDepartedEvent): 1172 | """Removes alert rules files when consumer charms left the relation with Loki. 1173 | 1174 | Args: 1175 | event: a `CharmEvent` in response to which the Loki 1176 | charm must update its relation data. 1177 | """ 1178 | self.on.loki_push_api_alert_rules_changed.emit( 1179 | relation=event.relation, 1180 | relation_id=event.relation.id, 1181 | app=self._charm.app, 1182 | unit=self._charm.unit, 1183 | ) 1184 | 1185 | def _should_update_alert_rules(self, relation) -> bool: 1186 | """Determine whether alert rules should be regenerated. 1187 | 1188 | If there are alert rules in the relation data bag, tell the charm 1189 | whether or not to regenerate them based on the boolean returned here. 1190 | """ 1191 | if relation.data.get(relation.app).get("alert_rules", None) is not None: 1192 | return True 1193 | return False 1194 | 1195 | def _process_logging_relation_changed(self, relation: Relation) -> bool: 1196 | """Handle changes in related consumers. 1197 | 1198 | Anytime there are changes in relations between Loki 1199 | and its consumers charms, Loki set the `loki_push_api` 1200 | into the relation data. Set the endpoint building 1201 | appropriately, and if there are alert rules present in 1202 | the relation, let the caller know. 1203 | Besides Loki generates alert rules files based what 1204 | consumer charms forwards, 1205 | 1206 | Args: 1207 | relation: the `Relation` instance to update. 
1208 | 1209 | Returns: 1210 | A boolean indicating whether an event should be emitted so we 1211 | only emit one on lifecycle events 1212 | """ 1213 | relation.data[self._charm.unit]["public_address"] = socket.getfqdn() or "" 1214 | self.update_endpoint(relation=relation) 1215 | return self._should_update_alert_rules(relation) 1216 | 1217 | @property 1218 | def _promtail_binary_url(self) -> dict: 1219 | """URL from which Promtail binary can be downloaded.""" 1220 | # construct promtail binary url paths from parts 1221 | promtail_binaries = {} 1222 | for arch, info in PROMTAIL_BINARIES.items(): 1223 | info["url"] = "{}/promtail-{}/{}.gz".format( 1224 | PROMTAIL_BASE_URL, PROMTAIL_VERSION, info["filename"] 1225 | ) 1226 | promtail_binaries[arch] = info 1227 | 1228 | return {"promtail_binary_zip_url": json.dumps(promtail_binaries)} 1229 | 1230 | def update_endpoint(self, url: str = "", relation: Optional[Relation] = None) -> None: 1231 | """Triggers programmatically the update of endpoint in unit relation data. 1232 | 1233 | This method should be used when the charm relying on this library needs 1234 | to update the relation data in response to something occurring outside 1235 | of the `logging` relation lifecycle, e.g., in case of a 1236 | host address change because the charmed operator becomes connected to an 1237 | Ingress after the `logging` relation is established. 1238 | 1239 | Args: 1240 | url: An optional url value to update relation data. 1241 | relation: An optional instance of `class:ops.model.Relation` to update. 
1242 | """ 1243 | # if no relation is specified update all of them 1244 | if not relation: 1245 | if not self._charm.model.relations.get(self._relation_name): 1246 | return 1247 | 1248 | relations_list = self._charm.model.relations.get(self._relation_name) 1249 | else: 1250 | relations_list = [relation] 1251 | 1252 | endpoint = self._endpoint(url or self._url) 1253 | 1254 | for relation in relations_list: 1255 | relation.data[self._charm.unit].update({"endpoint": json.dumps(endpoint)}) 1256 | 1257 | logger.debug("Saved endpoint in unit relation data") 1258 | 1259 | @property 1260 | def _url(self) -> str: 1261 | """Get local Loki Push API url. 1262 | 1263 | Return url to loki, including port number, but without the endpoint subpath. 1264 | """ 1265 | return "http://{}:{}".format(socket.getfqdn(), self.port) 1266 | 1267 | def _endpoint(self, url) -> dict: 1268 | """Get Loki push API endpoint for a given url. 1269 | 1270 | Args: 1271 | url: A loki unit URL. 1272 | 1273 | Returns: str 1274 | """ 1275 | endpoint = "/loki/api/v1/push" 1276 | return {"url": url.rstrip("/") + endpoint} 1277 | 1278 | @property 1279 | def alerts(self) -> dict: # noqa: C901 1280 | """Fetch alerts for all relations. 1281 | 1282 | A Loki alert rules file consists of a list of "groups". Each 1283 | group consists of a list of alerts (`rules`) that are sequentially 1284 | executed. This method returns all the alert rules provided by each 1285 | related metrics provider charm. These rules may be used to generate a 1286 | separate alert rules file for each relation since the returned list 1287 | of alert groups are indexed by relation ID. Also for each relation ID 1288 | associated scrape metadata such as Juju model, UUID and application 1289 | name are provided so the a unique name may be generated for the rules 1290 | file. 
        For each relation the structure of data returned is a dictionary
        with four keys

        - groups
        - model
        - model_uuid
        - application

        The value of the `groups` key is such that it may be used to generate
        a Loki alert rules file directly using `yaml.dump` but the
        `groups` key itself must be included as this is required by Loki,
        for example as in `yaml.dump({"groups": alerts["groups"]})`.

        Currently only accepts a list of rules and these
        rules are all placed into a single group, even though Loki itself
        allows for multiple groups within a single alert rules file.

        Returns:
            a dictionary of alert rule groups and associated scrape
            metadata indexed by relation ID.
        """
        alerts = {}  # type: Dict[str, dict]  # mapping b/w juju identifiers and alert rule files
        for relation in self._charm.model.relations[self._relation_name]:
            if not relation.units or not relation.app:
                continue

            alert_rules = json.loads(relation.data[relation.app].get("alert_rules", "{}"))
            if not alert_rules:
                continue

            alert_rules = self._inject_alert_expr_labels(alert_rules)

            identifier, topology = self._get_identifier_by_alert_rules(alert_rules)
            if not topology:
                # No label-derived topology: fall back to the 'metadata' databag entry
                try:
                    metadata = json.loads(relation.data[relation.app]["metadata"])
                    identifier = JujuTopology.from_dict(metadata).identifier
                    alerts[identifier] = self._tool.apply_label_matchers(alert_rules)  # type: ignore

                except KeyError as e:
                    logger.debug(
                        "Relation %s has no 'metadata': %s",
                        relation.id,
                        e,
                    )

            if not identifier:
                logger.error(
                    "Alert rules were found but no usable group or identifier was present."
                )
                continue

            _, errmsg = self._tool.validate_alert_rules(alert_rules)
            if errmsg:
                # Surface validation errors back to the consumer via the databag
                relation.data[self._charm.app]["event"] = json.dumps({"errors": errmsg})
                continue

            alerts[identifier] = alert_rules

        return alerts

    def _get_identifier_by_alert_rules(
        self, rules: dict
    ) -> Tuple[Union[str, None], Union[JujuTopology, None]]:
        """Determine an appropriate dict key for alert rules.

        The key is used as the filename when writing alerts to disk, so the structure
        and uniqueness is important.

        Args:
            rules: a dict of alert rules
        Returns:
            A tuple containing an identifier, if found, and a JujuTopology, if it could
            be constructed.
        """
        if "groups" not in rules:
            logger.debug("No alert groups were found in relation data")
            return None, None

        # Construct an ID based on what's in the alert rules if they have labels
        for group in rules["groups"]:
            try:
                labels = group["rules"][0]["labels"]
                topology = JujuTopology(
                    # Don't try to safely get required constructor fields. There's already
                    # a handler for KeyErrors
                    model_uuid=labels["juju_model_uuid"],
                    model=labels["juju_model"],
                    application=labels["juju_application"],
                    unit=labels.get("juju_unit", ""),
                    charm_name=labels.get("juju_charm", ""),
                )
                return topology.identifier, topology
            except KeyError:
                logger.debug("Alert rules were found but no usable labels were present")
                continue

        logger.warning(
            "No labeled alert rules were found, and no 'scrape_metadata' "
            "was available. Using the alert group name as filename."
        )
        try:
            for group in rules["groups"]:
                return group["name"], None
        except KeyError:
            logger.debug("No group name was found to use as identifier")

        return None, None

    def _inject_alert_expr_labels(self, rules: Dict[str, Any]) -> Dict[str, Any]:
        """Iterate through alert rules and inject topology into expressions.

        Args:
            rules: a dict of alert rules
        """
        if "groups" not in rules:
            return rules

        modified_groups = []
        for group in rules["groups"]:
            # Copy off rules, so we don't modify an object we're iterating over
            rules_copy = group["rules"]
            for idx, rule in enumerate(rules_copy):
                labels = rule.get("labels")

                if labels:
                    try:
                        topology = JujuTopology(
                            # Don't try to safely get required constructor fields. There's already
                            # a handler for KeyErrors
                            model_uuid=labels["juju_model_uuid"],
                            model=labels["juju_model"],
                            application=labels["juju_application"],
                            unit=labels.get("juju_unit", ""),
                            charm_name=labels.get("juju_charm", ""),
                        )

                        # Inject topology and put it back in the list
                        rule["expr"] = self._tool.inject_label_matchers(
                            re.sub(r"%%juju_topology%%,?", "", rule["expr"]),
                            topology.label_matcher_dict,
                        )
                    except KeyError:
                        # Some required JujuTopology key is missing. Just move on.
                        pass

                group["rules"][idx] = rule

            modified_groups.append(group)

        rules["groups"] = modified_groups
        return rules


class ConsumerBase(Object):
    """Consumer's base class."""

    def __init__(
        self,
        charm: CharmBase,
        relation_name: str = DEFAULT_RELATION_NAME,
        alert_rules_path: str = DEFAULT_ALERT_RULES_RELATIVE_PATH,
        recursive: bool = False,
    ):
        super().__init__(charm, relation_name)
        self._charm = charm
        self._relation_name = relation_name
        self.topology = JujuTopology.from_charm(charm)

        try:
            alert_rules_path = _resolve_dir_against_charm_path(charm, alert_rules_path)
        except InvalidAlertRulePathError as e:
            logger.debug(
                "Invalid Loki alert rules folder at %s: %s",
                e.alert_rules_absolute_path,
                e.message,
            )
        self._alert_rules_path = alert_rules_path

        self._recursive = recursive

    def _handle_alert_rules(self, relation):
        """Load local alert rules and publish them (leader only) into the app databag."""
        if not self._charm.unit.is_leader():
            return

        alert_rules = AlertRules(self.topology)
        alert_rules.add_path(self._alert_rules_path, recursive=self._recursive)
        alert_rules_as_dict = alert_rules.as_dict()

        relation.data[self._charm.app]["metadata"] = json.dumps(self.topology.as_dict())
        relation.data[self._charm.app]["alert_rules"] = json.dumps(
            alert_rules_as_dict,
            sort_keys=True,  # sort, to prevent unnecessary relation_changed events
        )

    @property
    def loki_endpoints(self) -> List[dict]:
        """Fetch Loki Push API endpoints sent from LokiPushApiProvider through relation data.

        Returns:
            A list of dictionaries with Loki Push API endpoints, for instance:
            [
                {"url": "http://loki1:3100/loki/api/v1/push"},
                {"url": "http://loki2:3100/loki/api/v1/push"},
            ]
        """
        endpoints = []  # type: list

        for relation in self._charm.model.relations[self._relation_name]:
            for unit in relation.units:
                if unit.app == self._charm.app:
                    # This is a peer unit
                    continue

                endpoint = relation.data[unit].get("endpoint")
                if endpoint:
                    deserialized_endpoint = json.loads(endpoint)
                    endpoints.append(deserialized_endpoint)

        return endpoints


class LokiPushApiConsumer(ConsumerBase):
    """Loki Consumer class."""

    on = LokiPushApiEvents()

    def __init__(
        self,
        charm: CharmBase,
        relation_name: str = DEFAULT_RELATION_NAME,
        alert_rules_path: str = DEFAULT_ALERT_RULES_RELATIVE_PATH,
        recursive: bool = True,
    ):
        """Construct a Loki charm client.

        The `LokiPushApiConsumer` object provides configurations to a Loki client charm.
        A charm instantiating this object needs Loki information, for instance the
        Loki API endpoint to push logs.
        The `LokiPushApiConsumer` can be instantiated as follows:

            self._loki_consumer = LokiPushApiConsumer(self)

        Args:
            charm: a `CharmBase` object that manages this `LokiPushApiConsumer` object.
                Typically this is `self` in the instantiating class.
            relation_name: the string name of the relation interface to look up.
                If `charm` has exactly one relation with this interface, the relation's
                name is returned. If none or multiple relations with the provided interface
                are found, this method will raise either an exception of type
                NoRelationWithInterfaceFoundError or MultipleRelationsWithInterfaceFoundError,
                respectively.
            alert_rules_path: a string indicating a path where alert rules can be found
            recursive: Whether or not to scan for rule files recursively.

        Raises:
            RelationNotFoundError: If there is no relation in the charm's metadata.yaml
                with the same name as provided via `relation_name` argument.
            RelationInterfaceMismatchError: The relation with the same name as provided
                via `relation_name` argument does not have the `loki_push_api` relation
                interface.
            RelationRoleMismatchError: If the relation with the same name as provided
                via `relation_name` argument does not have the `RelationRole.provides`
                role.

        Emits:
            loki_push_api_endpoint_joined: This event is emitted when the relation between the
                Charmed Operator that instantiates `LokiPushApiProvider` (Loki charm for instance)
                and the Charmed Operator that instantiates `LokiPushApiConsumer` is established.
            loki_push_api_endpoint_departed: This event is emitted when the relation between the
                Charmed Operator that implements `LokiPushApiProvider` (Loki charm for instance)
                and the Charmed Operator that implements `LokiPushApiConsumer` is removed.
            loki_push_api_alert_rules_error: This event is emitted when an invalid alert rules
                file is encountered or if `alert_rules_path` is empty.
        """
        _validate_relation_by_interface_and_direction(
            charm, relation_name, RELATION_INTERFACE_NAME, RelationRole.requires
        )
        super().__init__(charm, relation_name, alert_rules_path, recursive)
        events = self._charm.on[relation_name]
        self.framework.observe(self._charm.on.upgrade_charm, self._on_lifecycle_event)
        self.framework.observe(events.relation_joined, self._on_logging_relation_joined)
        self.framework.observe(events.relation_changed, self._on_logging_relation_changed)
        self.framework.observe(events.relation_departed, self._on_logging_relation_departed)

    def _on_lifecycle_event(self, _: HookEvent):
        """Update requirer relation data on charm upgrades and other lifecycle events.

        Args:
            event: a `CharmEvent` in response to which the consumer
                charm must update its relation data.
        """
        # Upgrade event or other charm-level event
        self._reinitialize_alert_rules()
        self.on.loki_push_api_endpoint_joined.emit()

    def _on_logging_relation_joined(self, event: RelationJoinedEvent):
        """Handle changes in related consumers.

        Update relation data and emit events when a relation is established.

        Args:
            event: a `CharmEvent` in response to which the consumer
                charm must update its relation data.

        Emits:
            loki_push_api_endpoint_joined: Once the relation is established, this event is emitted.
            loki_push_api_alert_rules_error: This event is emitted when an invalid alert rules
                file is encountered or if `alert_rules_path` is empty.
        """
        # Alert rules will not change over the lifecycle of a charm, and do not need to be
        # constantly set on every relation_changed event. Leave them here.
        self._handle_alert_rules(event.relation)
        self.on.loki_push_api_endpoint_joined.emit()

    def _on_logging_relation_changed(self, event: RelationEvent):
        """Handle changes in related consumers.

        Anytime there are changes in the relation between Loki
        and its consumers charms.

        Args:
            event: a `CharmEvent` in response to which the consumer
                charm must update its relation data.

        Emits:
            loki_push_api_endpoint_joined: Once the relation is established, this event is emitted.
            loki_push_api_alert_rules_error: This event is emitted when an invalid alert rules
                file is encountered or if `alert_rules_path` is empty.
        """
        if self._charm.unit.is_leader():
            ev = json.loads(event.relation.data[event.app].get("event", "{}"))

            if ev:
                valid = bool(ev.get("valid", True))
                errors = ev.get("errors", "")

                if valid and not errors:
                    self.on.alert_rule_status_changed.emit(valid=valid)
                else:
                    self.on.alert_rule_status_changed.emit(valid=valid, errors=errors)

        self.on.loki_push_api_endpoint_joined.emit()

    def _reinitialize_alert_rules(self):
        """Reloads alert rules and updates all relations."""
        for relation in self._charm.model.relations[self._relation_name]:
            self._handle_alert_rules(relation)

    def _process_logging_relation_changed(self, relation: Relation):
        """Refresh alert rules for `relation` and signal that the endpoint joined."""
        self._handle_alert_rules(relation)
        self.on.loki_push_api_endpoint_joined.emit()

    def _on_logging_relation_departed(self, _: RelationEvent):
        """Handle departures in related providers.

        Anytime there are departures in relations between the consumer charm and Loki
        the consumer charm is informed, through a `LokiPushApiEndpointDeparted` event.
        The consumer charm can then choose to update its configuration.
        """
        # Provide default to avoid throwing, as in some complicated scenarios with
        # upgrades and hook failures we might not have data in the storage
        self.on.loki_push_api_endpoint_departed.emit()


class ContainerNotFoundError(Exception):
    """Raised if the specified container does not exist."""

    def __init__(self):
        msg = "The specified container does not exist."
        self.message = msg

        super().__init__(self.message)


class MultipleContainersFoundError(Exception):
    """Raised if no container name is passed but multiple containers are present."""

    def __init__(self):
        msg = (
            "No 'container_name' parameter has been specified; since this Charmed Operator"
            " is has multiple containers, container_name must be specified for the container"
            " to get logs from."
        )
        self.message = msg

        super().__init__(self.message)


class PromtailDigestError(EventBase):
    """Event emitted when there is an error with Promtail initialization."""

    def __init__(self, handle, message):
        super().__init__(handle)
        self.message = message

    def snapshot(self):
        """Save message information."""
        return {"message": self.message}

    def restore(self, snapshot):
        """Restore message information."""
        self.message = snapshot["message"]


class LogProxyEndpointDeparted(EventBase):
    """Event emitted when a Log Proxy has departed."""


class LogProxyEndpointJoined(EventBase):
    """Event emitted when a Log Proxy joins."""


class LogProxyEvents(ObjectEvents):
    """Event descriptor for events raised by `LogProxyConsumer`."""

    promtail_digest_error = EventSource(PromtailDigestError)
    log_proxy_endpoint_departed = EventSource(LogProxyEndpointDeparted)
1708 | log_proxy_endpoint_joined = EventSource(LogProxyEndpointJoined) 1709 | 1710 | 1711 | class LogProxyConsumer(ConsumerBase): 1712 | """LogProxyConsumer class. 1713 | 1714 | The `LogProxyConsumer` object provides a method for attaching `promtail` to 1715 | a workload in order to generate structured logging data from applications 1716 | which traditionally log to syslog or do not have native Loki integration. 1717 | The `LogProxyConsumer` can be instantiated as follows: 1718 | 1719 | self._log_proxy_consumer = LogProxyConsumer(self, log_files=["/var/log/messages"]) 1720 | 1721 | Args: 1722 | charm: a `CharmBase` object that manages this `LokiPushApiConsumer` object. 1723 | Typically, this is `self` in the instantiating class. 1724 | log_files: a list of log files to monitor with Promtail. 1725 | relation_name: the string name of the relation interface to look up. 1726 | If `charm` has exactly one relation with this interface, the relation's 1727 | name is returned. If none or multiple relations with the provided interface 1728 | are found, this method will raise either an exception of type 1729 | NoRelationWithInterfaceFoundError or MultipleRelationsWithInterfaceFoundError, 1730 | respectively. 1731 | enable_syslog: Whether to enable syslog integration. 1732 | syslog_port: The port syslog is attached to. 1733 | alert_rules_path: an optional path for the location of alert rules 1734 | files. Defaults to "./src/loki_alert_rules", 1735 | resolved from the directory hosting the charm entry file. 1736 | The alert rules are automatically updated on charm upgrade. 1737 | recursive: Whether to scan for rule files recursively. 1738 | container_name: An optional container name to inject the payload into. 
1739 | promtail_resource_name: An optional promtail resource name from metadata 1740 | if it has been modified and attached 1741 | 1742 | Raises: 1743 | RelationNotFoundError: If there is no relation in the charm's metadata.yaml 1744 | with the same name as provided via `relation_name` argument. 1745 | RelationInterfaceMismatchError: The relation with the same name as provided 1746 | via `relation_name` argument does not have the `loki_push_api` relation 1747 | interface. 1748 | RelationRoleMismatchError: If the relation with the same name as provided 1749 | via `relation_name` argument does not have the `RelationRole.provides` 1750 | role. 1751 | """ 1752 | 1753 | on = LogProxyEvents() 1754 | 1755 | def __init__( 1756 | self, 1757 | charm, 1758 | log_files: Optional[list] = None, 1759 | relation_name: str = DEFAULT_LOG_PROXY_RELATION_NAME, 1760 | enable_syslog: bool = False, 1761 | syslog_port: int = 1514, 1762 | alert_rules_path: str = DEFAULT_ALERT_RULES_RELATIVE_PATH, 1763 | recursive: bool = False, 1764 | container_name: str = "", 1765 | promtail_resource_name: Optional[str] = None, 1766 | ): 1767 | super().__init__(charm, relation_name, alert_rules_path, recursive) 1768 | self._charm = charm 1769 | self._relation_name = relation_name 1770 | self._container = self._get_container(container_name) 1771 | self._container_name = self._get_container_name(container_name) 1772 | self._log_files = log_files or [] 1773 | self._syslog_port = syslog_port 1774 | self._is_syslog = enable_syslog 1775 | self.topology = JujuTopology.from_charm(charm) 1776 | self._promtail_resource_name = promtail_resource_name or "promtail-bin" 1777 | 1778 | # architechure used for promtail binary 1779 | arch = platform.processor() 1780 | self._arch = "amd64" if arch == "x86_64" else arch 1781 | 1782 | events = self._charm.on[relation_name] 1783 | self.framework.observe(events.relation_created, self._on_relation_created) 1784 | self.framework.observe(events.relation_changed, 
self._on_relation_changed) 1785 | self.framework.observe(events.relation_departed, self._on_relation_departed) 1786 | # turn the container name to a valid Python identifier 1787 | snake_case_container_name = self._container_name.replace("-", "_") 1788 | self.framework.observe( 1789 | getattr(self._charm.on, "{}_pebble_ready".format(snake_case_container_name)), 1790 | self._on_pebble_ready, 1791 | ) 1792 | 1793 | def _on_pebble_ready(self, _: WorkloadEvent): 1794 | """Event handler for `pebble_ready`.""" 1795 | if self.model.relations[self._relation_name]: 1796 | self._setup_promtail() 1797 | 1798 | def _on_relation_created(self, _: RelationCreatedEvent) -> None: 1799 | """Event handler for `relation_created`.""" 1800 | if not self._container.can_connect(): 1801 | return 1802 | self._setup_promtail() 1803 | 1804 | def _on_relation_changed(self, event: RelationEvent) -> None: 1805 | """Event handler for `relation_changed`. 1806 | 1807 | Args: 1808 | event: The event object `RelationChangedEvent`. 1809 | """ 1810 | self._handle_alert_rules(event.relation) 1811 | 1812 | if self._charm.unit.is_leader(): 1813 | ev = json.loads(event.relation.data[event.app].get("event", "{}")) 1814 | 1815 | if ev: 1816 | valid = bool(ev.get("valid", True)) 1817 | errors = ev.get("errors", "") 1818 | 1819 | if valid and not errors: 1820 | self.on.alert_rule_status_changed.emit(valid=valid) 1821 | else: 1822 | self.on.alert_rule_status_changed.emit(valid=valid, errors=errors) 1823 | 1824 | if not self._container.can_connect(): 1825 | return 1826 | if self.model.relations[self._relation_name]: 1827 | if "promtail" not in self._container.get_plan().services: 1828 | self._setup_promtail() 1829 | return 1830 | 1831 | new_config = self._promtail_config 1832 | if new_config != self._current_config: 1833 | self._container.push( 1834 | WORKLOAD_CONFIG_PATH, yaml.safe_dump(new_config), make_dirs=True 1835 | ) 1836 | 1837 | # Loki may send endpoints late. 
Don't necessarily start, there may be 1838 | # no clients 1839 | if new_config["clients"]: 1840 | self._container.restart(WORKLOAD_SERVICE_NAME) 1841 | self.on.log_proxy_endpoint_joined.emit() 1842 | else: 1843 | self.on.promtail_digest_error.emit("No promtail client endpoints available!") 1844 | 1845 | def _on_relation_departed(self, _: RelationEvent) -> None: 1846 | """Event handler for `relation_departed`. 1847 | 1848 | Args: 1849 | event: The event object `RelationDepartedEvent`. 1850 | """ 1851 | if not self._container.can_connect(): 1852 | return 1853 | if not self._charm.model.relations[self._relation_name]: 1854 | self._container.stop(WORKLOAD_SERVICE_NAME) 1855 | return 1856 | 1857 | new_config = self._promtail_config 1858 | if new_config != self._current_config: 1859 | self._container.push(WORKLOAD_CONFIG_PATH, yaml.safe_dump(new_config), make_dirs=True) 1860 | 1861 | if new_config["clients"]: 1862 | self._container.restart(WORKLOAD_SERVICE_NAME) 1863 | else: 1864 | self._container.stop(WORKLOAD_SERVICE_NAME) 1865 | self.on.log_proxy_endpoint_departed.emit() 1866 | 1867 | def _get_container(self, container_name: str = "") -> Container: 1868 | """Gets a single container by name or using the only container running in the Pod. 1869 | 1870 | If there is more than one container in the Pod a `PromtailDigestError` is emitted. 1871 | 1872 | Args: 1873 | container_name: The container name. 1874 | 1875 | Returns: 1876 | A `ops.model.Container` object representing the container. 1877 | 1878 | Emits: 1879 | PromtailDigestError, if there was a problem obtaining a container. 
1880 | """ 1881 | try: 1882 | container_name = self._get_container_name(container_name) 1883 | return self._charm.unit.get_container(container_name) 1884 | except (MultipleContainersFoundError, ContainerNotFoundError, ModelError) as e: 1885 | msg = str(e) 1886 | logger.warning(msg) 1887 | self.on.promtail_digest_error.emit(msg) 1888 | 1889 | def _get_container_name(self, container_name: str = "") -> str: 1890 | """Helper function for getting/validating a container name. 1891 | 1892 | Args: 1893 | container_name: The container name to be validated (optional). 1894 | 1895 | Returns: 1896 | container_name: The same container_name that was passed (if it exists) or the only 1897 | container name that is present (if no container_name was passed). 1898 | 1899 | Raises: 1900 | ContainerNotFoundError, if container_name does not exist. 1901 | MultipleContainersFoundError, if container_name was not provided but multiple 1902 | containers are present. 1903 | """ 1904 | containers = dict(self._charm.model.unit.containers) 1905 | if len(containers) == 0: 1906 | raise ContainerNotFoundError 1907 | 1908 | if not container_name: 1909 | # container_name was not provided - will get it ourselves, if it is the only one 1910 | if len(containers) > 1: 1911 | raise MultipleContainersFoundError 1912 | 1913 | # Get the first key in the containers' dict. 1914 | # Need to "cast", otherwise: 1915 | # error: Incompatible return value type (got "Optional[str]", expected "str") 1916 | container_name = cast(str, next(iter(containers.keys()))) 1917 | 1918 | elif container_name not in containers: 1919 | raise ContainerNotFoundError 1920 | 1921 | return container_name 1922 | 1923 | def _add_pebble_layer(self, workload_binary_path: str) -> None: 1924 | """Adds Pebble layer that manages Promtail service in Workload container. 1925 | 1926 | Args: 1927 | workload_binary_path: string providing path to promtail binary in workload container. 
1928 | """ 1929 | pebble_layer = { 1930 | "summary": "promtail layer", 1931 | "description": "pebble config layer for promtail", 1932 | "services": { 1933 | WORKLOAD_SERVICE_NAME: { 1934 | "override": "replace", 1935 | "summary": WORKLOAD_SERVICE_NAME, 1936 | "command": "{} {}".format(workload_binary_path, self._cli_args), 1937 | "startup": "disabled", 1938 | } 1939 | }, 1940 | } 1941 | self._container.add_layer(self._container_name, pebble_layer, combine=True) 1942 | 1943 | def _create_directories(self) -> None: 1944 | """Creates the directories for Promtail binary and config file.""" 1945 | self._container.make_dir(path=WORKLOAD_BINARY_DIR, make_parents=True) 1946 | self._container.make_dir(path=WORKLOAD_CONFIG_DIR, make_parents=True) 1947 | 1948 | def _obtain_promtail(self, promtail_info: dict) -> None: 1949 | """Obtain promtail binary from an attached resource or download it. 1950 | 1951 | Args: 1952 | promtail_info: dictionary containing information about promtail binary 1953 | that must be used. The dictionary must have three keys 1954 | - "filename": filename of promtail binary 1955 | - "zipsha": sha256 sum of zip file of promtail binary 1956 | - "binsha": sha256 sum of unpacked promtail binary 1957 | """ 1958 | workload_binary_path = os.path.join(WORKLOAD_BINARY_DIR, promtail_info["filename"]) 1959 | if self._promtail_attached_as_resource: 1960 | self._push_promtail_if_attached(workload_binary_path) 1961 | return 1962 | 1963 | if self._promtail_must_be_downloaded(promtail_info): 1964 | self._download_and_push_promtail_to_workload(promtail_info) 1965 | else: 1966 | binary_path = os.path.join(BINARY_DIR, promtail_info["filename"]) 1967 | self._push_binary_to_workload(binary_path, workload_binary_path) 1968 | 1969 | def _push_binary_to_workload(self, binary_path: str, workload_binary_path: str) -> None: 1970 | """Push promtail binary into workload container. 1971 | 1972 | Args: 1973 | binary_path: path in charm container from which promtail binary is read. 
1974 | workload_binary_path: path in workload container to which promtail binary is pushed. 1975 | """ 1976 | with open(binary_path, "rb") as f: 1977 | self._container.push(workload_binary_path, f, permissions=0o755, make_dirs=True) 1978 | logger.debug("The promtail binary file has been pushed to the workload container.") 1979 | 1980 | @property 1981 | def _promtail_attached_as_resource(self) -> bool: 1982 | """Checks whether Promtail binary is attached to the charm or not. 1983 | 1984 | Returns: 1985 | a boolean representing whether Promtail binary is attached as a resource or not. 1986 | """ 1987 | try: 1988 | self._charm.model.resources.fetch(self._promtail_resource_name) 1989 | return True 1990 | except ModelError: 1991 | return False 1992 | except NameError as e: 1993 | if "invalid resource name" in str(e): 1994 | return False 1995 | else: 1996 | raise 1997 | 1998 | def _push_promtail_if_attached(self, workload_binary_path: str) -> bool: 1999 | """Checks whether Promtail binary is attached to the charm or not. 2000 | 2001 | Args: 2002 | workload_binary_path: string specifying expected path of promtail 2003 | in workload container 2004 | 2005 | Returns: 2006 | a boolean representing whether Promtail binary is attached or not. 2007 | """ 2008 | logger.info("Promtail binary file has been obtained from an attached resource.") 2009 | resource_path = self._charm.model.resources.fetch(self._promtail_resource_name) 2010 | self._push_binary_to_workload(resource_path, workload_binary_path) 2011 | return True 2012 | 2013 | def _promtail_must_be_downloaded(self, promtail_info: dict) -> bool: 2014 | """Checks whether promtail binary must be downloaded or not. 2015 | 2016 | Args: 2017 | promtail_info: dictionary containing information about promtail binary 2018 | that must be used. 
The dictionary must have three keys 2019 | - "filename": filename of promtail binary 2020 | - "zipsha": sha256 sum of zip file of promtail binary 2021 | - "binsha": sha256 sum of unpacked promtail binary 2022 | 2023 | Returns: 2024 | a boolean representing whether Promtail binary must be downloaded or not. 2025 | """ 2026 | binary_path = os.path.join(BINARY_DIR, promtail_info["filename"]) 2027 | if not self._is_promtail_binary_in_charm(binary_path): 2028 | return True 2029 | 2030 | if not self._sha256sums_matches(binary_path, promtail_info["binsha"]): 2031 | return True 2032 | 2033 | logger.debug("Promtail binary file is already in the the charm container.") 2034 | return False 2035 | 2036 | def _sha256sums_matches(self, file_path: str, sha256sum: str) -> bool: 2037 | """Checks whether a file's sha256sum matches or not with an specific sha256sum. 2038 | 2039 | Args: 2040 | file_path: A string representing the files' patch. 2041 | sha256sum: The sha256sum against which we want to verify. 2042 | 2043 | Returns: 2044 | a boolean representing whether a file's sha256sum matches or not with 2045 | an specific sha256sum. 2046 | """ 2047 | try: 2048 | with open(file_path, "rb") as f: 2049 | file_bytes = f.read() 2050 | result = sha256(file_bytes).hexdigest() 2051 | 2052 | if result != sha256sum: 2053 | msg = "File sha256sum mismatch, expected:'{}' but got '{}'".format( 2054 | sha256sum, result 2055 | ) 2056 | logger.debug(msg) 2057 | return False 2058 | 2059 | return True 2060 | except (APIError, FileNotFoundError): 2061 | msg = "File: '{}' could not be opened".format(file_path) 2062 | logger.error(msg) 2063 | return False 2064 | 2065 | def _is_promtail_binary_in_charm(self, binary_path: str) -> bool: 2066 | """Check if Promtail binary is already stored in charm container. 2067 | 2068 | Args: 2069 | binary_path: string path of promtail binary to check 2070 | 2071 | Returns: 2072 | a boolean representing whether Promtail is present or not. 
2073 | """ 2074 | return True if Path(binary_path).is_file() else False 2075 | 2076 | def _download_and_push_promtail_to_workload(self, promtail_info: dict) -> None: 2077 | """Downloads a Promtail zip file and pushes the binary to the workload. 2078 | 2079 | Args: 2080 | promtail_info: dictionary containing information about promtail binary 2081 | that must be used. The dictionary must have three keys 2082 | - "filename": filename of promtail binary 2083 | - "zipsha": sha256 sum of zip file of promtail binary 2084 | - "binsha": sha256 sum of unpacked promtail binary 2085 | """ 2086 | with request.urlopen(promtail_info["url"]) as r: 2087 | file_bytes = r.read() 2088 | file_path = os.path.join(BINARY_DIR, promtail_info["filename"] + ".gz") 2089 | with open(file_path, "wb") as f: 2090 | f.write(file_bytes) 2091 | logger.info( 2092 | "Promtail binary zip file has been downloaded and stored in: %s", 2093 | file_path, 2094 | ) 2095 | 2096 | decompressed_file = GzipFile(fileobj=BytesIO(file_bytes)) 2097 | binary_path = os.path.join(BINARY_DIR, promtail_info["filename"]) 2098 | with open(binary_path, "wb") as outfile: 2099 | outfile.write(decompressed_file.read()) 2100 | logger.debug("Promtail binary file has been downloaded.") 2101 | 2102 | workload_binary_path = os.path.join(WORKLOAD_BINARY_DIR, promtail_info["filename"]) 2103 | self._push_binary_to_workload(binary_path, workload_binary_path) 2104 | 2105 | @property 2106 | def _cli_args(self) -> str: 2107 | """Return the cli arguments to pass to promtail. 2108 | 2109 | Returns: 2110 | The arguments as a string 2111 | """ 2112 | return "-config.file={}".format(WORKLOAD_CONFIG_PATH) 2113 | 2114 | @property 2115 | def _current_config(self) -> dict: 2116 | """Property that returns the current Promtail configuration. 2117 | 2118 | Returns: 2119 | A dict containing Promtail configuration. 
2120 | """ 2121 | if not self._container.can_connect(): 2122 | logger.debug("Could not connect to promtail container!") 2123 | return {} 2124 | try: 2125 | raw_current = self._container.pull(WORKLOAD_CONFIG_PATH).read() 2126 | return yaml.safe_load(raw_current) 2127 | except (ProtocolError, PathError) as e: 2128 | logger.warning( 2129 | "Could not check the current promtail configuration due to " 2130 | "a failure in retrieving the file: %s", 2131 | e, 2132 | ) 2133 | return {} 2134 | 2135 | @property 2136 | def _promtail_config(self) -> dict: 2137 | """Generates the config file for Promtail.""" 2138 | config = {"clients": self._clients_list()} 2139 | config.update(self._server_config()) 2140 | config.update(self._positions()) 2141 | config.update(self._scrape_configs()) 2142 | return config 2143 | 2144 | def _clients_list(self) -> list: 2145 | """Generates a list of clients for use in the promtail config. 2146 | 2147 | Returns: 2148 | A list of endpoints 2149 | """ 2150 | return self.loki_endpoints 2151 | 2152 | def _server_config(self) -> dict: 2153 | """Generates the server section of the Promtail config file. 2154 | 2155 | Returns: 2156 | A dict representing the `server` section. 2157 | """ 2158 | return { 2159 | "server": { 2160 | "http_listen_port": HTTP_LISTEN_PORT, 2161 | "grpc_listen_port": GRPC_LISTEN_PORT, 2162 | } 2163 | } 2164 | 2165 | def _positions(self) -> dict: 2166 | """Generates the positions section of the Promtail config file. 2167 | 2168 | Returns: 2169 | A dict representing the `positions` section. 2170 | """ 2171 | return {"positions": {"filename": WORKLOAD_POSITIONS_PATH}} 2172 | 2173 | def _scrape_configs(self) -> dict: 2174 | """Generates the scrape_configs section of the Promtail config file. 2175 | 2176 | Returns: 2177 | A dict representing the `scrape_configs` section. 
2178 | """ 2179 | job_name = "juju_{}".format(self.topology.identifier) 2180 | 2181 | # The new JujuTopology doesn't include unit, but LogProxyConsumer should have it 2182 | common_labels = { 2183 | "juju_{}".format(k): v 2184 | for k, v in self.topology.as_dict(remapped_keys={"charm_name": "charm"}).items() 2185 | } 2186 | scrape_configs = [] 2187 | 2188 | # Files config 2189 | labels = common_labels.copy() 2190 | labels.update( 2191 | { 2192 | "job": job_name, 2193 | "__path__": "", 2194 | } 2195 | ) 2196 | config = {"targets": ["localhost"], "labels": labels} 2197 | scrape_config = { 2198 | "job_name": "system", 2199 | "static_configs": self._generate_static_configs(config), 2200 | } 2201 | scrape_configs.append(scrape_config) 2202 | 2203 | # Syslog config 2204 | if self._is_syslog: 2205 | relabel_mappings = [ 2206 | "severity", 2207 | "facility", 2208 | "hostname", 2209 | "app_name", 2210 | "proc_id", 2211 | "msg_id", 2212 | ] 2213 | syslog_labels = common_labels.copy() 2214 | syslog_labels.update({"job": "{}_syslog".format(job_name)}) 2215 | syslog_config = { 2216 | "job_name": "syslog", 2217 | "syslog": { 2218 | "listen_address": "127.0.0.1:{}".format(self._syslog_port), 2219 | "label_structured_data": True, 2220 | "labels": syslog_labels, 2221 | }, 2222 | "relabel_configs": [ 2223 | {"source_labels": ["__syslog_message_{}".format(val)], "target_label": val} 2224 | for val in relabel_mappings 2225 | ] 2226 | + [{"action": "labelmap", "regex": "__syslog_message_sd_(.+)"}], 2227 | } 2228 | scrape_configs.append(syslog_config) # type: ignore 2229 | 2230 | return {"scrape_configs": scrape_configs} 2231 | 2232 | def _generate_static_configs(self, config: dict) -> list: 2233 | """Generates static_configs section. 
2234 | 2235 | Returns: 2236 | - a list of dictionaries representing static_configs section 2237 | """ 2238 | static_configs = [] 2239 | 2240 | for _file in self._log_files: 2241 | conf = deepcopy(config) 2242 | conf["labels"]["__path__"] = _file 2243 | static_configs.append(conf) 2244 | 2245 | return static_configs 2246 | 2247 | def _setup_promtail(self) -> None: 2248 | # Use the first 2249 | relations = self._charm.model.relations[self._relation_name] 2250 | if len(relations) > 1: 2251 | logger.debug( 2252 | "Multiple log_proxy relations. Getting Promtail from application {}".format( 2253 | relations[0].app.name 2254 | ) 2255 | ) 2256 | relation = relations[0] 2257 | promtail_binaries = json.loads( 2258 | relation.data[relation.app].get("promtail_binary_zip_url", "{}") 2259 | ) 2260 | if not promtail_binaries: 2261 | return 2262 | 2263 | if not self._is_promtail_installed(promtail_binaries[self._arch]): 2264 | try: 2265 | self._obtain_promtail(promtail_binaries[self._arch]) 2266 | except HTTPError as e: 2267 | msg = "Promtail binary couldn't be downloaded - {}".format(str(e)) 2268 | logger.warning(msg) 2269 | self.on.promtail_digest_error.emit(msg) 2270 | return 2271 | 2272 | workload_binary_path = os.path.join( 2273 | WORKLOAD_BINARY_DIR, promtail_binaries[self._arch]["filename"] 2274 | ) 2275 | 2276 | self._create_directories() 2277 | self._container.push( 2278 | WORKLOAD_CONFIG_PATH, yaml.safe_dump(self._promtail_config), make_dirs=True 2279 | ) 2280 | 2281 | self._add_pebble_layer(workload_binary_path) 2282 | 2283 | if self._current_config.get("clients"): 2284 | try: 2285 | self._container.restart(WORKLOAD_SERVICE_NAME) 2286 | except ChangeError as e: 2287 | self.on.promtail_digest_error.emit(str(e)) 2288 | else: 2289 | self.on.log_proxy_endpoint_joined.emit() 2290 | else: 2291 | self.on.promtail_digest_error.emit("No promtail client endpoints available!") 2292 | 2293 | def _is_promtail_installed(self, promtail_info: dict) -> bool: 2294 | """Determine if 
promtail has already been installed to the container. 2295 | 2296 | Args: 2297 | promtail_info: dictionary containing information about promtail binary 2298 | that must be used. The dictionary must at least contain a key 2299 | "filename" giving the name of promtail binary 2300 | """ 2301 | workload_binary_path = "{}/{}".format(WORKLOAD_BINARY_DIR, promtail_info["filename"]) 2302 | try: 2303 | self._container.list_files(workload_binary_path) 2304 | except (APIError, FileNotFoundError): 2305 | return False 2306 | return True 2307 | 2308 | @property 2309 | def syslog_port(self) -> str: 2310 | """Gets the port on which promtail is listening for syslog. 2311 | 2312 | Returns: 2313 | A str representing the port 2314 | """ 2315 | return str(self._syslog_port) 2316 | 2317 | @property 2318 | def rsyslog_config(self) -> str: 2319 | """Generates a config line for use with rsyslog. 2320 | 2321 | Returns: 2322 | The rsyslog config line as a string 2323 | """ 2324 | return 'action(type="omfwd" protocol="tcp" target="127.0.0.1" port="{}" Template="RSYSLOG_SyslogProtocol23Format" TCP_Framing="octet-counted")'.format( 2325 | self._syslog_port 2326 | ) 2327 | 2328 | 2329 | class CosTool: 2330 | """Uses cos-tool to inject label matchers into alert rule expressions and validate rules.""" 2331 | 2332 | _path = None 2333 | _disabled = False 2334 | 2335 | def __init__(self, charm): 2336 | self._charm = charm 2337 | 2338 | @property 2339 | def path(self): 2340 | """Lazy lookup of the path of cos-tool.""" 2341 | if self._disabled: 2342 | return None 2343 | if not self._path: 2344 | self._path = self._get_tool_path() 2345 | if not self._path: 2346 | logger.debug("Skipping injection of juju topology as label matchers") 2347 | self._disabled = True 2348 | return self._path 2349 | 2350 | def apply_label_matchers(self, rules) -> dict: 2351 | """Will apply label matchers to the expression of all alerts in all supplied groups.""" 2352 | if not self.path: 2353 | return rules 2354 | for group in 
rules["groups"]: 2355 | rules_in_group = group.get("rules", []) 2356 | for rule in rules_in_group: 2357 | topology = {} 2358 | # if the user for some reason has provided juju_unit, we'll need to honor it 2359 | # in most cases, however, this will be empty 2360 | for label in [ 2361 | "juju_model", 2362 | "juju_model_uuid", 2363 | "juju_application", 2364 | "juju_charm", 2365 | "juju_unit", 2366 | ]: 2367 | if label in rule["labels"]: 2368 | topology[label] = rule["labels"][label] 2369 | 2370 | rule["expr"] = self.inject_label_matchers(rule["expr"], topology) 2371 | return rules 2372 | 2373 | def validate_alert_rules(self, rules: dict) -> Tuple[bool, str]: 2374 | """Will validate correctness of alert rules, returning a boolean and any errors.""" 2375 | if not self.path: 2376 | logger.debug("`cos-tool` unavailable. Not validating alert correctness.") 2377 | return True, "" 2378 | 2379 | with tempfile.TemporaryDirectory() as tmpdir: 2380 | rule_path = Path(tmpdir + "/validate_rule.yaml") 2381 | 2382 | # Smash "our" rules format into what upstream actually uses, which is more like: 2383 | # 2384 | # groups: 2385 | # - name: foo 2386 | # rules: 2387 | # - alert: SomeAlert 2388 | # expr: up 2389 | # - alert: OtherAlert 2390 | # expr: up 2391 | transformed_rules = {"groups": []} # type: ignore 2392 | for rule in rules["groups"]: 2393 | transformed_rules["groups"].append(rule) 2394 | 2395 | rule_path.write_text(yaml.dump(transformed_rules)) 2396 | args = [str(self.path), "--format", "logql", "validate", str(rule_path)] 2397 | # noinspection PyBroadException 2398 | try: 2399 | self._exec(args) 2400 | return True, "" 2401 | except subprocess.CalledProcessError as e: 2402 | logger.debug("Validating the rules failed: %s", e.output) 2403 | return False, ", ".join([line for line in e.output if "error validating" in line]) 2404 | 2405 | def inject_label_matchers(self, expression, topology) -> str: 2406 | """Add label matchers to an expression.""" 2407 | if not topology: 2408 | 
return expression 2409 | if not self.path: 2410 | logger.debug("`cos-tool` unavailable. Leaving expression unchanged: %s", expression) 2411 | return expression 2412 | args = [str(self.path), "--format", "logql", "transform"] 2413 | args.extend( 2414 | ["--label-matcher={}={}".format(key, value) for key, value in topology.items()] 2415 | ) 2416 | 2417 | args.extend(["{}".format(expression)]) 2418 | # noinspection PyBroadException 2419 | try: 2420 | return self._exec(args) 2421 | except subprocess.CalledProcessError as e: 2422 | logger.debug('Applying the expression failed: "%s", falling back to the original', e) 2423 | print('Applying the expression failed: "{}", falling back to the original'.format(e)) 2424 | return expression 2425 | 2426 | def _get_tool_path(self) -> Optional[Path]: 2427 | arch = platform.processor() 2428 | arch = "amd64" if arch == "x86_64" else arch 2429 | res = "cos-tool-{}".format(arch) 2430 | try: 2431 | path = Path(res).resolve() 2432 | path.chmod(0o777) 2433 | return path 2434 | except NotImplementedError: 2435 | logger.debug("System lacks support for chmod") 2436 | except FileNotFoundError: 2437 | logger.debug('Could not locate cos-tool at: "{}"'.format(res)) 2438 | return None 2439 | 2440 | def _exec(self, cmd) -> str: 2441 | result = subprocess.run(cmd, check=True, stdout=subprocess.PIPE) 2442 | output = result.stdout.decode("utf-8").strip() 2443 | return output 2444 | --------------------------------------------------------------------------------