├── .trivyignore ├── dev-requirements.txt ├── tests ├── __init__.py ├── unit │ ├── __init__.py │ ├── test_state.py │ ├── test_template_utils.py │ └── test_structured_config.py ├── integration │ ├── temporal_client │ │ ├── activities.py │ │ └── workflows.py │ ├── test_charm.py │ ├── conftest.py │ └── helpers.py └── conftest.py ├── .gitignore ├── documentation ├── media │ └── architecture.png ├── tutorial │ ├── 01-get-started.md │ ├── 04-deploy-airbyte.md │ ├── 03-deploy-supporting-charms.md │ └── 02-environment-setup.md ├── explanation │ └── architecture.md ├── index.md └── how-to │ └── secure-airbyte-deployments.md ├── requirements.txt ├── .github ├── workflows │ ├── test.yaml │ ├── publish_charm.yaml │ ├── integration_test.yaml │ └── promote_charm.yaml └── ISSUE_TEMPLATE │ ├── enhancement_proposal.yml │ └── bug_report.yml ├── .woke.yaml ├── templates └── flags.jinja ├── .licenserc.yaml ├── src ├── log.py ├── utils.py ├── relations │ ├── airbyte_ui.py │ ├── postgresql.py │ ├── minio.py │ └── s3.py ├── state.py ├── s3_helpers.py ├── literals.py ├── structured_config.py ├── charm_helpers.py └── charm.py ├── pyproject.toml ├── README.md ├── tox.ini ├── airbyte_rock ├── local-files │ └── pod-sweeper.sh └── rockcraft.yaml ├── icon.svg ├── CONTRIBUTING.md ├── LICENSE ├── lib └── charms │ └── data_platform_libs │ └── v0 │ ├── data_models.py │ └── database_requires.py └── charmcraft.yaml /.trivyignore: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /dev-requirements.txt: -------------------------------------------------------------------------------- 1 | tox==4.15.1 2 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Canonical Ltd. 2 | # See LICENSE file for licensing details. 3 | 4 | """Tests module.""" 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | venv/ 2 | build/ 3 | *.charm 4 | .tox/ 5 | .coverage 6 | __pycache__/ 7 | *.py[cod] 8 | .idea 9 | .vscode/ 10 | -------------------------------------------------------------------------------- /documentation/media/architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/canonical/airbyte-k8s-operator/main/documentation/media/architecture.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | ops ~= 2.5 2 | pydantic==1.10.13 3 | boto3==1.34.31 4 | serialized-data-interface==0.7.0 5 | charmed-kubeflow-chisme==0.3.0 6 | kubernetes==24.2.0 7 | jinja2~=3.1 8 | -------------------------------------------------------------------------------- /tests/unit/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Canonical Ltd. 2 | # See LICENSE file for licensing details. 
3 | 4 | 5 | """Unit tests config.""" 6 | 7 | import ops.testing 8 | 9 | ops.testing.SIMULATE_CAN_CONNECT = True 10 | -------------------------------------------------------------------------------- /.github/workflows/test.yaml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: 4 | pull_request: 5 | 6 | jobs: 7 | unit-tests: 8 | uses: canonical/operator-workflows/.github/workflows/test.yaml@cea2ac306b4f4c1475d73b1a4c766d62e5b1c8a9 9 | secrets: inherit 10 | -------------------------------------------------------------------------------- /.github/workflows/publish_charm.yaml: -------------------------------------------------------------------------------- 1 | name: Publish to edge 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | 8 | jobs: 9 | test-and-publish-charm: 10 | uses: canonical/operator-workflows/.github/workflows/publish_charm.yaml@main 11 | secrets: inherit 12 | with: 13 | channel: latest/edge 14 | -------------------------------------------------------------------------------- /.woke.yaml: -------------------------------------------------------------------------------- 1 | ignore_files: 2 | # Ignore ingress charm library as it uses non compliant terminology: 3 | # whitelist. 4 | - lib/charms/data_platform_libs/v0/data_models.py 5 | - lib/charms/data_platform_libs/v0/database_requires.py 6 | - lib/charms/data_platform_libs/v0/s3.py 7 | rules: 8 | # Ignore "master" - the database relation event received from the library. 9 | - name: master 10 | -------------------------------------------------------------------------------- /tests/integration/temporal_client/activities.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Canonical Ltd. 2 | # See LICENSE file for licensing details. 3 | 4 | 5 | """Temporal client activity.""" 6 | 7 | from temporalio import activity 8 | 9 | 10 | @activity.defn 11 | async def say_hello(name: str) -> str: 12 | """Temporal activity. 13 | 14 | Args: 15 | name: used to run the dynamic activity. 16 | 17 | Returns: 18 | String in the form "Hello, {name}! 19 | """ 20 | return f"Hello, {name}!" 21 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Canonical Ltd. 2 | # See LICENSE file for licensing details. 3 | 4 | """Fixtures for charm tests.""" 5 | 6 | import pytest 7 | 8 | 9 | def pytest_addoption(parser: pytest.Parser): 10 | """Parse additional pytest options. 11 | 12 | Args: 13 | parser: pytest command line parser. 14 | """ 15 | # The prebuilt charm file. 16 | parser.addoption("--charm-file", action="append", default=[]) 17 | # The charm image name:tag. 18 | parser.addoption("--airbyte-image", action="store", default="") 19 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/enhancement_proposal.yml: -------------------------------------------------------------------------------- 1 | name: Enhancement Proposal 2 | description: File an enhancement proposal 3 | labels: ["Type: Enhancement", "Status: Triage"] 4 | body: 5 | - type: markdown 6 | attributes: 7 | value: > 8 | Thanks for taking the time to fill out this enhancement proposal! Before submitting your issue, please make 9 | sure there isn't already a prior issue concerning this. If there is, please join that discussion instead. 
10 |   - type: textarea
11 |     id: enhancement-proposal
12 |     attributes:
13 |       label: Enhancement Proposal
14 |       description: >
15 |         Describe the enhancement you would like to see in as much detail as needed.
16 |     validations:
17 |       required: true
18 |
--------------------------------------------------------------------------------
/documentation/tutorial/01-get-started.md:
--------------------------------------------------------------------------------
1 | This guide describes how to get started with the Charmed Airbyte K8s Operator, from setting up MicroK8s in your environment to deploying Airbyte and creating your first connection.
2 |
3 | The Charmed Airbyte K8s Operator automates operations management for Airbyte on Kubernetes.
4 |
5 | ## Prerequisites
6 | Before you begin, make sure you have:
7 |
8 | - Ubuntu 22.04 LTS or later
9 | - [Snap](https://snapcraft.io/docs/installing-snapd) installed
10 | - At least 8 GB of RAM
11 | - 2 CPU cores
12 | - At least 20 GB of available disk space
13 |
14 | ## Get Started
15 | 1. [Set up your environment](./02-environment-setup.md)
16 | 2. [Deploy supporting charms](./03-deploy-supporting-charms.md)
17 | 3. [Deploy Charmed Airbyte](./04-deploy-airbyte.md)
--------------------------------------------------------------------------------
/.github/workflows/integration_test.yaml:
--------------------------------------------------------------------------------
1 | name: Integration tests
2 |
3 | concurrency:
4 |   group: ${{ github.workflow }}-${{ github.ref }}
5 |   cancel-in-progress: true
6 |
7 | on:
8 |   pull_request:
9 |
10 | jobs:
11 |   integration-tests:
12 |     uses: canonical/operator-workflows/.github/workflows/integration_test.yaml@main
13 |     secrets: inherit
14 |     with:
15 |       channel: 1.34-strict/stable
16 |       modules: '["test_charm.py"]'
17 |       juju-channel: 3.6/stable
18 |       self-hosted-runner: true
19 |       self-hosted-runner-label: "xlarge"
20 |       builder-runner-label: "xlarge"
21 |       microk8s-addons: "dns ingress rbac storage metallb:10.15.119.2-10.15.119.4 registry"
22 |       trivy-severity-config: CRITICAL
23 |       rockcraft-enable-security-nesting: true
24 |       tmate-debug: true
--------------------------------------------------------------------------------
/.github/workflows/promote_charm.yaml:
--------------------------------------------------------------------------------
1 | name: Promote charm
2 |
3 | on:
4 |   workflow_dispatch:
5 |     inputs:
6 |       origin-channel:
7 |         type: choice
8 |         description: "Origin Channel"
9 |         options:
10 |           - latest/edge
11 |       destination-channel:
12 |         type: choice
13 |         description: "Destination Channel"
14 |         options:
15 |           - latest/stable
16 |     secrets:
17 |       CHARMHUB_TOKEN:
18 |         required: true
19 |
20 | jobs:
21 |   promote-charm:
22 |     uses: canonical/operator-workflows/.github/workflows/promote_charm.yaml@main
23 |     with:
24 |       origin-channel: ${{ github.event.inputs.origin-channel }}
25 |       destination-channel: ${{ github.event.inputs.destination-channel }}
26 |       doc-automation-disabled: true
27 |     secrets: inherit
--------------------------------------------------------------------------------
/templates/flags.jinja:
--------------------------------------------------------------------------------
1 | flags:
2 | {% if heartbeat_max_seconds_between_messages is not none %}
3 |   - name: heartbeat-max-seconds-between-messages
4 |     serve: "{{ heartbeat_max_seconds_between_messages }}"
5 | {% endif %}
6 | {% if heartbeat_fail_sync is not none %}
7 |   - name: heartbeat.failSync
8 |     serve: {{ heartbeat_fail_sync | lower }}
9 | {% endif %}
10 | {% if
destination_timeout_max_seconds is not none or destination_timeout_fail_sync is not none %} 11 | - name: destination-timeout-enabled 12 | serve: true 13 | {% endif %} 14 | {% if destination_timeout_max_seconds is not none %} 15 | - name: destination-timeout.seconds 16 | serve: "{{ destination_timeout_max_seconds }}" 17 | {% endif %} 18 | {% if destination_timeout_fail_sync is not none %} 19 | - name: destination-timeout.failSync 20 | serve: {{ destination_timeout_fail_sync | lower }} 21 | {% endif %} 22 | -------------------------------------------------------------------------------- /tests/integration/temporal_client/workflows.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Canonical Ltd. 2 | # See LICENSE file for licensing details. 3 | 4 | 5 | """Temporal client sample workflow.""" 6 | 7 | from datetime import timedelta 8 | 9 | from temporalio import workflow 10 | 11 | # Import our activity, passing it through the sandbox 12 | with workflow.unsafe.imports_passed_through(): 13 | from .activities import say_hello 14 | 15 | 16 | @workflow.defn 17 | class SayHello: 18 | """Temporal workflow class.""" 19 | 20 | @workflow.run 21 | async def run(self, name: str) -> str: 22 | """Workflow execution method. 23 | 24 | Args: 25 | name: used to run the dynamic activity. 26 | 27 | Returns: 28 | Workflow execution 29 | """ 30 | return await workflow.execute_activity(say_hello, name, schedule_to_close_timeout=timedelta(seconds=5)) 31 | -------------------------------------------------------------------------------- /.licenserc.yaml: -------------------------------------------------------------------------------- 1 | header: 2 | license: 3 | spdx-id: Apache-2.0 4 | copyright-owner: Canonical Ltd. 5 | copyright-year: 2024 6 | content: | 7 | Copyright [year] [owner] 8 | See LICENSE file for licensing details. 9 | paths: 10 | - '**' 11 | paths-ignore: 12 | - '.github/**' 13 | - '**/.gitkeep' 14 | - '**/*.cfg' 15 | - '**/*.conf' 16 | - '**/*.j2' 17 | - '**/*.json' 18 | - '**/*.md' 19 | - '**/*.rule' 20 | - '**/*.tmpl' 21 | - '**/*.txt' 22 | - '**/*.jinja' 23 | - '.codespellignore' 24 | - '.dockerignore' 25 | - '.flake8' 26 | - '.jujuignore' 27 | - '.gitignore' 28 | - '.licenserc.yaml' 29 | - '.trivyignore' 30 | - '.woke.yaml' 31 | - '.woke.yml' 32 | - 'CODEOWNERS' 33 | - 'icon.svg' 34 | - 'LICENSE' 35 | - 'trivy.yaml' 36 | - 'lib/**' 37 | - '**/*.patch' 38 | comment: on-failure 39 | -------------------------------------------------------------------------------- /tests/integration/test_charm.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright 2024 Canonical Ltd. 3 | # See LICENSE file for licensing details. 
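"""Integration tests for the Airbyte server charm."""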
4 |
5 | import logging
6 |
7 | import pytest
8 | import requests
9 | from helpers import APP_NAME_AIRBYTE_SERVER, get_unit_url, run_test_sync_job
10 | from pytest_operator.plugin import OpsTest
11 |
12 | logger = logging.getLogger(__name__)
13 |
14 |
15 | @pytest.mark.abort_on_fail
16 | @pytest.mark.usefixtures("deploy")
17 | class TestDeployment:
18 |     """Integration tests for charm."""
19 |
20 |     async def test_deployment(self, ops_test: OpsTest):
21 |         url = await get_unit_url(ops_test, application=APP_NAME_AIRBYTE_SERVER, unit=0, port=8001)
22 |         logger.info("curling app address: %s", url)
23 |
24 |         response = requests.get(f"{url}/api/v1/health", timeout=300)
25 |
26 |         assert response.status_code == 200
27 |         assert response.json().get("available")
28 |
29 |     async def test_sync_job(self, ops_test: OpsTest):
30 |         await run_test_sync_job(ops_test)
31 |
--------------------------------------------------------------------------------
/src/log.py:
--------------------------------------------------------------------------------
1 | # Copyright 2024 Canonical Ltd.
2 | # See LICENSE file for licensing details.
3 |
4 | """Define logging helpers."""
5 |
6 | import functools
7 |
8 |
9 | def log_event_handler(logger):
10 |     """Log with the provided logger when an event handler method is executed.
11 |
12 |     Args:
13 |         logger: logger used to log events.
14 |
15 |     Returns:
16 |         Decorator wrapper.
17 |     """
18 |
19 |     def decorator(method):
20 |         """Log decorator wrapper.
21 |
22 |         Args:
23 |             method: method wrapped by the decorator.
24 |
25 |         Returns:
26 |             Decorated method.
27 |         """
28 |
29 |         @functools.wraps(method)
30 |         def decorated(self, event):
31 |             """Log decorator method.
32 |
33 |             Args:
34 |                 event: The event triggered when the relation changes.
35 |
36 |             Returns:
37 |                 Result of the wrapped method.
38 |             """
39 |             logger.info(f"* running {self.__class__.__name__}.{method.__name__}")
40 |             try:
41 |                 return method(self, event)
42 |             finally:
43 |                 logger.info(f"* completed {self.__class__.__name__}.{method.__name__}")
44 |
45 |         return decorated
46 |
47 |     return decorator
48 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | # Copyright 2024 Canonical Ltd.
2 | # See LICENSE file for licensing details.
3 |
4 | [tool.bandit]
5 | exclude_dirs = ["/venv/"]
6 | [tool.bandit.assert_used]
7 | skips = ["*/*test.py", "*/test_*.py", "*tests/*.py"]
8 |
9 | # Testing tools configuration
10 | [tool.coverage.run]
11 | branch = true
12 |
13 | [tool.coverage.report]
14 | show_missing = true
15 |
16 | [tool.pytest.ini_options]
17 | minversion = "6.0"
18 | log_cli_level = "INFO"
19 |
20 | # Formatting tools configuration
21 | [tool.black]
22 | line-length = 120
23 | target-version = ["py38"]
24 |
25 | [tool.isort]
26 | profile = "black"
27 |
28 | # Linting tools configuration
29 | [tool.flake8]
30 | max-line-length = 120
31 | max-doc-length = 99
32 | max-complexity = 10
33 | exclude = [".git", "__pycache__", ".tox", "build", "dist", "*.egg_info", "venv"]
34 | select = ["E", "W", "F", "C", "N", "R", "D", "H"]
35 | # Ignore W503, E501 because using black creates errors with this
36 | # Ignore D107 Missing docstring in __init__
37 | ignore = ["W503", "E501", "D107"]
38 | # D100, D101, D102, D103: Ignore missing docstrings in tests
39 | per-file-ignores = ["tests/*:D100,D101,D102,D103,D104"]
40 | docstring-convention = "google"
41 | # Check for properly formatted copyright header in each file
42 | copyright-check = "True"
43 | copyright-author = "Canonical Ltd."
44 | copyright-regexp = "Copyright\\s\\d{4}([-,]\\d{4})*\\s+%(author)s"
45 |
--------------------------------------------------------------------------------
/documentation/explanation/architecture.md:
--------------------------------------------------------------------------------
1 | # Charmed Airbyte Architecture
2 |
3 | The Charmed Airbyte ecosystem consists of a number of charmed operators that integrate with one another. The diagram below gives a high-level illustration of the different charms and how they communicate. The components are as follows:
4 |
5 | ![Architecture](../media/architecture.png)
6 |
7 | ## Component Descriptions
8 |
9 | ### Airbyte-k8s
10 | * Runs the server, scheduler and API.
11 | * Uses MinIO as object storage.
12 | * Uses a PostgreSQL database (DBaaS).
13 | * Integrates with:
14 |   * OAuth2 Proxy for authentication
15 |   * MinIO for blobs, logs, state
16 |   * Ingress via the nginx ingress integrator
17 |
18 | ### OAuth2 Proxy
19 | * Protects Airbyte behind Google OAuth / GitHub OAuth / SSO.
20 | * Acts as a reverse proxy for Airbyte.
21 | * Exposed through the same nginx ingress integrator as Airbyte.
22 |
23 | ### Nginx Ingress Integrator
24 | A single instance serves:
25 | * Airbyte
26 | * OAuth2 Proxy
27 |
28 | This ingress handles:
29 | * HTTP routing
30 | * TLS termination (if TLS secret is configured)
31 | * Source-range allowlist
32 | * Timeout configuration
33 |
34 | ### MinIO
35 | Stores state, large logs (objects), and job artifacts.
36 |
37 | ### Temporal-k8s
38 |
39 | Orchestration engine powering:
40 | * Job execution
41 | * Retries
42 | * Scheduling
43 | * Long-running sync pipelines
44 |
45 | ### Temporal Admin
46 | Provides:
47 | * Namespace administration
48 | * Workflow debugging tools
--------------------------------------------------------------------------------
/src/utils.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # Copyright 2024 Canonical Ltd.
3 | # See LICENSE file for licensing details.
4 | 5 | """Utilities for the Airbyte charm.""" 6 | 7 | import os 8 | 9 | from jinja2 import Environment, FileSystemLoader 10 | 11 | 12 | def render_template(template_name: str, context: dict) -> str: 13 | """Render a Jinja2 template with the given context. 14 | 15 | Args: 16 | template_name: Name of the template file (e.g., "flags.jinja"). 17 | context: Dictionary of variables to pass to the template. 18 | 19 | Returns: 20 | Rendered template content as a string. 21 | """ 22 | # Get the absolute path of templates directory 23 | charm_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir)) 24 | templates_path = os.path.join(charm_dir, "templates") 25 | 26 | # Create Jinja2 environment and render template 27 | loader = FileSystemLoader(templates_path) 28 | env = Environment(loader=loader, autoescape=True) 29 | template = env.get_template(template_name) 30 | 31 | return template.render(**context) 32 | 33 | 34 | def use_feature_flags(config: dict) -> bool: 35 | """Determine if feature flags should be used based on environment variable. 36 | 37 | Args: 38 | config: Configuration dictionary containing feature flag settings. 39 | 40 | Returns: 41 | True if feature flags are enabled, False otherwise. 42 | """ 43 | return not all( 44 | [ 45 | config["heartbeat-max-seconds-between-messages"] is None, 46 | config["heartbeat-fail-sync"] is None, 47 | config["destination-timeout-max-seconds"] is None, 48 | config["destination-timeout-fail-sync"] is None, 49 | ] 50 | ) 51 | -------------------------------------------------------------------------------- /documentation/index.md: -------------------------------------------------------------------------------- 1 | [![Charmhub Badge](https://charmhub.io/airbyte-k8s/badge.svg)](https://charmhub.io/airbyte-k8s) 2 | [![Release Edge](https://github.com/canonical/airbyte-k8s-operator/actions/workflows/publish_charm.yaml/badge.svg)](https://github.com/canonical/airbyte-k8s-operator/actions/workflows/publish_charm.yaml) 3 | 4 | **Charmed Airbyte K8s Operator** is an open-source, production-ready data integration platform operator for **Kubernetes**, based on [Airbyte](https://airbyte.io/). 5 | 6 | Airbyte simplifies the process of **extracting and loading data** from various sources into a variety of destinations such as **data warehouses, data lakes, or data meshes**, enabling continuous, scheduled data synchronization to ensure data freshness and reliability. 7 | 8 | The Charmed Airbyte K8s Operator automates the **deployment, configuration, and lifecycle management** of the Airbyte server on Kubernetes using **Juju**. It wraps the official Airbyte server distribution and integrates with other charms to form a complete data ingestion pipeline within the Canonical data ecosystem. 9 | 10 | It is intended for **data engineers and platform teams** who want to automate and scale Airbyte deployments while maintaining consistency and observability across environments. 
11 | 12 | ### Features 13 | 14 | - Automated deployment and scaling on Kubernetes 15 | - Seamless integration with PostgreSQL, Temporal, and object storage via Juju relations 16 | - Simple Airbyte UI access for connector configuration and monitoring 17 | - Ingress and authentication integration via Nginx and OAuth2 Proxy charms 18 | - Observability through Juju relation-based configuration 19 | 20 | ### In this documentation 21 | 22 | | Section | Description | 23 | | --- | --- | 24 | | **Tutorial** | **Get started** - A hands-on guide to deploying and configuring Charmed Airbyte, including creating your first data connection | 25 | | **How-to guides** | **Step-by-step guides** - Instructions for common operational tasks and advanced configurations | 26 | | **Reference** | **Technical reference** - Comprehensive details on configuration options, actions, relations, and APIs | -------------------------------------------------------------------------------- /src/relations/airbyte_ui.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Canonical Ltd. 2 | # See LICENSE file for licensing details. 3 | 4 | """Define the Airbyte server:ui relation.""" 5 | 6 | import logging 7 | 8 | from ops import framework 9 | from ops.model import ActiveStatus 10 | 11 | from log import log_event_handler 12 | 13 | logger = logging.getLogger(__name__) 14 | 15 | 16 | class AirbyteServerProvider(framework.Object): 17 | """Client for server:ui relation.""" 18 | 19 | def __init__(self, charm): 20 | """Construct. 21 | 22 | Args: 23 | charm: The charm to attach the hooks to. 24 | """ 25 | super().__init__(charm, "airbyte-server") 26 | self.charm = charm 27 | charm.framework.observe(charm.on.airbyte_server_relation_joined, self._on_airbyte_server_relation_joined) 28 | charm.framework.observe(charm.on.airbyte_server_relation_changed, self._on_airbyte_server_relation_joined) 29 | 30 | @log_event_handler(logger) 31 | def _on_airbyte_server_relation_joined(self, event): 32 | """Handle new server:ui relation. 33 | 34 | Attempt to provide server status to the ui application. 35 | 36 | Args: 37 | event: The event triggered when the relation changed. 38 | """ 39 | if self.charm.unit.is_leader(): 40 | self._provide_server_status() 41 | 42 | def _provide_server_status(self): 43 | """Provide server status to the UI charm.""" 44 | is_active = self.charm.model.unit.status == ActiveStatus() 45 | 46 | ui_relations = self.charm.model.relations["airbyte-server"] 47 | if not ui_relations: 48 | logger.debug("server:ui: not providing server status: ui not ready") 49 | return 50 | for relation in ui_relations: 51 | logger.debug(f"server:ui: providing server status on relation {relation.id}") 52 | relation.data[self.charm.app].update( 53 | { 54 | "server_name": self.charm.app.name, 55 | "server_status": "ready" if is_active else "blocked", 56 | } 57 | ) 58 | -------------------------------------------------------------------------------- /src/state.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Canonical Ltd. 2 | # See LICENSE file for licensing details. 3 | 4 | """Manager for handling charm state.""" 5 | 6 | import json 7 | 8 | 9 | class State: 10 | """A magic state that uses a relation as the data store. 11 | 12 | The get_relation callable is used to retrieve the relation. 13 | As relation data values must be strings, all values are JSON encoded. 14 | """ 15 | 16 | def __init__(self, app, get_relation): 17 | """Construct. 
18 | 19 | Args: 20 | app: workload application 21 | get_relation: get peer relation method 22 | """ 23 | # Use __dict__ to avoid calling __setattr__ and subsequent infinite recursion. 24 | self.__dict__["_app"] = app 25 | self.__dict__["_get_relation"] = get_relation 26 | 27 | def __setattr__(self, name, value): 28 | """Set a value in the store with the given name. 29 | 30 | Args: 31 | name: name of value to set in store. 32 | value: value to set in store. 33 | """ 34 | v = json.dumps(value) 35 | self._get_relation().data[self._app].update({name: v}) 36 | 37 | def __getattr__(self, name): 38 | """Get from the store the value with the given name, or None. 39 | 40 | Args: 41 | name: name of value to get from store. 42 | 43 | Returns: 44 | value from store with given name. 45 | """ 46 | v = self._get_relation().data[self._app].get(name, "null") 47 | return json.loads(v) 48 | 49 | def __delattr__(self, name): 50 | """Delete the value with the given name from the store, if it exists. 51 | 52 | Args: 53 | name: name of value to delete from store. 54 | 55 | Returns: 56 | deleted value from store. 57 | """ 58 | return self._get_relation().data[self._app].pop(name, None) 59 | 60 | def is_ready(self): 61 | """Report whether the relation is ready to be used. 62 | 63 | Returns: 64 | A boolean representing whether the relation is ready to be used or not. 65 | """ 66 | return bool(self._get_relation()) 67 | -------------------------------------------------------------------------------- /tests/unit/test_state.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Canonical Ltd. 2 | # See LICENSE file for licensing details. 3 | # 4 | # Learn more about testing at: https://juju.is/docs/sdk/testing 5 | 6 | """Charm state unit tests.""" 7 | 8 | import json 9 | from unittest import TestCase 10 | 11 | from state import State 12 | 13 | 14 | class TestState(TestCase): 15 | """Unit tests for state. 16 | 17 | Attrs: 18 | maxDiff: Specifies max difference shown by failed tests. 19 | """ 20 | 21 | maxDiff = None 22 | 23 | def test_get(self): 24 | """It is possible to retrieve attributes from the state.""" 25 | state = make_state({"foo": json.dumps("bar")}) 26 | self.assertEqual(state.foo, "bar") 27 | self.assertIsNone(state.bad) 28 | 29 | def test_set(self): 30 | """It is possible to set attributes in the state.""" 31 | data = {"foo": json.dumps("bar")} 32 | state = make_state(data) 33 | state.foo = 42 34 | state.list = [1, 2, 3] 35 | self.assertEqual(state.foo, 42) 36 | self.assertEqual(state.list, [1, 2, 3]) 37 | self.assertEqual(data, {"foo": "42", "list": "[1, 2, 3]"}) 38 | 39 | def test_del(self): 40 | """It is possible to unset attributes in the state.""" 41 | data = {"foo": json.dumps("bar"), "answer": json.dumps(42)} 42 | state = make_state(data) 43 | del state.foo 44 | self.assertIsNone(state.foo) 45 | self.assertEqual(data, {"answer": "42"}) 46 | # Deleting a name that is not set does not error. 47 | del state.foo 48 | 49 | def test_is_ready(self): 50 | """The state is not ready when it is not possible to get relations.""" 51 | state = make_state({}) 52 | self.assertTrue(state.is_ready()) 53 | 54 | state = State("myapp", lambda: None) 55 | self.assertFalse(state.is_ready()) 56 | 57 | 58 | def make_state(data): 59 | """Create state object. 60 | 61 | Args: 62 | data: Data to be included in state. 63 | 64 | Returns: 65 | State object with data. 
66 | """ 67 | app = "myapp" 68 | rel = type("Rel", (), {"data": {app: data}})() 69 | return State(app, lambda: rel) 70 | -------------------------------------------------------------------------------- /documentation/tutorial/04-deploy-airbyte.md: -------------------------------------------------------------------------------- 1 | 2 | # Deploy Charmed Airbyte 3 | 4 | This part of the tutorial explains how to deploy the Charmed Airbyte application and integrate it with its supporting components deployed in the previous step. 5 | 6 | ## 1. Deploy Charmed Airbyte 7 | 8 | Deploy Airbyte using the official charm: 9 | ```bash 10 | juju deploy airbyte-k8s --channel edge --trust 11 | ``` 12 | 13 | Verify the deployment: 14 | ```bash 15 | juju status --watch 2s 16 | ``` 17 | 18 | Initially, Airbyte will be in a **blocked** state with a message such as: 19 | ``` 20 | database relation not ready 21 | ``` 22 | Relations will be added in the next steps. 23 | 24 | ## 2. Integrate Airbyte with MinIO (Object Storage) 25 | 26 | Airbyte requires object storage for logs, artifacts, and state. 27 | 28 | Add the relation between MinIO and Airbyte: 29 | ```bash 30 | juju relate minio airbyte-k8s 31 | ``` 32 | 33 | Expected status after the relation settles: 34 | ``` 35 | airbyte-k8s/0: active | waiting for database connection 36 | ``` 37 | 38 | ## 3. Integrate Airbyte with PostgreSQL (Metadata Database) 39 | 40 | Airbyte depends on PostgreSQL to store metadata, configuration, and job history. 41 | 42 | Add the relation between PostgreSQL and Airbyte: 43 | ```bash 44 | juju relate postgresql-k8s airbyte-k8s 45 | ``` 46 | 47 | Airbyte will transition to a new blocked state until Temporal is related: 48 | ``` 49 | temporal relation not ready 50 | ``` 51 | 52 | ## 4. Integrate Airbyte with Temporal (Workflow Engine) 53 | 54 | Airbyte depends on two Temporal charms: 55 | * `temporal-k8s` — the Temporal workflow engine 56 | * `temporal-admin-k8s` — provides UI and admin capabilities 57 | 58 | Add the relations: 59 | ```bash 60 | juju relate temporal-k8s:db postgresql-k8s:database 61 | juju relate temporal-k8s:visibility postgresql-k8s:database 62 | juju relate temporal-k8s:admin temporal-admin-k8s:admin 63 | ``` 64 | 65 | After all relations and configurations are applied: 66 | 67 | ```bash 68 | juju status 69 | ``` 70 | 71 | All applications (`airbyte-k8s`, `temporal-k8s`, `temporal-admin-k8s`, `postgresql-k8s`, `minio`) should eventually show `active` status. At this point, Airbyte is fully operational. 72 | 73 | ## Next steps 74 | 75 | [Secure Airbyte deployments](../how-to/secure-airbyte-deployments.md) -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.yml: -------------------------------------------------------------------------------- 1 | name: Bug Report 2 | description: File a bug report 3 | labels: ["Type: Bug", "Status: Triage"] 4 | body: 5 | - type: markdown 6 | attributes: 7 | value: > 8 | Thanks for taking the time to fill out this bug report! Before submitting your issue, please make 9 | sure you are using the latest version of the charm. If not, please switch to this image prior to 10 | posting your report to make sure it's not already solved. 11 | - type: textarea 12 | id: bug-description 13 | attributes: 14 | label: Bug Description 15 | description: > 16 | If applicable, add screenshots to help explain the problem you are facing. 
17 | validations: 18 | required: true 19 | - type: textarea 20 | id: reproduction 21 | attributes: 22 | label: To Reproduce 23 | description: > 24 | Please provide a step-by-step instruction of how to reproduce the behavior. 25 | placeholder: | 26 | 1. `juju deploy ...` 27 | 2. `juju relate ...` 28 | 3. `juju status --relations` 29 | validations: 30 | required: true 31 | - type: textarea 32 | id: environment 33 | attributes: 34 | label: Environment 35 | description: > 36 | We need to know a bit more about the context in which you run the charm. 37 | - Are you running Juju locally, on lxd, in multipass or on some other platform? 38 | - What track and channel you deployed the charm from (ie. `latest/edge` or similar). 39 | - Version of any applicable components, like the juju snap, the model controller, lxd, microk8s, and/or multipass. 40 | validations: 41 | required: true 42 | - type: textarea 43 | id: logs 44 | attributes: 45 | label: Relevant log output 46 | description: > 47 | Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks. 48 | Fetch the logs using `juju debug-log --replay` and `kubectl logs ...`. Additional details available in the juju docs 49 | at https://juju.is/docs/olm/juju-logs 50 | render: shell 51 | validations: 52 | required: true 53 | - type: textarea 54 | id: additional-context 55 | attributes: 56 | label: Additional context 57 | -------------------------------------------------------------------------------- /documentation/tutorial/03-deploy-supporting-charms.md: -------------------------------------------------------------------------------- 1 | # Deploy Supporting Charms 2 | 3 | This part of the tutorial focuses on deploying supporting charms that Airbyte requires for metadata storage, workflow orchestration, and object storage. 4 | 5 | | Requirement | Charm | Purpose | 6 | | ------------------- | -------------------------------------------------------------------------- | ----------------------------------------------------- | 7 | | **Database** | [`postgresql-k8s`](https://charmhub.io/postgresql-k8s) | Stores metadata, job configurations, and sync history | 8 | | **Workflow Engine** | [`temporal-k8s`](https://charmhub.io/temporal-k8s) | Manages task queues and workflow execution | 9 | | **Admin UI** | [`temporal-admin-k8s`](https://charmhub.io/temporal-admin-k8s) | Manages Temporal namespaces and admin tasks | 10 | | **Object Storage** | [`minio`](https://charmhub.io/minio) or [`S3 Integrator`](https://charmhub.io/s3-integrator) | Stores sync logs, state, and artifacts | 11 | | **Ingress** | [`nginx-ingress-integrator`](https://charmhub.io/nginx-ingress-integrator) | Provides TLS termination and routing | 12 | 13 | > Note: Either MinIO or S3 Integrator can be used; not both. 14 | 15 | ## Deploy PostgreSQL 16 | 17 | ```bash 18 | juju deploy postgresql-k8s --channel 14/edge --trust 19 | juju status --watch 2s 20 | ``` 21 | 22 | > Deployment may take ~10 minutes. Expect `active` status for all units once complete. 23 | 24 | ## Deploy MinIO 25 | 26 | ```bash 27 | juju deploy minio --channel edge 28 | juju status --watch 2s 29 | ``` 30 | 31 | > Deployment completes when all units are `active`. 
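If you want to sanity-check MinIO from outside Juju once it is active, a quick probe with `boto3` (the same client library the charm's S3 helpers use) can confirm the object store is reachable. The endpoint and credentials below are placeholders for the values from your own deployment:

```python
# Minimal reachability check for the MinIO deployment.
# Replace the endpoint and credentials with the values from your deployment.
import boto3

session = boto3.session.Session(
    aws_access_key_id="<minio-access-key>",
    aws_secret_access_key="<minio-secret-key>",
)
s3 = session.client("s3", endpoint_url="http://<minio-unit-ip>:9000")

# An empty list is fine on a fresh deployment; an error means MinIO is not reachable.
print(s3.list_buckets().get("Buckets", []))
```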
32 | 33 | ## Deploy Temporal 34 | 35 | ```bash 36 | juju deploy temporal-k8s --config num-history-shards=4 # This value can be set to 1024 or 2048 for a production deployment 37 | juju deploy temporal-admin-k8s 38 | juju status --watch 2s 39 | ``` 40 | 41 | > Temporal requires `num-history-shards` to be a power of 2. 42 | 43 | Ignore temporary `blocked` messages; they will be resolved once relations are added in the next step. 44 | 45 | ## Deploy Nginx Ingress Integrator 46 | 47 | ```bash 48 | juju deploy nginx-ingress-integrator --trust 49 | juju status --watch 2s 50 | ``` 51 | 52 | **See next: 53 | [Deploy Charmed Airbyte](./04-deploy-airbyte.md)** -------------------------------------------------------------------------------- /src/relations/postgresql.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Canonical Ltd. 2 | # See LICENSE file for licensing details. 3 | 4 | """Define the Airbyte server postgresql relation.""" 5 | 6 | import logging 7 | 8 | from charms.data_platform_libs.v0.database_requires import DatabaseEvent 9 | from ops import framework 10 | from ops.model import WaitingStatus 11 | 12 | from literals import DB_NAME 13 | from log import log_event_handler 14 | 15 | logger = logging.getLogger(__name__) 16 | 17 | 18 | class PostgresqlRelation(framework.Object): 19 | """Client for airbyte:postgresql relations.""" 20 | 21 | def __init__(self, charm): 22 | """Construct. 23 | 24 | Args: 25 | charm: The charm to attach the hooks to. 26 | """ 27 | super().__init__(charm, "db") 28 | self.charm = charm 29 | 30 | charm.framework.observe(charm.db.on.database_created, self._on_database_changed) 31 | charm.framework.observe(charm.db.on.endpoints_changed, self._on_database_changed) 32 | charm.framework.observe(charm.on.db_relation_broken, self._on_database_relation_broken) 33 | 34 | @log_event_handler(logger) 35 | def _on_database_changed(self, event: DatabaseEvent) -> None: 36 | """Handle database creation/change events. 37 | 38 | Args: 39 | event: The event triggered when the relation changed. 40 | """ 41 | if not self.charm.unit.is_leader(): 42 | return 43 | 44 | if not self.charm._state.is_ready(): 45 | event.defer() 46 | return 47 | 48 | self.charm.unit.status = WaitingStatus(f"handling {event.relation.name} change") 49 | host, port = event.endpoints.split(",", 1)[0].split(":") 50 | 51 | self.charm._state.database_connection = { 52 | "dbname": DB_NAME, 53 | "host": host, 54 | "port": port, 55 | "password": event.password, 56 | "user": event.username, 57 | } 58 | 59 | self.charm._update(event) 60 | 61 | @log_event_handler(logger) 62 | def _on_database_relation_broken(self, event: DatabaseEvent) -> None: 63 | """Handle broken relations with the database. 64 | 65 | Args: 66 | event: The event triggered when the relation changed. 
67 | """ 68 | if not self.charm.unit.is_leader(): 69 | return 70 | 71 | if not self.charm._state.is_ready(): 72 | event.defer() 73 | return 74 | 75 | self.charm._state.database_connection = None 76 | self.charm._update(event) 77 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Charmhub Badge](https://charmhub.io/airbyte-k8s/badge.svg)](https://charmhub.io/airbyte-k8s) 2 | [![Release Edge](https://github.com/canonical/airbyte-k8s-operator/actions/workflows/publish_charm.yaml/badge.svg)](https://github.com/canonical/airbyte-k8s-operator/actions/workflows/publish_charm.yaml) 3 | 4 | **Charmed Airbyte K8s Operator** is an open-source, production-ready data integration platform operator for **Kubernetes**, based on [Airbyte](https://airbyte.io/). 5 | 6 | Airbyte simplifies the process of **extracting and loading data** from various sources into a variety of destinations such as **data warehouses, data lakes, or data meshes**, enabling continuous, scheduled data synchronization to ensure data freshness and reliability. 7 | 8 | The Charmed Airbyte K8s Operator automates the **deployment, configuration, and lifecycle management** of the Airbyte server on Kubernetes using **Juju**. It wraps the official Airbyte server distribution and integrates with other charms to form a complete data ingestion pipeline within the Canonical data ecosystem. 9 | 10 | It is intended for **data engineers and platform teams** who want to automate and scale Airbyte deployments while maintaining consistency and observability across environments. 11 | 12 | ### Key Dependencies 13 | 14 | | Requirement | Charm | Purpose | 15 | | --- | --- | --- | 16 | | **Database** | [`postgresql-k8s`](https://charmhub.io/postgresql-k8s) | Stores Airbyte metadata, job configurations, and sync history | 17 | | **Workflow Engine** | [`temporal-k8s`](https://charmhub.io/temporal-k8s) | Manages task queues and workflow execution | 18 | | **Object Storage** | [`minio`](https://charmhub.io/minio) or [`s3-integrator`](https://charmhub.io/s3-integrator) | Stores sync logs, state, and artifacts | 19 | 20 | > Note: Either MinIO or S3 Integrator can be used as the object store; not both. 21 | 22 | ### Features 23 | 24 | - Automated deployment and scaling on Kubernetes 25 | - Seamless integration with PostgreSQL, Temporal, and object storage via Juju relations 26 | - Simple Airbyte UI access for connector configuration and monitoring 27 | - Ingress and authentication integration via Nginx and OAuth2 Proxy charms 28 | - Observability through Juju relation-based configuration 29 | 30 | ### In this documentation 31 | 32 | | Section | Description | 33 | | --- | --- | 34 | | **Tutorial** | A hands-on guide to deploying and configuring Charmed Airbyte for new users | 35 | | **How-to guides** | Step-by-step instructions for common operational tasks, such as ingress, authentication, and upgrades | 36 | | **Reference** | Technical details on configuration options, actions, and relations | -------------------------------------------------------------------------------- /documentation/tutorial/02-environment-setup.md: -------------------------------------------------------------------------------- 1 | # Setup your environment 2 | 3 | This part of the tutorial focuses on how to set up your environment and install the required dependencies. 
4 | 5 | ## Set up MicroK8s 6 | 7 | Charmed Airbyte relies on Kubernetes (K8s) as a container orchestration system. 8 | For this tutorial, you will use [MicroK8s](https://microk8s.io/docs), a lightweight distribution of K8s. 9 | 10 | Install MicroK8s and provide your user with the required permissions. You can do so by adding it to the `snap_microk8s` group and giving permissions to the `~/.kube` directory: 11 | 12 | ```bash 13 | sudo snap install microk8s --channel 1.34-strict/stable 14 | newgrp snap_microk8s 15 | sudo usermod -a -G snap_microk8s $USER 16 | sudo chown -f -R $USER ~/.kube 17 | ``` 18 | 19 | Enable the necessary MicroK8s add-ons as follows: 20 | ```bash 21 | sudo microk8s enable hostpath-storage dns 22 | ``` 23 | For ease in future use, you can set up a short alias for the Kubernetes CLI with: 24 | ```bash 25 | sudo snap alias microk8s.kubectl kubectl 26 | ``` 27 | 28 | ## Set up Juju 29 | 30 | Charmed Airbyte uses Juju as the orchestration engine for software operators. Install and connect it to your MicroK8s cloud with the following steps. 31 | 32 | Firstly, install `juju` from a snap: 33 | ```bash 34 | sudo snap install juju --channel 3.6/stable 35 | ``` 36 | 37 | [note] 38 | This charm requires juju with channel >= 3.1. 39 | [/note] 40 | 41 | Since the Juju package is strictly confined, you also need to manually create a path: 42 | ```bash 43 | mkdir -p ~/.local/share 44 | ``` 45 | Juju recognises a MicroK8s cloud automatically, as you can see by running `juju clouds`: 46 | ```bash 47 | # >>> Cloud Regions Default Type Credentials Source Description 48 | # >>> localhost 1 localhost lxd 0 built-in LXD Container Hypervisor 49 | # >>> microk8s 1 localhost k8s 1 built-in A Kubernetes Cluster 50 | ``` 51 | If for any reason MicroK8s is not recognised, register it manually using `juju add-k8s microk8s`. 52 | 53 | Next, install a Juju controller into your MicroK8s cloud. For this example, the controller is named "airbyte-controller": 54 | 55 | ```bash 56 | juju bootstrap microk8s airbyte-controller 57 | ``` 58 | 59 | Finally, create a model on this controller. For this example, the model is named "airbyte-model". Juju will create a Kubernetes namespace "airbyte-model": 60 | ```bash 61 | juju add-model airbyte-model 62 | ``` 63 | After this, you should see something similar to the below when running `juju status`: 64 | ```bash 65 | # >>> Model Controller Cloud/Region Version SLA Timestamp 66 | # >>> airbyte-model airbyte-controller microk8s/localhost 3.6.12 unsupported 12:45:50+03:00 67 | 68 | # >>> Model "admin/airbyte-model" is empty. 69 | ``` 70 | 71 | **See next: 72 | [Deploy supporting charms](./03-deploy-supporting-charms.md)** -------------------------------------------------------------------------------- /tests/integration/conftest.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Canonical Ltd. 2 | # See LICENSE file for licensing details. 
3 | 4 | """Charm integration test config.""" 5 | 6 | import logging 7 | from pathlib import Path 8 | 9 | import pytest 10 | import pytest_asyncio 11 | from helpers import ( 12 | APP_NAME_AIRBYTE_SERVER, 13 | APP_NAME_TEMPORAL_ADMIN, 14 | APP_NAME_TEMPORAL_SERVER, 15 | create_default_namespace, 16 | perform_airbyte_integrations, 17 | perform_temporal_integrations, 18 | run_sample_workflow, 19 | ) 20 | from pytest import FixtureRequest 21 | from pytest_operator.plugin import OpsTest 22 | 23 | logger = logging.getLogger(__name__) 24 | 25 | 26 | @pytest.fixture(scope="module", name="charm_image") 27 | def charm_image_fixture(request: FixtureRequest) -> str: 28 | """The OCI image for charm.""" 29 | charm_image = request.config.getoption("--airbyte-image") 30 | assert charm_image, "--airbyte-image argument is required which should contain the name of the OCI image." 31 | return charm_image 32 | 33 | 34 | @pytest_asyncio.fixture(scope="module", name="charm") 35 | async def charm_fixture(request: FixtureRequest, ops_test: OpsTest) -> str | Path: 36 | """Fetch the path to charm.""" 37 | charms = request.config.getoption("--charm-file") 38 | if not charms: 39 | charm = await ops_test.build_charm(".") 40 | assert charm, "Charm not built" 41 | return charm 42 | return charms[0] 43 | 44 | 45 | @pytest_asyncio.fixture(name="deploy", scope="module") 46 | async def deploy(ops_test: OpsTest, charm: str, charm_image: str): 47 | """Test the app is up and running.""" 48 | await ops_test.model.set_config({"update-status-hook-interval": "1m"}) 49 | # resources = get_airbyte_charm_resources() 50 | resources = {"airbyte-image": charm_image} 51 | 52 | await ops_test.model.deploy(charm, resources=resources, application_name=APP_NAME_AIRBYTE_SERVER, trust=True) 53 | await ops_test.model.deploy( 54 | APP_NAME_TEMPORAL_SERVER, 55 | channel="edge", 56 | config={"num-history-shards": 4}, 57 | ) 58 | await ops_test.model.deploy(APP_NAME_TEMPORAL_ADMIN, channel="edge") 59 | await ops_test.model.deploy("postgresql-k8s", channel="14/stable", trust=True, revision=381) 60 | await ops_test.model.deploy("minio", channel="edge") 61 | 62 | async with ops_test.fast_forward(): 63 | await ops_test.model.wait_for_idle( 64 | apps=["postgresql-k8s", "minio"], 65 | status="active", 66 | raise_on_blocked=False, 67 | timeout=1200, 68 | ) 69 | await ops_test.model.wait_for_idle( 70 | apps=[APP_NAME_TEMPORAL_SERVER, APP_NAME_TEMPORAL_ADMIN], 71 | status="blocked", 72 | raise_on_blocked=False, 73 | timeout=600, 74 | ) 75 | 76 | await perform_temporal_integrations(ops_test) 77 | await create_default_namespace(ops_test) 78 | await run_sample_workflow(ops_test) 79 | 80 | await perform_airbyte_integrations(ops_test) 81 | -------------------------------------------------------------------------------- /tests/unit/test_template_utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright 2024 Canonical Ltd. 3 | # See LICENSE file for licensing details. 
4 | 5 | """Unit tests for template rendering utilities.""" 6 | 7 | import unittest 8 | 9 | from utils import render_template 10 | 11 | 12 | class TestTemplateUtils(unittest.TestCase): 13 | """Test template rendering functionality.""" 14 | 15 | def test_render_flags_template_all_values(self): 16 | """Test rendering flags template with all values set.""" 17 | context = { 18 | "heartbeat_max_seconds_between_messages": 3600, 19 | "heartbeat_fail_sync": True, 20 | "destination_timeout_max_seconds": 86400, 21 | "destination_timeout_fail_sync": False, 22 | } 23 | 24 | output = render_template("flags.jinja", context) 25 | 26 | self.assertIn("flags:", output) 27 | self.assertIn("heartbeat-max-seconds-between-messages", output) 28 | self.assertIn('serve: "3600"', output) 29 | self.assertIn("heartbeat.failSync", output) 30 | self.assertIn("serve: true", output) 31 | self.assertIn("destination-timeout-enabled", output) 32 | self.assertIn("destination-timeout.seconds", output) 33 | self.assertIn('serve: "86400"', output) 34 | self.assertIn("destination-timeout.failSync", output) 35 | self.assertIn("serve: false", output) 36 | 37 | def test_render_flags_template_heartbeat_only(self): 38 | """Test rendering flags template with only heartbeat values.""" 39 | context = { 40 | "heartbeat_max_seconds_between_messages": 1800, 41 | "heartbeat_fail_sync": None, 42 | "destination_timeout_max_seconds": None, 43 | "destination_timeout_fail_sync": None, 44 | } 45 | 46 | output = render_template("flags.jinja", context) 47 | 48 | self.assertIn("flags:", output) 49 | self.assertIn("heartbeat-max-seconds-between-messages", output) 50 | self.assertIn('serve: "1800"', output) 51 | self.assertNotIn("heartbeat.failSync", output) 52 | self.assertNotIn("destination-timeout", output) 53 | 54 | def test_render_flags_template_destination_timeout_only(self): 55 | """Test rendering flags template with only destination timeout values.""" 56 | context = { 57 | "heartbeat_max_seconds_between_messages": None, 58 | "heartbeat_fail_sync": None, 59 | "destination_timeout_max_seconds": 43200, 60 | "destination_timeout_fail_sync": True, 61 | } 62 | 63 | output = render_template("flags.jinja", context) 64 | 65 | self.assertIn("flags:", output) 66 | self.assertIn("destination-timeout-enabled", output) 67 | self.assertIn("destination-timeout.seconds", output) 68 | self.assertIn('serve: "43200"', output) 69 | self.assertIn("destination-timeout.failSync", output) 70 | self.assertIn("serve: true", output) 71 | self.assertNotIn("heartbeat-max-seconds-between-messages", output) 72 | self.assertNotIn("heartbeat.failSync", output) 73 | 74 | 75 | if __name__ == "__main__": 76 | unittest.main() 77 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Canonical Ltd. 2 | # See LICENSE file for licensing details. 
3 | 4 | [tox] 5 | no_package = True 6 | skip_missing_interpreters = True 7 | env_list = fmt, lint, static, unit, coverage-report 8 | min_version = 4.0.0 9 | max-line-length=120 10 | 11 | [vars] 12 | src_path = {tox_root}/src 13 | tests_path = {tox_root}/tests 14 | all_path = {[vars]src_path} {[vars]tests_path} 15 | 16 | [testenv] 17 | set_env = 18 | PYTHONPATH = {tox_root}/lib:{[vars]src_path} 19 | PYTHONBREAKPOINT=pdb.set_trace 20 | PY_COLORS=1 21 | pass_env = 22 | PYTHONPATH 23 | CHARM_BUILD_DIR 24 | MODEL_SETTINGS 25 | 26 | [testenv:fmt] 27 | description = Format the code 28 | deps = 29 | black==22.8.0 30 | isort==5.10.1 31 | commands = 32 | isort {[vars]src_path} {[vars]tests_path} 33 | black {[vars]src_path} {[vars]tests_path} 34 | 35 | [testenv:lint] 36 | description = Lint the code 37 | deps = 38 | mypy 39 | pylint 40 | pydocstyle 41 | pytest 42 | black==22.8.0 43 | codespell==2.2.1 44 | flake8==5.0.4 45 | flake8-builtins==1.5.3 46 | flake8-copyright==0.2.3 47 | flake8-docstrings==1.6.0 48 | isort==5.10.1 49 | pep8-naming==0.13.2 50 | pyproject-flake8==5.0.4.post1 51 | flake8-docstrings-complete>=1.0.3 52 | flake8-test-docs>=1.0 53 | commands = 54 | pydocstyle {[vars]src_path} 55 | codespell {toxinidir} --skip {toxinidir}/.git --skip {toxinidir}/.tox \ 56 | --skip {toxinidir}/build --skip {toxinidir}/lib --skip {toxinidir}/venv \ 57 | --skip {toxinidir}/.mypy_cache --skip {toxinidir}/icon.svg 58 | pflake8 {[vars]src_path} {[vars]tests_path} 59 | isort --check-only --diff {[vars]src_path} {[vars]tests_path} 60 | black --check --diff {[vars]src_path} {[vars]tests_path} 61 | mypy {[vars]all_path} --ignore-missing-imports --follow-imports=skip --install-types --non-interactive 62 | pylint {[vars]src_path} {[vars]tests_path} --disable=E0401,W1203,W0613,W0718,R0903,W1514,C0103,R0913,C0301,W0212,R0902,C0104,W0640,R0801,W0511,R0914,R0912,E1120 63 | 64 | 65 | [testenv:unit] 66 | description = Run unit tests 67 | deps = 68 | pytest 69 | coverage[toml] 70 | -r {tox_root}/requirements.txt 71 | commands = 72 | coverage run --source={[vars]src_path} \ 73 | -m pytest \ 74 | --tb native \ 75 | -v \ 76 | -s \ 77 | {posargs} \ 78 | {[vars]tests_path}/unit 79 | coverage report 80 | 81 | [testenv:coverage-report] 82 | description = Create test coverage report 83 | deps = 84 | coverage[toml] 85 | pytest 86 | -r{toxinidir}/requirements.txt 87 | commands = 88 | coverage report 89 | 90 | [testenv:static] 91 | description = Run static analysis tests 92 | deps = 93 | bandit[toml] 94 | -r{toxinidir}/requirements.txt 95 | commands = 96 | bandit -c {toxinidir}/pyproject.toml -r {[vars]src_path} {[vars]tests_path} 97 | 98 | [testenv:integration] 99 | description = Run integration tests 100 | deps = 101 | ipdb==0.13.9 102 | juju==3.5.2.1 103 | pytest==7.1.3 104 | pytest-operator==0.35.0 105 | temporalio==1.6.0 106 | pytest-asyncio==0.21 107 | -r{toxinidir}/requirements.txt 108 | commands = 109 | pytest -v \ 110 | -s \ 111 | --tb native \ 112 | --log-cli-level=INFO \ 113 | {posargs} \ 114 | {[vars]tests_path}/integration/test_charm.py 115 | -------------------------------------------------------------------------------- /src/s3_helpers.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Canonical Ltd. 2 | # See LICENSE file for licensing details. 
3 | 4 | """S3 helpers.""" 5 | 6 | import logging 7 | 8 | import boto3 9 | from botocore.exceptions import ClientError 10 | 11 | logger = logging.getLogger(__name__) 12 | 13 | 14 | class S3Client: 15 | """Client for S3 operations.""" 16 | 17 | def __init__(self, s3_parameters): 18 | """Initialize an S3 connection using the provided parameters. 19 | 20 | Args: 21 | s3_parameters: S3 connection parameters. 22 | 23 | Raises: 24 | ValueError: If a session fails to be created. 25 | """ 26 | self.s3_parameters = s3_parameters 27 | endpoint = s3_parameters.get("endpoint") 28 | session = boto3.session.Session( 29 | aws_access_key_id=s3_parameters.get("access-key"), 30 | aws_secret_access_key=s3_parameters.get("secret-key"), 31 | region_name=s3_parameters.get("region"), # Region can be optional for MinIO 32 | ) 33 | try: 34 | self.s3_resource = session.resource("s3", endpoint_url=endpoint) 35 | self.s3_client = session.client("s3", endpoint_url=endpoint) 36 | except Exception as e: 37 | logger.exception("Failed to create a session in region=%s.", s3_parameters.get("region")) 38 | raise ValueError("Failed to create a session") from e 39 | 40 | def create_bucket_if_not_exists(self, bucket_name): 41 | """Create the S3 bucket if it does not exist. 42 | 43 | Args: 44 | bucket_name (str): name of bucket to create 45 | 46 | Raises: 47 | e (ValueError): if a session could not be created. 48 | error (ClientError): if the bucket could not be created. 49 | """ 50 | region = self.s3_parameters.get("region") 51 | s3_bucket = self.s3_resource.Bucket(bucket_name) 52 | try: 53 | s3_bucket.meta.client.head_bucket(Bucket=bucket_name) 54 | logger.info("Bucket %s exists. Skipping creation.", bucket_name) 55 | exists = True 56 | except ClientError as e: 57 | error_code = int(e.response["Error"]["Code"]) 58 | if error_code == 404: 59 | logger.warning("Bucket %s doesn't exist or you don't have access to it.", bucket_name) 60 | exists = False 61 | else: 62 | logger.exception("Unexpected error: %s", e) 63 | raise 64 | 65 | if not exists: 66 | try: 67 | s3_bucket.create() 68 | s3_bucket.wait_until_exists() 69 | logger.info("Created bucket '%s' in region=%s", bucket_name, region) 70 | except ClientError as error: 71 | logger.exception("Couldn't create bucket named '%s' in region=%s.", bucket_name, region) 72 | raise error 73 | 74 | def set_bucket_lifecycle_policy(self, bucket_name, ttl): 75 | """Set lifecycle policy of bucket to purge files after a certain time. 76 | 77 | Args: 78 | bucket_name: Name of bucket. 79 | ttl: Time to live of logs (in days). 80 | """ 81 | lifecycle_policy = { 82 | "Rules": [ 83 | { 84 | "Expiration": {"Days": ttl}, 85 | "Filter": {"Prefix": ""}, 86 | "Status": "Enabled", 87 | "ID": "ttl", 88 | } 89 | ] 90 | } 91 | self.s3_client.put_bucket_lifecycle_configuration(Bucket=bucket_name, LifecycleConfiguration=lifecycle_policy) 92 | -------------------------------------------------------------------------------- /src/literals.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Canonical Ltd. 2 | # See LICENSE file for licensing details. 
3 | 4 | """Charm literals.""" 5 | 6 | CONNECTOR_BUILDER_SERVER_API_PORT = 80 7 | INTERNAL_API_PORT = 8001 8 | AIRBYTE_API_PORT = 8006 9 | WORKLOAD_API_PORT = 8007 10 | WORKLOAD_LAUNCHER_PORT = 8016 11 | AIRBYTE_VERSION = "1.7.0" 12 | DB_NAME = "airbyte-k8s_db" 13 | AIRBYTE_AUTH_K8S_SECRET_NAME = "airbyte-auth-secrets" # nosec 14 | 15 | 16 | CONTAINER_HEALTH_CHECK_MAP = { 17 | "airbyte-workload-api-server": { 18 | "port": WORKLOAD_API_PORT, 19 | "health_endpoint": "/health", 20 | }, 21 | "airbyte-workload-launcher": { 22 | "port": WORKLOAD_LAUNCHER_PORT, 23 | "health_endpoint": "/health", 24 | }, 25 | "airbyte-bootloader": None, 26 | "airbyte-connector-builder-server": None, 27 | "airbyte-cron": { 28 | "port": 9001, 29 | "health_endpoint": "/health", 30 | }, 31 | "airbyte-pod-sweeper": None, 32 | "airbyte-server": { 33 | "port": INTERNAL_API_PORT, 34 | "health_endpoint": "/api/v1/health", 35 | }, 36 | "airbyte-workers": {"port": 9000, "health_endpoint": "/"}, 37 | } 38 | 39 | REQUIRED_S3_PARAMETERS = ["region", "endpoint", "access-key", "secret-key"] 40 | BUCKET_CONFIGS = [ 41 | "storage-bucket-logs", 42 | "storage-bucket-state", 43 | "storage-bucket-activity-payload", 44 | "storage-bucket-workload-output", 45 | ] 46 | LOGS_BUCKET_CONFIG = "storage-bucket-logs" 47 | 48 | BASE_ENV = { 49 | "API_URL": "/api/v1/", 50 | "AIRBYTE_VERSION": AIRBYTE_VERSION, 51 | "AIRBYTE_EDITION": "community", 52 | "AUTO_DETECT_SCHEMA": "true", 53 | "WORKSPACE_ROOT": "/workspace", 54 | "CONFIG_ROOT": "/configs", 55 | "CONFIGS_DATABASE_MINIMUM_FLYWAY_MIGRATION_VERSION": "0.35.15.001", 56 | "JOBS_DATABASE_MINIMUM_FLYWAY_MIGRATION_VERSION": "0.29.15.001", 57 | "MICRONAUT_ENVIRONMENTS": "control-plane", 58 | "WORKERS_MICRONAUT_ENVIRONMENTS": "control-plane", 59 | "CRON_MICRONAUT_ENVIRONMENTS": "control-plane", 60 | "WORKLOAD_API_HOST": "localhost", 61 | "MICROMETER_METRICS_ENABLED": "false", 62 | "LAUNCHER_MICRONAUT_ENVIRONMENTS": "control-plane,oss", 63 | "KEYCLOAK_INTERNAL_HOST": "localhost", 64 | "WORKER_ENVIRONMENT": "kubernetes", 65 | "SHOULD_RUN_NOTIFY_WORKFLOWS": "true", 66 | "CONNECTOR_BUILDER_API_URL": "/connector-builder-api", 67 | "TEMPORAL_WORKER_PORTS": "9001,9002,9003,9004,9005,9006,9007,9008,9009,9010,9011,9012,9013,9014,9015,9016,9017,9018,9019,9020,9021,9022,9023,9024,9025,9026,9027,9028,9029,9030", 68 | "CONTAINER_ORCHESTRATOR_ENABLED": "true", 69 | "CONTAINER_ORCHESTRATOR_IMAGE": f"airbyte/container-orchestrator:{AIRBYTE_VERSION}", 70 | "CONNECTOR_PROFILER_IMAGE": f"airbyte/async-profiler:{AIRBYTE_VERSION}", 71 | "CONNECTOR_SIDECAR_IMAGE": f"airbyte/connector-sidecar:{AIRBYTE_VERSION}", 72 | "WORKLOAD_INIT_IMAGE": f"airbyte/workload-init-container:{AIRBYTE_VERSION}", 73 | "LOG4J_CONFIGURATION_FILE": "log4j2-minio.xml", 74 | "ENTERPRISE_SOURCE_STUBS_URL": "https://connectors.airbyte.com/files/resources/connector_stubs/v0/connector_stubs.json", 75 | "PUB_SUB_ENABLED": "false", 76 | "PUB_SUB_TOPIC_NAME": "", 77 | "DATA_PLANE_ID": "local", 78 | "LOCAL_ROOT": "/tmp/airbyte_local", # nosec 79 | "RUN_DATABASE_MIGRATION_ON_STARTUP": "true", 80 | "API_AUTHORIZATION_ENABLED": "false", 81 | "DATAPLANE_CLIENT_ID_SECRET_NAME": "airbyte-auth-secrets", 82 | "DATAPLANE_CLIENT_ID_SECRET_KEY": "dataplane-client-id", 83 | "DATAPLANE_CLIENT_SECRET_SECRET_NAME": "airbyte-auth-secrets", 84 | "DATAPLANE_CLIENT_SECRET_SECRET_KEY": "dataplane-client-secret", 85 | } 86 | 87 | FLAGS_FILE_PATH = "/flags" 88 | -------------------------------------------------------------------------------- 
/airbyte_rock/local-files/pod-sweeper.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2024 Canonical Ltd. 3 | # See LICENSE file for licensing details. 4 | 5 | # https://github.com/airbytehq/airbyte-platform/blob/v1.3.0/charts/airbyte-pod-sweeper/templates/configmap.yaml 6 | 7 | get_job_pods() { 8 | # echo "Running kubectl command to get job pods..." 9 | kubectl -n "${JOB_KUBE_NAMESPACE}" -L airbyte -l airbyte=job-pod \ 10 | get pods \ 11 | -o=jsonpath='{range .items[*]} {.metadata.name} {.status.phase} {.status.conditions[0].lastTransitionTime} {.status.startTime}{"\n"}{end}' 12 | } 13 | 14 | # Useful function when debugging 15 | fetch_pod_logs() { 16 | pod_name="$1" 17 | echo "Fetching logs for pod: ${pod_name}" 18 | kubectl -n "${JOB_KUBE_NAMESPACE}" describe pod "$pod_name" 19 | kubectl -n "${JOB_KUBE_NAMESPACE}" get pod "$pod_name" -o yaml | grep serviceAccount 20 | kubectl -n "${JOB_KUBE_NAMESPACE}" logs "$pod_name" 21 | kubectl -n "${JOB_KUBE_NAMESPACE}" logs "$pod_name" -c init 22 | kubectl -n "${JOB_KUBE_NAMESPACE}" logs "$pod_name" -c main 23 | } 24 | 25 | delete_pod() { 26 | printf "From status '%s' since '%s', " "$2" "$3" 27 | echo "$1" | grep -v "STATUS" | awk '{print $1}' | xargs --no-run-if-empty kubectl -n "${JOB_KUBE_NAMESPACE}" delete pod 28 | } 29 | 30 | while : 31 | do 32 | echo "Starting pod sweeper cycle:" 33 | 34 | if [ -n "${RUNNING_TTL_MINUTES}" ]; then 35 | # Time window for running pods 36 | RUNNING_DATE_STR=$(date -d "now - ${RUNNING_TTL_MINUTES} minutes" --utc -Ins) 37 | RUNNING_DATE=$(date -d "${RUNNING_DATE_STR}" +%s) 38 | echo "Will sweep running pods from before ${RUNNING_DATE_STR}" 39 | fi 40 | 41 | if [ -n "${SUCCEEDED_TTL_MINUTES}" ]; then 42 | # Shorter time window for succeeded pods 43 | SUCCESS_DATE_STR=$(date -d "now - ${SUCCEEDED_TTL_MINUTES} minutes" --utc -Ins) 44 | SUCCESS_DATE=$(date -d "${SUCCESS_DATE_STR}" +%s) 45 | echo "Will sweep succeeded pods from before ${SUCCESS_DATE_STR}" 46 | fi 47 | 48 | if [ -n "${UNSUCCESSFUL_TTL_MINUTES}" ]; then 49 | # Longer time window for unsuccessful pods (to debug) 50 | NON_SUCCESS_DATE_STR=$(date -d "now - ${UNSUCCESSFUL_TTL_MINUTES} minutes" --utc -Ins) 51 | NON_SUCCESS_DATE=$(date -d "${NON_SUCCESS_DATE_STR}" +%s) 52 | echo "Will sweep unsuccessful pods from before ${NON_SUCCESS_DATE_STR}" 53 | fi 54 | 55 | echo "Running kubectl command to get job pods..." 56 | get_job_pods | while read -r POD; do 57 | IFS=' ' read -r POD_NAME POD_STATUS POD_DATE_STR POD_START_DATE_STR <<< "$POD" 58 | 59 | POD_DATE=$(date -d "${POD_DATE_STR:-$POD_START_DATE_STR}" '+%s') 60 | echo "Evaluating pod: $POD_NAME with status $POD_STATUS since $POD_DATE_STR" 61 | 62 | if [ -n "${RUNNING_TTL_MINUTES}" ] && [ "$POD_STATUS" = "Running" ]; then 63 | if [ "$POD_DATE" -lt "$RUNNING_DATE" ]; then 64 | delete_pod "$POD_NAME" "$POD_STATUS" "$POD_DATE_STR" 65 | fi 66 | elif [ -n "${SUCCEEDED_TTL_MINUTES}" ] && { [[ "$POD_STATUS" = "Succeeded" ]] || [[ "$POD_STATUS" = "Completed" ]]; }; then 67 | if [ "$POD_DATE" -lt "$SUCCESS_DATE" ]; then 68 | delete_pod "$POD_NAME" "$POD_STATUS" "$POD_DATE_STR" 69 | fi 70 | elif [ -n "${UNSUCCESSFUL_TTL_MINUTES}" ] && [ "$POD_STATUS" != "Running" ] && [ "$POD_STATUS" != "Succeeded" ]; then 71 | if [ "$POD_DATE" -lt "$NON_SUCCESS_DATE" ]; then 72 | delete_pod "$POD_NAME" "$POD_STATUS" "$POD_DATE_STR" 73 | fi 74 | fi 75 | done 76 | 77 | echo "Completed pod sweeper cycle. Sleeping for 60 seconds..." 
78 | sleep 60 79 | done 80 | -------------------------------------------------------------------------------- /tests/unit/test_structured_config.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright 2024 Canonical Ltd. 3 | # See LICENSE file for licensing details. 4 | 5 | """Structured config unit tests.""" 6 | 7 | import base64 8 | import logging 9 | from unittest import TestCase 10 | from unittest.mock import MagicMock, patch 11 | 12 | import pytest 13 | from ops.testing import Harness 14 | 15 | from charm import AirbyteK8SOperatorCharm 16 | from src.literals import CONTAINER_HEALTH_CHECK_MAP 17 | 18 | logger = logging.getLogger(__name__) 19 | 20 | 21 | class TestCharmStructuredConfig(TestCase): 22 | """Unit tests. 23 | 24 | Attrs: 25 | maxDiff: Specifies max difference shown by failed tests. 26 | """ 27 | 28 | maxDiff = None 29 | 30 | def setUp(self): 31 | """Set up for the unit tests.""" 32 | patcher1 = patch("kubernetes.config.load_incluster_config") 33 | patcher2 = patch("kubernetes.client.CoreV1Api") 34 | self.mock_incluster_config = patcher1.start() 35 | self.mock_k8s_api = patcher2.start() 36 | self.addCleanup(patcher1.stop) 37 | self.addCleanup(patcher2.stop) 38 | self.mock_core_v1_instance = MagicMock() 39 | self.mock_k8s_api.return_value = self.mock_core_v1_instance 40 | 41 | fake_secret = MagicMock() 42 | fake_secret.data = { 43 | "dataplane-client-id": base64.b64encode(b"sample-client-id"), 44 | "dataplane-client-secret": base64.b64encode(b"sample-client-secret"), 45 | } 46 | 47 | self.mock_core_v1_instance.read_namespaced_secret.return_value = fake_secret 48 | 49 | self.harness = Harness(AirbyteK8SOperatorCharm) 50 | self.addCleanup(self.harness.cleanup) 51 | for container_name in CONTAINER_HEALTH_CHECK_MAP: 52 | self.harness.set_can_connect(container_name, True) 53 | self.harness.set_leader(True) 54 | self.harness.set_model_name("airbyte-model") 55 | self.harness.add_network("10.0.0.10", endpoint="airbyte-peer") 56 | self.harness.begin() 57 | 58 | def test_config_parsing_parameters_integer_values(self) -> None: 59 | """Check that integer fields are parsed correctly.""" 60 | integer_fields = [ 61 | "logs-ttl", 62 | "pod-running-ttl-minutes", 63 | "pod-successful-ttl-minutes", 64 | "pod-unsuccessful-ttl-minutes", 65 | ] 66 | erroneous_values = [-5] 67 | valid_values = [42, 100, 1] 68 | for field in integer_fields: 69 | check_invalid_values(self.harness, field, erroneous_values) 70 | check_valid_values(self.harness, field, valid_values) 71 | 72 | def test_application_related_values(self) -> None: 73 | """Test specific parameters for application-related fields.""" 74 | erroneous_values = ["test-value", "foo", "bar"] 75 | 76 | # storage-type 77 | check_invalid_values(self.harness, "storage-type", erroneous_values) 78 | accepted_values = ["MINIO", "S3"] 79 | check_valid_values(self.harness, "storage-type", accepted_values) 80 | 81 | def test_cpu_related_values(self) -> None: 82 | """Test specific parameters for cpu-related fields.""" 83 | erroneous_values = ["-123", "0", "100f"] 84 | check_invalid_values(self.harness, "job-main-container-cpu-limit", erroneous_values) 85 | accepted_values = ["200m", "4"] 86 | check_valid_values(self.harness, "job-main-container-cpu-limit", accepted_values) 87 | 88 | def test_memory_related_values(self) -> None: 89 | """Test specific parameters for memory-related fields.""" 90 | erroneous_values = ["-123", "0", "100f"] 91 | check_invalid_values(self.harness,
"job-main-container-memory-limit", erroneus_values) 92 | accepted_values = ["4Gi", "256Mi"] 93 | check_valid_values(self.harness, "job-main-container-memory-limit", accepted_values) 94 | 95 | 96 | def check_valid_values(harness, field: str, accepted_values: list) -> None: 97 | """Check the correctness of the passed values for a field. 98 | 99 | Args: 100 | harness: Harness object. 101 | field: The configuration field to test. 102 | accepted_values: List of accepted values for this field. 103 | """ 104 | for value in accepted_values: 105 | harness.update_config({field: value}) 106 | assert harness.charm.config[field] == value 107 | 108 | 109 | def check_invalid_values(harness, field: str, erroneus_values: list) -> None: 110 | """Check the incorrectness of the passed values for a field. 111 | 112 | Args: 113 | harness: Harness object. 114 | field: The configuration field to test. 115 | erroneus_values: List of invalid values for this field. 116 | """ 117 | for value in erroneus_values: 118 | harness.update_config({field: value}) 119 | with pytest.raises(ValueError): 120 | _ = harness.charm.config[field] 121 | -------------------------------------------------------------------------------- /documentation/how-to/secure-airbyte-deployments.md: -------------------------------------------------------------------------------- 1 | # Enable security features 2 | This guide describes the implementation of security features such as encryption and authentication. 3 | 4 | ## Terminate TLS at ingress 5 | Airbyte can terminate Transport Layer Security (TLS) at the ingress by leveraging the [Nginx Ingress Integrator Charm](https://charmhub.io/nginx-ingress-integrator). 6 | 7 | Deploy this by running: 8 | 9 | ```bash 10 | juju deploy nginx-ingress-integrator --trust 11 | ``` 12 | 13 | ### Using K8s secrets 14 | You can use a self-signed or production-grade TLS certificate stored in a Kubernetes secret. The secret is then associated with the ingress to encrypt traffic between clients and Airbyte. 15 | 16 | For self-signed certificates you can do the following: 17 | 18 | 1. First generate a private key using `openssl` and a certificate signing request using they key you just created. Replace `` with an appropriate hostname such as `airbyte-k8s.com`: 19 | 20 | ```bash 21 | openssl genrsa -out server.key 2048 22 | openssl req -new -key server.key -out server.csr -subj "/CN=" 23 | ``` 24 | 2. You can now sign this signing request, creating your self-signed certificate: 25 | ```bash 26 | openssl x509 -req -days 365 -in server.csr -signkey server.key -out server.crt -extfile <(printf "subjectAltName=DNS:") 27 | ``` 28 | 3. Next, add this certificate and key as a Kubernetes secret to be used by the ingress: 29 | ```bash 30 | kubectl create secret tls airbyte-tls --cert=server.crt --key=server.key 31 | ``` 32 | 4. You then need to provide the name of the Kubernetes secret to the Airbyte charm, along with the hostname you included in the certificate: 33 | 34 | ```bash 35 | juju config airbyte-k8s tls-secret-name=airbyte-tls 36 | juju config airbyte-k8s external-hostname= 37 | 38 | ``` 39 | 5. Finally, relate Airbyte with the Nginx Ingress Integrator to create your ingress resource: 40 | ```bash 41 | juju relate airbyte-k8s nginx-ingress-integrator 42 | ``` 43 | [note] 44 | If you have a production-grade certificate, skip to step 3. 
45 | [/note] 46 | 47 | Validate that your ingress has been created with the TLS certificates: 48 | ```bash 49 | kubectl get ingress 50 | kubectl describe ingress <ingress-name> 51 | ``` 52 | The ingress name has the format `relation-<id>-<hostname>-ingress`. The `describe` command should show something similar to the below, with the Kubernetes secret you configured in `TLS`: 53 | 54 | ``` 55 | Name: relation-201-airbyte-k8s-com-ingress 56 | Labels: app.juju.is/created-by=nginx-ingress-integrator 57 | nginx-ingress-integrator.charm.juju.is/managed-by=nginx-ingress-integrator 58 | Namespace: airbyte-model 59 | Address: 60 | Ingress Class: nginx-ingress-controller 61 | Default backend: 62 | TLS: 63 | airbyte-tls terminates airbyte-k8s.com 64 | ``` 65 | 66 | ## Enable Google OAuth 67 | Enabling Google OAuth for Charmed Airbyte allows users to authenticate using their Google accounts, streamlining login and increasing security. Google OAuth is handled by the `oauth2-proxy-k8s` charm, which sits in front of Airbyte and is exposed through `nginx-ingress-integrator`. 68 | 69 | To enable Google OAuth, you need a Google Cloud project. You can create one [here](https://console.cloud.google.com/projectcreate). 70 | 71 | ### Obtain OAuth2 credentials 72 | If you do not already have OAuth2 credentials set up, follow the steps below: 73 | 1. Navigate to https://console.cloud.google.com/apis/credentials. 74 | 2. Click `+ Create Credentials`. 75 | 3. Select `OAuth client ID`. 76 | 4. Select application type (`Web application`). 77 | 5. Name the application. 78 | 6. Add an Authorized redirect URI (`https://<hostname>:8088/oauth-authorized/google`). 79 | 7. Create and download your client ID and client secret. 80 | 81 | ### Apply OAuth configuration to the OAuth2 Proxy charm 82 | The oauth2-proxy-k8s charm manages all OAuth configuration for Airbyte. Create a file `oauth2-proxy.yaml` containing your Google OAuth details: 83 | 84 | ```yaml 85 | oauth2-proxy-k8s: 86 | client_id: "<client-id>" 87 | client_secret: "<client-secret>" 88 | cookie_secret: "<cookie-secret>" 89 | external_hostname: "airbyte.company.com" 90 | authenticated_emails_list: "user1@company.com,user2@company.com," 91 | additional_config: "--upstream-timeout=1200s --skip-jwt-bearer-tokens=true --extra-jwt-issuers=https://accounts.google.com=<client-id>" 92 | upstream: "http://airbyte-k8s:8001" 93 | ``` 94 | - `cookie_secret` must be a 32-byte base64-encoded value 95 | - `external_hostname` must match what Google OAuth expects 96 | - `authenticated_emails_list` controls who can access Airbyte 97 | 98 | Apply the configuration: 99 | ```bash 100 | juju config oauth2-proxy-k8s --file=path/to/oauth2-proxy.yaml 101 | ``` 102 | This will update the running `oauth2-proxy-k8s` unit and enforce Google OAuth in front of Airbyte. -------------------------------------------------------------------------------- /src/relations/minio.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Canonical Ltd. 2 | # See LICENSE file for licensing details.
3 | 4 | """Define the Airbyte server minio relation.""" 5 | 6 | import logging 7 | 8 | from charmed_kubeflow_chisme.exceptions import ErrorWithStatus 9 | from ops import framework 10 | from ops.model import BlockedStatus, WaitingStatus 11 | from serialized_data_interface import ( 12 | NoCompatibleVersions, 13 | NoVersionsListed, 14 | get_interfaces, 15 | ) 16 | 17 | from charm_helpers import construct_svc_endpoint 18 | from log import log_event_handler 19 | 20 | logger = logging.getLogger(__name__) 21 | 22 | 23 | class MinioRelation(framework.Object): 24 | """Client for airbyte:minio relation.""" 25 | 26 | def __init__(self, charm): 27 | """Construct. 28 | 29 | Args: 30 | charm: The charm to attach the hooks to. 31 | """ 32 | super().__init__(charm, "object-storage") 33 | self.charm = charm 34 | 35 | # Handle minio relation. 36 | charm.framework.observe(charm.on.object_storage_relation_joined, self._on_object_storage_relation_changed) 37 | charm.framework.observe(charm.on.object_storage_relation_changed, self._on_object_storage_relation_changed) 38 | charm.framework.observe(charm.on.object_storage_relation_broken, self._on_object_storage_relation_broken) 39 | 40 | @log_event_handler(logger) 41 | def _on_object_storage_relation_changed(self, event): 42 | """Handle changing object-storage relation. 43 | 44 | Args: 45 | event: The event triggered when the relation changed. 46 | """ 47 | if not self.charm.unit.is_leader(): 48 | return 49 | 50 | if not self.charm._state.is_ready(): 51 | event.defer() 52 | return 53 | 54 | try: 55 | interfaces = self._get_interfaces() 56 | storage_data = self._get_object_storage_data(interfaces) 57 | endpoint = construct_svc_endpoint( 58 | storage_data["service"], 59 | storage_data["namespace"], 60 | storage_data["port"], 61 | storage_data["secure"], 62 | ) 63 | 64 | self.charm._state.minio = { 65 | **storage_data, 66 | "endpoint": endpoint, 67 | } 68 | self.charm._update(event) 69 | except ErrorWithStatus as err: 70 | self.charm.unit.status = err.status 71 | logger.error(f"Event {event} stopped early with message: {str(err)}") 72 | return 73 | 74 | @log_event_handler(logger) 75 | def _on_object_storage_relation_broken(self, event) -> None: 76 | """Handle broken relation with object-storage. 77 | 78 | Args: 79 | event: The event triggered when the relation changed. 80 | """ 81 | if not self.charm.unit.is_leader(): 82 | return 83 | 84 | if not self.charm._state.is_ready(): 85 | event.defer() 86 | return 87 | 88 | self.charm._state.minio = None 89 | self.charm._update(event) 90 | 91 | def _get_interfaces(self): 92 | """Retrieve interface object. 93 | 94 | Returns: 95 | list of charm interfaces. 96 | 97 | Raises: 98 | ErrorWithStatus: if an anticipated error occurs. 99 | """ 100 | try: 101 | charm = self.charm 102 | # Hack: get_interfaces checks for peer relation which does not exist under 103 | # requires/provides list in charmcraft.yaml 104 | if "airbyte-peer" in charm.meta.relations: 105 | del charm.meta.relations["airbyte-peer"] 106 | interfaces = get_interfaces(charm) 107 | except NoVersionsListed as err: 108 | raise ErrorWithStatus(err, WaitingStatus) from err 109 | except NoCompatibleVersions as err: 110 | raise ErrorWithStatus(err, BlockedStatus) from err 111 | return interfaces 112 | 113 | def _get_object_storage_data(self, interfaces): 114 | """Unpacks and returns the object-storage relation data. 115 | 116 | Args: 117 | interfaces: list of charm interfaces. 118 | 119 | Returns: 120 | object storage connection data. 
121 | 122 | Raises: 123 | ErrorWithStatus: if an anticipated error occurs. 124 | """ 125 | if not ((obj_storage := interfaces["object-storage"]) and obj_storage.get_data()): 126 | raise ErrorWithStatus("Waiting for object-storage relation data", WaitingStatus) 127 | 128 | try: 129 | logger.info(f"obj_storage get_data: {obj_storage.get_data()}") 130 | obj_storage = list(obj_storage.get_data().values())[0] 131 | except Exception as e: 132 | raise ErrorWithStatus( 133 | f"Unexpected error unpacking object storage data - data format not " 134 | f"as expected. Caught exception: '{str(e)}'", 135 | BlockedStatus, 136 | ) from e 137 | 138 | return obj_storage 139 | -------------------------------------------------------------------------------- /src/relations/s3.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Canonical Ltd. 2 | # See LICENSE file for licensing details. 3 | 4 | """S3 relation implementation.""" 5 | 6 | import logging 7 | 8 | import botocore 9 | from charms.data_platform_libs.v0.s3 import ( 10 | CredentialsChangedEvent, 11 | CredentialsGoneEvent, 12 | ) 13 | from ops import framework 14 | 15 | from log import log_event_handler 16 | 17 | logger = logging.getLogger(__name__) 18 | 19 | 20 | class S3Integrator(framework.Object): 21 | """Client for s3 relation.""" 22 | 23 | def __init__(self, charm): 24 | """Construct. 25 | 26 | Args: 27 | charm: The charm to attach the hooks to. 28 | """ 29 | super().__init__(charm, "s3") 30 | self.charm = charm 31 | charm.framework.observe(charm.s3_client.on.credentials_changed, self._on_s3_credentials_changed) 32 | charm.framework.observe(charm.s3_client.on.credentials_gone, self._on_s3_credentials_gone) 33 | 34 | @log_event_handler(logger) 35 | def _on_s3_credentials_changed(self, event: CredentialsChangedEvent): 36 | """Handle new s3 relation. 37 | 38 | Args: 39 | event: The event triggered when the relation changed. 40 | """ 41 | if not self.charm.unit.is_leader(): 42 | return 43 | 44 | s3_parameters, missing_parameters = self._retrieve_s3_parameters() 45 | if missing_parameters: 46 | return 47 | 48 | endpoint = _construct_endpoint(s3_parameters) 49 | self.charm._state.s3 = { 50 | "bucket": s3_parameters.get("bucket"), 51 | "endpoint": endpoint, 52 | "region": s3_parameters.get("region"), 53 | "access-key": s3_parameters.get("access-key"), 54 | "secret-key": s3_parameters.get("secret-key"), 55 | "uri_style": s3_parameters.get("s3-uri-style"), 56 | } 57 | self.charm._update(event) 58 | 59 | @log_event_handler(logger) 60 | def _on_s3_credentials_gone(self, event: CredentialsGoneEvent) -> None: 61 | """Handle s3 relation broken event. 62 | 63 | Args: 64 | event: The event triggered when the relation was broken. 65 | """ 66 | if not self.charm.unit.is_leader(): 67 | return 68 | 69 | self.charm._state.s3 = None 70 | self.charm._update(event) 71 | 72 | def _retrieve_s3_parameters(self): 73 | """Retrieve S3 parameters from the S3 integrator relation. 74 | 75 | Returns: 76 | s3 parameters (dict) and any missing parameters (list) from the relation. 
77 | """ 78 | s3_parameters = self.charm.s3_client.get_s3_connection_info() 79 | required_parameters = [ 80 | "access-key", 81 | "secret-key", 82 | ] 83 | missing_required_parameters = [param for param in required_parameters if param not in s3_parameters] 84 | if missing_required_parameters: 85 | logger.warning( 86 | f"Missing required S3 parameters in relation with S3 integrator: {missing_required_parameters}" 87 | ) 88 | return {}, missing_required_parameters 89 | 90 | # Add some sensible defaults (as expected by the code) for missing optional parameters 91 | s3_parameters.setdefault("endpoint", "https://s3.amazonaws.com") 92 | s3_parameters.setdefault("region", "") 93 | s3_parameters.setdefault("path", "") 94 | s3_parameters.setdefault("s3-uri-style", "host") 95 | 96 | # Strip whitespaces from all parameters. 97 | for key, value in s3_parameters.items(): 98 | if isinstance(value, str): 99 | s3_parameters[key] = value.strip() 100 | 101 | # Clean up extra slash symbols to avoid issues on 3rd-party storages 102 | # like Ceph Object Gateway (radosgw). 103 | s3_parameters["endpoint"] = s3_parameters["endpoint"].rstrip("/") 104 | s3_parameters[ 105 | "path" 106 | ] = f'/{s3_parameters["path"].strip("/")}' # The slash in the beginning is required by pgBackRest. 107 | s3_parameters["bucket"] = s3_parameters["bucket"].strip("/") 108 | 109 | return s3_parameters, [] 110 | 111 | 112 | def _construct_endpoint(s3_parameters): 113 | """Construct the S3 service endpoint using the region. 114 | 115 | This is needed when the provided endpoint is from AWS, and it doesn't contain the region. 116 | 117 | Args: 118 | s3_parameters: s3 parameters fetched from the s3 integrator relation. 119 | 120 | Returns: 121 | S3 service endpoint. 122 | """ 123 | # Use the provided endpoint if a region is not needed. 124 | endpoint = s3_parameters["endpoint"] 125 | 126 | # Load endpoints data. 127 | loader = botocore.loaders.create_loader() 128 | data = loader.load_data("endpoints") 129 | 130 | # Construct the endpoint using the region. 131 | resolver = botocore.regions.EndpointResolver(data) 132 | endpoint_data = resolver.construct_endpoint("s3", s3_parameters["region"]) 133 | 134 | # Use the built endpoint if it is an AWS endpoint. 135 | if endpoint_data and endpoint.endswith(endpoint_data["dnsSuffix"]): 136 | endpoint = f'{endpoint.split("://")[0]}://{endpoint_data["hostname"]}' 137 | 138 | return endpoint 139 | -------------------------------------------------------------------------------- /icon.svg: -------------------------------------------------------------------------------- 1 | 2 | image/svg+xmleclispe-cheeclispe-cheCreated with Sketch. 80 | -------------------------------------------------------------------------------- /airbyte_rock/rockcraft.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Canonical Ltd. 2 | # See LICENSE file for licensing details. 
3 | 4 | name: airbyte 5 | summary: Airbyte rock 6 | description: Airbyte OCI image for the Airbyte charm 7 | version: "1.7.0" 8 | base: ubuntu@22.04 9 | license: Apache-2.0 10 | platforms: 11 | amd64: 12 | 13 | environment: 14 | JAVA_HOME: /usr/lib/jvm/java-21-openjdk-amd64 15 | CDK_PYTHON: /usr/bin/python3.10 16 | CDK_ENTRYPOINT: /usr/lib/python3.10/dist-packages/airbyte_cdk/connector_builder/main.py 17 | CDK_VERSION: "5.12.0" 18 | 19 | 20 | parts: 21 | install-dependencies: 22 | plugin: nil 23 | stage-packages: 24 | - apt-transport-https 25 | - ca-certificates 26 | - curl 27 | - gnupg 28 | - python3.10-venv 29 | override-build: | 30 | # Install kubectl 31 | echo 'deb [signed-by=/etc/apt/keyrings/kubernetes-apt-keyring.gpg] https://pkgs.k8s.io/core:/stable:/v1.31/deb/ /' | tee /etc/apt/sources.list.d/kubernetes.list 32 | chmod 644 /etc/apt/sources.list.d/kubernetes.list # helps tools such as command-not-found to work correctly 33 | curl -fsSL https://pkgs.k8s.io/core:/stable:/v1.31/deb/Release.key | gpg --dearmor -o /etc/apt/keyrings/kubernetes-apt-keyring.gpg 34 | chmod 644 /etc/apt/keyrings/kubernetes-apt-keyring.gpg # allow unprivileged APT programs to read this keyring 35 | apt-get update 36 | apt-get install -y kubectl 37 | mkdir -p ${CRAFT_PART_INSTALL}/usr/local/bin 38 | mkdir -p ${CRAFT_PART_INSTALL}/usr/local/lib/python3.10/dist-packages 39 | cp -r $(which kubectl) ${CRAFT_PART_INSTALL}/usr/local/bin/kubectl 40 | 41 | pip install --upgrade setuptools pip airbyte-cdk==5.12.0 \ 42 | --target=/${CRAFT_PART_INSTALL}/usr/local/lib/python3.10/dist-packages 43 | stage: 44 | - usr/local/bin/kubectl 45 | - usr/local/lib/python3.10/dist-packages 46 | 47 | pull-airbyte-repo: 48 | after: [install-dependencies] 49 | plugin: dump 50 | source: https://github.com/airbytehq/airbyte-platform.git # yamllint disable-line 51 | source-type: git 52 | source-tag: v1.7.0 53 | override-build: | 54 | cp -r . 
${CRAFT_PART_INSTALL}/airbyte-platform 55 | stage: 56 | - airbyte-platform 57 | 58 | assemble: 59 | after: [pull-airbyte-repo] 60 | plugin: nil 61 | build-packages: 62 | - jq 63 | - curl 64 | - coreutils 65 | - bash 66 | - gradle 67 | - openjdk-21-jdk-headless 68 | - npm 69 | - libpq-dev 70 | - python3-dev 71 | build-snaps: 72 | - docker 73 | stage-packages: 74 | - openjdk-21-jdk-headless 75 | - libpq-dev 76 | - python3-dev 77 | override-build: | 78 | cd ${CRAFT_STAGE}/airbyte-platform 79 | ./gradlew assemble -x dockerBuildImage --continue --max-workers 1 80 | ./gradlew --stop 81 | 82 | organize-tars: 83 | after: [assemble] 84 | plugin: nil 85 | override-build: | 86 | mkdir ${CRAFT_PART_INSTALL}/airbyte-server 87 | mkdir ${CRAFT_PART_INSTALL}/airbyte-workers 88 | mkdir ${CRAFT_PART_INSTALL}/airbyte-bootloader 89 | mkdir ${CRAFT_PART_INSTALL}/airbyte-cron 90 | mkdir ${CRAFT_PART_INSTALL}/airbyte-connector-builder-server 91 | mkdir ${CRAFT_PART_INSTALL}/airbyte-workload-api-server 92 | mkdir ${CRAFT_PART_INSTALL}/airbyte-workload-launcher 93 | 94 | tar -xvf ${CRAFT_STAGE}/airbyte-platform/airbyte-server/build/distributions/airbyte-app.tar -C ${CRAFT_PART_INSTALL}/airbyte-server 95 | tar -xvf ${CRAFT_STAGE}/airbyte-platform/airbyte-workers/build/distributions/airbyte-app.tar -C ${CRAFT_PART_INSTALL}/airbyte-workers 96 | tar -xvf ${CRAFT_STAGE}/airbyte-platform/airbyte-bootloader/build/distributions/airbyte-app.tar -C ${CRAFT_PART_INSTALL}/airbyte-bootloader 97 | tar -xvf ${CRAFT_STAGE}/airbyte-platform/airbyte-cron/build/distributions/airbyte-app.tar -C ${CRAFT_PART_INSTALL}/airbyte-cron 98 | tar -xvf ${CRAFT_STAGE}/airbyte-platform/airbyte-connector-builder-server/build/distributions/airbyte-app.tar -C ${CRAFT_PART_INSTALL}/airbyte-connector-builder-server 99 | tar -xvf ${CRAFT_STAGE}/airbyte-platform/airbyte-workload-api-server/build/distributions/airbyte-app.tar -C ${CRAFT_PART_INSTALL}/airbyte-workload-api-server 100 | tar -xvf ${CRAFT_STAGE}/airbyte-platform/airbyte-workload-launcher/build/distributions/airbyte-app.tar -C ${CRAFT_PART_INSTALL}/airbyte-workload-launcher 101 | 102 | # Fix CVE-2025-59340 by replacing jinjava 2.7.4 with 2.7.5 103 | JINJAVA_VERSION="2.7.5" 104 | JINJAVA_BASE_URL="https://repo1.maven.org/maven2/com/hubspot/jinjava/jinjava" 105 | 106 | # Find and replace jinjava JAR files 107 | find ${CRAFT_PART_INSTALL} -name "jinjava-2.7.4.jar" -type f | while read -r jar_file; do 108 | jar_dir=$(dirname "$jar_file") 109 | echo "Replacing jinjava in: $jar_file" 110 | curl -L ${JINJAVA_BASE_URL}/${JINJAVA_VERSION}/jinjava-${JINJAVA_VERSION}.jar \ 111 | -o "${jar_dir}/jinjava-${JINJAVA_VERSION}.jar" 112 | rm -f "$jar_file" 113 | done 114 | stage: 115 | - airbyte-server 116 | - airbyte-workers 117 | - airbyte-bootloader 118 | - airbyte-cron 119 | - airbyte-connector-builder-server 120 | - airbyte-workload-api-server 121 | - airbyte-workload-launcher 122 | 123 | local-files: 124 | after: [organize-tars] 125 | plugin: dump 126 | source: ./local-files 127 | organize: 128 | pod-sweeper.sh: airbyte-pod-sweeper/airbyte-app/bin/airbyte-pod-sweeper 129 | stage: 130 | - airbyte-pod-sweeper/airbyte-app/bin/airbyte-pod-sweeper 131 | -------------------------------------------------------------------------------- /src/structured_config.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright 2024 Canonical Ltd. 3 | # See LICENSE file for licensing details. 
4 | 5 | # flake8: noqa 6 | 7 | """Structured configuration for the charm.""" 8 | 9 | import logging 10 | import re 11 | from enum import Enum 12 | from typing import Optional 13 | 14 | from charms.data_platform_libs.v0.data_models import BaseConfigModel 15 | from pydantic import validator 16 | 17 | logger = logging.getLogger(__name__) 18 | 19 | 20 | class LogLevelType(str, Enum): 21 | """Enum for the `log-level` field.""" 22 | 23 | INFO = "INFO" 24 | DEBUG = "DEBUG" 25 | WARNING = "WARNING" 26 | ERROR = "ERROR" 27 | FATAL = "FATAL" 28 | 29 | 30 | class StorageType(str, Enum): 31 | """Enum for the `storage-type` field.""" 32 | 33 | minio = "MINIO" 34 | s3 = "S3" 35 | 36 | 37 | class SecretPersistenceType(str, Enum): 38 | """Enum for the `secret-persistence` field.""" 39 | 40 | GOOGLE_SECRET_MANAGER = "GOOGLE_SECRET_MANAGER" # nosec 41 | AWS_SECRET_MANAGER = "AWS_SECRET_MANAGER" # nosec 42 | TESTING_CONFIG_DB_TABLE = "TESTING_CONFIG_DB_TABLE" 43 | VAULT = "VAULT" 44 | 45 | 46 | class VaultAuthType(str, Enum): 47 | """Enum for the `vault-auth-method` field.""" 48 | 49 | token = "token" # nosec 50 | 51 | 52 | class ImagePullPolicyType(str, Enum): 53 | """Enum for the `*-image-pull-policy` field.""" 54 | 55 | Always = "Always" 56 | IfNotPresent = "IfNotPresent" 57 | Never = "Never" 58 | 59 | 60 | class CharmConfig(BaseConfigModel): 61 | """Manager for the structured configuration.""" 62 | 63 | log_level: LogLevelType 64 | temporal_host: str 65 | webapp_url: Optional[str] 66 | secret_persistence: Optional[SecretPersistenceType] 67 | secret_store_gcp_project_id: Optional[str] 68 | secret_store_gcp_credentials: Optional[str] 69 | vault_address: Optional[str] 70 | vault_prefix: Optional[str] 71 | vault_auth_token: Optional[str] 72 | vault_auth_method: VaultAuthType 73 | aws_access_key: Optional[str] 74 | aws_secret_access_key: Optional[str] 75 | aws_kms_key_arn: Optional[str] 76 | aws_secret_manager_secret_tags: Optional[str] 77 | sync_job_retries_complete_failures_max_successive: Optional[int] 78 | sync_job_retries_complete_failures_max_total: Optional[int] 79 | sync_job_retries_complete_failures_backoff_min_interval_s: Optional[int] 80 | sync_job_retries_complete_failures_backoff_max_interval_s: Optional[int] 81 | sync_job_retries_complete_failures_backoff_base: Optional[int] 82 | sync_job_retries_partial_failures_max_successive: Optional[int] 83 | sync_job_retries_partial_failures_max_total: Optional[int] 84 | sync_job_max_timeout_days: Optional[int] 85 | job_main_container_cpu_request: Optional[str] 86 | job_main_container_cpu_limit: Optional[str] 87 | job_main_container_memory_request: Optional[str] 88 | job_main_container_memory_limit: Optional[str] 89 | max_fields_per_connections: Optional[int] 90 | max_days_of_only_failed_jobs_before_connection_disable: Optional[int] 91 | max_failed_jobs_in_a_row_before_connection_disable: Optional[int] 92 | max_spec_workers: Optional[int] 93 | max_check_workers: Optional[int] 94 | max_sync_workers: Optional[int] 95 | max_discover_workers: Optional[int] 96 | temporal_history_retention_in_days: Optional[int] 97 | job_kube_tolerations: Optional[str] 98 | job_kube_node_selectors: Optional[str] 99 | job_kube_annotations: Optional[str] 100 | job_kube_main_container_image_pull_policy: Optional[ImagePullPolicyType] 101 | job_kube_main_container_image_pull_secret: Optional[str] 102 | job_kube_sidecar_container_image_pull_policy: Optional[ImagePullPolicyType] 103 | job_kube_socat_image: Optional[str] 104 | job_kube_busybox_image: Optional[str] 105 | 
job_kube_curl_image: Optional[str] 106 | job_kube_namespace: Optional[str] 107 | spec_job_kube_node_selectors: Optional[str] 108 | check_job_kube_node_selectors: Optional[str] 109 | discover_job_kube_node_selectors: Optional[str] 110 | spec_job_kube_annotations: Optional[str] 111 | check_job_kube_annotations: Optional[str] 112 | discover_job_kube_annotations: Optional[str] 113 | storage_type: StorageType 114 | storage_bucket_logs: str 115 | logs_ttl: int 116 | storage_bucket_state: str 117 | storage_bucket_activity_payload: str 118 | storage_bucket_workload_output: str 119 | pod_running_ttl_minutes: int 120 | pod_successful_ttl_minutes: int 121 | pod_unsuccessful_ttl_minutes: int 122 | heartbeat_max_seconds_between_messages: Optional[int] 123 | heartbeat_fail_sync: Optional[bool] 124 | destination_timeout_max_seconds: Optional[int] 125 | destination_timeout_fail_sync: Optional[bool] 126 | 127 | @validator("*", pre=True) 128 | @classmethod 129 | def blank_string(cls, value): 130 | """Check for empty strings. 131 | 132 | Args: 133 | value: configuration value 134 | 135 | Returns: 136 | None in place of empty string or value 137 | """ 138 | if value == "": 139 | return None 140 | return value 141 | 142 | @validator("pod_running_ttl_minutes", "pod_successful_ttl_minutes", "pod_unsuccessful_ttl_minutes") 143 | @classmethod 144 | def greater_than_zero(cls, value: str) -> Optional[int]: 145 | """Check validity of `*-ttl-minutes` fields. 146 | 147 | Args: 148 | value: *-ttl-minutes value 149 | 150 | Returns: 151 | int_value: integer for *-ttl-minutes configuration 152 | 153 | Raises: 154 | ValueError: in the case when the value is out of range 155 | """ 156 | int_value = int(value) 157 | if int_value > 0: 158 | return int_value 159 | raise ValueError("Value out of range.") 160 | 161 | @validator("logs_ttl") 162 | @classmethod 163 | def zero_or_greater(cls, value: str) -> Optional[int]: 164 | """Check validity of `logs-ttl` fields. 165 | 166 | Args: 167 | value: logs-ttl value 168 | 169 | Returns: 170 | int_value: integer for logs-ttl configuration 171 | 172 | Raises: 173 | ValueError: in the case when the value is out of range 174 | """ 175 | int_value = int(value) 176 | if int_value >= 0: 177 | return int_value 178 | raise ValueError("Value out of range.") 179 | 180 | @validator("job_main_container_cpu_request", "job_main_container_cpu_limit") 181 | @classmethod 182 | def cpu_validator(cls, value: str) -> Optional[str]: 183 | """Check validity of `*-cpu-request/limit` fields. 184 | 185 | Args: 186 | value: CPU request/limit value 187 | 188 | Returns: 189 | value: CPU request/limit value 190 | 191 | Raises: 192 | ValueError: in the case when the value is invalid 193 | """ 194 | millicores_pattern = re.compile(r"^\d+m$") 195 | 196 | if millicores_pattern.match(value): 197 | return value 198 | 199 | int_value = int(value) 200 | if int_value > 0: 201 | return value 202 | raise ValueError("Invalid CPU request/limit value.") 203 | 204 | @validator("job_main_container_memory_request", "job_main_container_memory_limit") 205 | @classmethod 206 | def memory_validator(cls, value: str) -> Optional[str]: 207 | """Check validity of `*-memory-request/limit` fields. 
208 | 209 | Args: 210 | value: Memory request/limit value 211 | 212 | Returns: 213 | value: Memory request/limit value 214 | 215 | Raises: 216 | ValueError: in the case when the value is invalid 217 | """ 218 | memory_pattern = re.compile(r"^[1-9]\d*(Ei|Pi|Ti|Gi|Mi|Ki)?$") 219 | 220 | if memory_pattern.match(value): 221 | return value 222 | 223 | # Check if the input is a valid integer (bytes) 224 | int_value = int(value) 225 | if int_value > 0: 226 | return value 227 | raise ValueError("Invalid memory request/limit value.") 228 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | To make contributions to this charm, you'll need a working 4 | [development setup](https://juju.is/docs/sdk/dev-setup). 5 | 6 | First, install the required version of `tox`: 7 | 8 | ```shell 9 | pip install -r dev-requirements.txt 10 | ``` 11 | 12 | You can create an environment for development with `tox`: 13 | 14 | ```shell 15 | tox devenv -e integration 16 | source venv/bin/activate 17 | ``` 18 | 19 | ## Testing 20 | 21 | This project uses `tox` for managing test environments. There are some 22 | pre-configured environments that can be used for linting and formatting code 23 | when you're preparing contributions to the charm: 24 | 25 | ```shell 26 | tox run -e fmt # update your code according to linting rules 27 | tox run -e lint # code style 28 | tox run -e static # static type checking 29 | tox run -e unit # unit tests 30 | tox run -e integration # integration tests 31 | tox # runs 'format', 'lint', 'static', and 'unit' environments 32 | ``` 33 | 34 | ### Committing 35 | 36 | This repo uses CI/CD workflows as outlined by 37 | [operator-workflows](https://github.com/canonical/operator-workflows). The four 38 | workflows are as follows: 39 | 40 | - `test.yaml`: This is a series of tests including linting, unit tests and 41 | library checks which run on every pull request. 42 | - `integration_test.yaml`: This runs the suite of integration tests included 43 | with the charm and runs on every pull request. 44 | - `publish_charm.yaml`: This runs either by manual dispatch or on every 45 | push to the main branch. Once a PR is merged 46 | into one of these branches, this workflow runs to ensure the tests have passed 47 | before building the charm and publishing the new version to the edge channel 48 | on Charmhub. 49 | - `promote_charm.yaml`: This is a manually triggered workflow which publishes 50 | the charm currently on the edge channel to the stable channel on Charmhub. 51 | 52 | These tests validate extensive linting and formatting rules. Before creating a 53 | PR, please run `tox` to ensure proper formatting and linting is performed. 54 | 55 | ### Deploy 56 | 57 | This charm is used to deploy the Airbyte server in a k8s cluster. For a local 58 | deployment, follow these steps: 59 | 60 | 61 | #### Install Rockcraft 62 | 63 | ```bash 64 | sudo snap install rockcraft --classic 65 | sudo snap install lxd --channel 5.21/stable 66 | lxd init --auto 67 | 68 | # Note: Docker must be installed after LXD is initialized due to firewall rules incompatibility. 69 | sudo snap install docker 70 | sudo groupadd docker 71 | sudo usermod -aG docker $USER 72 | newgrp docker 73 | 74 | # Note: disabling and enabling docker snap is required to avoid sudo requirement. 75 | # As described in https://github.com/docker-snap/docker-snap.
76 | sudo snap disable docker 77 | sudo snap enable docker 78 | ``` 79 | 80 | #### Install Microk8s 81 | 82 | ```bash 83 | # Install charmcraft from snap 84 | sudo snap install charmcraft --classic 85 | 86 | # Install Microk8s from snap 87 | sudo snap install microk8s --channel 1.32-strict/stable 88 | 89 | # Add your user to MicroK8s group and refresh session 90 | sudo adduser $USER snap_microk8s 91 | sudo chown -R $USER ~/.kube # Note: this may fail with "No such file or directory" if ~/.kube does not exist yet; create it first with "mkdir -p ~/.kube" 92 | newgrp snap_microk8s 93 | 94 | # Enable the necessary Microk8s addons 95 | sudo microk8s enable rbac 96 | sudo microk8s enable hostpath-storage 97 | sudo microk8s enable dns 98 | sudo microk8s enable registry 99 | sudo microk8s enable ingress 100 | ``` 101 | 102 | #### Set up the Juju OLM 103 | 104 | ```bash 105 | # Install the Juju CLI client, juju. Minimum version required is juju>=3.1. 106 | sudo snap install juju --channel 3.6/stable 107 | mkdir -p ~/.local/share 108 | 109 | # Install a "juju" controller into your "microk8s" cloud 110 | juju bootstrap microk8s airbyte-controller 111 | 112 | # Create a 'model' on this controller 113 | juju add-model airbyte 114 | juju set-model-constraints -m airbyte arch=$(dpkg --print-architecture) 115 | 116 | # Enable DEBUG logging 117 | juju model-config logging-config="<root>=INFO;unit=DEBUG" 118 | 119 | # Check progress 120 | juju status --relations --watch 2s 121 | juju debug-log 122 | ``` 123 | 124 | 125 | #### Packing the Rock 126 | 127 | **Preferred: destructive-mode (no nested containers)** 128 | 129 | To reliably build the Airbyte rock, use Rockcraft’s destructive-mode so the build runs on the host instead of inside LXD. This avoids Testcontainers/cgroup issues during Gradle’s jOOQ code generation. 130 | 131 | Requirements when using destructive-mode: 132 | - Host Ubuntu version should match the rock base in `airbyte_rock/rockcraft.yaml` (currently `ubuntu@22.04`). Building on a different series can cause toolchain/package mismatches. 133 | - Root privileges (sudo) on the build machine. 134 | - Sufficient resources: at least 4 CPU cores and 16 GB RAM are recommended. Rock builds compile multiple components (server, workers, UI) and run Gradle tasks that are memory/CPU intensive. 135 | 136 | Example (native host matching base, e.g., Ubuntu 22.04): 137 | 138 | ```bash 139 | cd airbyte_rock 140 | sudo rockcraft pack --destructive-mode --verbose 141 | ``` 142 | 143 | **Multipass users (arm64 on Apple Silicon, etc.)** 144 | 145 | - Do NOT build from a host-mounted directory inside the VM (e.g., a folder under `/home/ubuntu` that is mounted from the host). umoci will fail with `lchown permission denied` when unpacking the base. 146 | - Change `rockcraft.yaml` to use `arm64` as the platform. Also, set `JAVA_HOME` to `/usr/lib/jvm/java-21-openjdk-arm64`. 147 | - Instead, clone the repository directly inside the VM (or copy it to a native, non-mounted path), and run destructive-mode there.
Running under `/root` is the most reliable: 148 | 149 | ```bash 150 | # inside the Multipass VM 151 | git clone https://github.com/canonical/airbyte-k8s-operator.git /root/work/airbyte-k8s-operator 152 | cd /root/work/airbyte-k8s-operator/airbyte_rock 153 | sudo rockcraft pack --destructive-mode --verbose 154 | ``` 155 | 156 | #### Upload Rock to registry 157 | The rock needs to be copied to the Microk8s registry so that it can be deployed in the Kubernetes cluster: 158 | 159 | ```bash 160 | rockcraft.skopeo --insecure-policy copy --dest-tls-verify=false oci-archive:airbyte_1.7.0_$(dpkg --print-architecture).rock docker://localhost:32000/airbyte:1.7.0 161 | ``` 162 | 163 | #### Deploy Charm 164 | 165 | ```bash 166 | # Go to root directory of the project 167 | cd .. 168 | 169 | # Pack the charm 170 | charmcraft pack # the --destructive-mode flag can be used to pack the charm using the current host. 171 | 172 | # Deploy the charm 173 | juju deploy ./airbyte-k8s_ubuntu-22.04-$(dpkg --print-architecture).charm --resource airbyte-image=localhost:32000/airbyte:1.7.0 174 | ``` 175 | 176 | #### Relate Charms 177 | 178 | ```bash 179 | # Relate operator to postgresql 180 | juju deploy postgresql-k8s --channel 14/edge --trust 181 | juju relate airbyte-k8s postgresql-k8s 182 | 183 | # Relate operator to minio 184 | juju deploy minio --channel edge 185 | juju relate airbyte-k8s minio 186 | 187 | # Deploy Temporal operators 188 | juju deploy temporal-k8s 189 | juju deploy temporal-admin-k8s 190 | juju relate temporal-k8s:db postgresql-k8s:database 191 | juju relate temporal-k8s:visibility postgresql-k8s:database 192 | juju relate temporal-k8s:admin temporal-admin-k8s:admin 193 | 194 | # Wait for units to settle and create default namespace 195 | juju run temporal-admin-k8s/0 tctl args="--ns default namespace register -rd 3" 196 | 197 | # Generate private key 198 | openssl genrsa -out airbyte.key 2048 199 | 200 | # Generate a certificate signing request 201 | openssl req -new -key airbyte.key -out airbyte.csr -subj "/CN=airbyte-k8s" 202 | 203 | # Create self-signed certificate 204 | openssl x509 -req -days 365 -in airbyte.csr -signkey airbyte.key -out airbyte.crt -extfile <(printf "subjectAltName=DNS:airbyte-k8s") 205 | 206 | # Create a k8s secret 207 | kubectl -n airbyte create secret tls airbyte-tls --cert=airbyte.crt --key=airbyte.key 208 | 209 | # Deploy ingress controller 210 | microk8s enable ingress:default-ssl-certificate=airbyte/airbyte-tls 211 | 212 | # Deploy nginx operator 213 | juju deploy nginx-ingress-integrator --channel edge 214 | juju trust nginx-ingress-integrator --scope=cluster 215 | juju relate airbyte-ui-k8s nginx-ingress-integrator 216 | ``` 217 | 218 | #### Refreshing the Charm 219 | ```bash 220 | # When we change the charm 221 | charmcraft pack 222 | juju refresh airbyte-k8s --path ./airbyte-k8s_ubuntu-22.04-$(dpkg --print-architecture).charm --resource airbyte-image=localhost:32000/airbyte:1.7.1 223 | 224 | ``` 225 | 226 | #### Cleanup 227 | 228 | ```bash 229 | # Clean-up before retrying 230 | # Either remove individual applications 231 | # (The --force flag can optionally be included if any of the units are in error state) 232 | juju remove-application airbyte-k8s 233 | juju remove-application postgresql-k8s --destroy-storage 234 | juju remove-application minio 235 | juju remove-application nginx-ingress-integrator 236 | 237 | # Or remove whole model 238 | juju destroy-model airbyte --destroy-storage 239 | ``` 240 | 
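#### Verify the Deployment

Before tearing anything down, it can be useful to confirm the deployment is actually healthy. A hedged sketch using the health endpoints declared in `src/literals.py` (`<unit-ip>` is a placeholder; use the address reported by `juju status`):

```bash
# All airbyte-k8s units should eventually report active/idle.
juju status --relations

# The airbyte-server container listens on INTERNAL_API_PORT (8001) and exposes
# /api/v1/health (see CONTAINER_HEALTH_CHECK_MAP in src/literals.py).
curl http://<unit-ip>:8001/api/v1/health
```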
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 
62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 
180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright 2024 Canonical Ltd. 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /lib/charms/data_platform_libs/v0/data_models.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Canonical Ltd. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | r"""Library to provide simple API for promoting typed, validated and structured dataclass in charms. 16 | 17 | Dict-like data structure are often used in charms. They are used for config, action parameters 18 | and databag. This library aims at providing simple API for using pydantic BaseModel-derived class 19 | in charms, in order to enhance: 20 | * Validation, by embedding custom business logic to validate single parameters or even have 21 | validators that acts across different fields 22 | * Parsing, by loading data into pydantic object we can both allow for other types (e.g. float) to 23 | be used in configuration/parameters as well as specify even nested complex objects for databags 24 | * Static typing checks, by moving from dict-like object to classes with typed-annotated properties, 25 | that can be statically checked using mypy to ensure that the code is correct. 26 | 27 | Pydantic models can be used on: 28 | 29 | * Charm Configuration (as defined in config.yaml) 30 | * Actions parameters (as defined in actions.yaml) 31 | * Application/Unit Databag Information (thus making it more structured and encoded) 32 | 33 | 34 | ## Creating models 35 | 36 | Any data-structure can be modeled using dataclasses instead of dict-like objects (e.g. storing 37 | config, action parameters and databags). 
Within pydantic, we can define dataclasses that provides 38 | also parsing and validation on standard dataclass implementation: 39 | 40 | ```python 41 | 42 | from charms.data_platform_libs.v0.data_models import BaseConfigModel 43 | 44 | class MyConfig(BaseConfigModel): 45 | 46 | my_key: int 47 | 48 | @validator("my_key") 49 | def is_lower_than_100(cls, v: int): 50 | if v > 100: 51 | raise ValueError("Too high") 52 | 53 | ``` 54 | 55 | This should allow to collapse both parsing and validation as the dataclass object is parsed and 56 | created: 57 | 58 | ```python 59 | dataclass = MyConfig(my_key="1") 60 | 61 | dataclass.my_key # this returns 1 (int) 62 | dataclass["my_key"] # this returns 1 (int) 63 | 64 | dataclass = MyConfig(my_key="102") # this returns a ValueError("Too High") 65 | ``` 66 | 67 | ## Charm Configuration Model 68 | 69 | Using the class above, we can implement parsing and validation of configuration by simply 70 | extending our charms using the `TypedCharmBase` class, as shown below. 71 | 72 | ```python 73 | class MyCharm(TypedCharmBase[MyConfig]): 74 | config_type = MyConfig 75 | 76 | # everywhere in the code you will have config property already parsed and validate 77 | def my_method(self): 78 | self.config: MyConfig 79 | ``` 80 | 81 | ## Action parameters 82 | 83 | In order to parse action parameters, we can use a decorator to be applied to action event 84 | callbacks, as shown below. 85 | 86 | ```python 87 | @validate_params(PullActionModel) 88 | def _pull_site_action( 89 | self, event: ActionEvent, 90 | params: Optional[Union[PullActionModel, ValidationError]] = None 91 | ): 92 | if isinstance(params, ValidationError): 93 | # handle errors 94 | else: 95 | # do stuff 96 | ``` 97 | 98 | Note that this changes the signature of the callbacks by adding an extra parameter with the parsed 99 | counterpart of the `event.params` dict-like field. If validation fails, we return (not throw!) the 100 | exception, to be handled (or raised) in the callback. 101 | 102 | ## Databag 103 | 104 | In order to parse databag fields, we define a decorator to be applied to base relation event 105 | callbacks. 106 | 107 | ```python 108 | @parse_relation_data(app_model=AppDataModel, unit_model=UnitDataModel) 109 | def _on_cluster_relation_joined( 110 | self, event: RelationEvent, 111 | app_data: Optional[Union[AppDataModel, ValidationError]] = None, 112 | unit_data: Optional[Union[UnitDataModel, ValidationError]] = None 113 | ) -> None: 114 | ... 115 | ``` 116 | 117 | The parameters `app_data` and `unit_data` refers to the databag of the entity which fired the 118 | RelationEvent. 119 | 120 | When we want to access to a relation databag outsides of an action, it can be useful also to 121 | compact multiple databags into a single object (if there are no conflicting fields), e.g. 122 | 123 | ```python 124 | 125 | class ProviderDataBag(BaseClass): 126 | provider_key: str 127 | 128 | class RequirerDataBag(BaseClass): 129 | requirer_key: str 130 | 131 | class MergedDataBag(ProviderDataBag, RequirerDataBag): 132 | pass 133 | 134 | merged_data = get_relation_data_as( 135 | MergedDataBag, relation.data[self.app], relation.data[relation.app] 136 | ) 137 | 138 | merged_data.requirer_key 139 | merged_data.provider_key 140 | 141 | ``` 142 | 143 | The above code can be generalized to other kinds of merged objects, e.g. 
application and unit, and 144 | it can be extended to multiple sources beyond 2: 145 | 146 | ```python 147 | merged_data = get_relation_data_as( 148 | MergedDataBag, relation.data[self.app], relation.data[relation.app], ... 149 | ) 150 | ``` 151 | 152 | """ 153 | 154 | import json 155 | from functools import reduce, wraps 156 | from typing import Callable, Generic, MutableMapping, Optional, Type, TypeVar, Union 157 | 158 | import pydantic 159 | from ops.charm import ActionEvent, CharmBase, RelationEvent 160 | from ops.model import RelationDataContent 161 | from pydantic import BaseModel, ValidationError 162 | 163 | # The unique Charmhub library identifier, never change it 164 | LIBID = "cb2094c5b07d47e1bf346aaee0fcfcfe" 165 | 166 | # Increment this major API version when introducing breaking changes 167 | LIBAPI = 0 168 | 169 | # Increment this PATCH version before using `charmcraft publish-lib` or reset 170 | # to 0 if you are raising the major API version 171 | LIBPATCH = 4 172 | 173 | PYDEPS = ["ops>=2.0.0", "pydantic>=1.10,<2"] 174 | 175 | G = TypeVar("G") 176 | T = TypeVar("T", bound=BaseModel) 177 | AppModel = TypeVar("AppModel", bound=BaseModel) 178 | UnitModel = TypeVar("UnitModel", bound=BaseModel) 179 | 180 | DataBagNativeTypes = (int, str, float) 181 | 182 | 183 | class BaseConfigModel(BaseModel): 184 | """Class to be used for defining the structured configuration options.""" 185 | 186 | def __getitem__(self, x): 187 | """Return the item using the notation instance[key].""" 188 | return getattr(self, x.replace("-", "_")) 189 | 190 | 191 | class TypedCharmBase(CharmBase, Generic[T]): 192 | """Class to be used for extending config-typed charms.""" 193 | 194 | config_type: Type[T] 195 | 196 | @property 197 | def config(self) -> T: 198 | """Return a config instance validated and parsed using the provided pydantic class.""" 199 | translated_keys = {k.replace("-", "_"): v for k, v in self.model.config.items()} 200 | return self.config_type(**translated_keys) 201 | 202 | 203 | def validate_params(cls: Type[T]): 204 | """Return a decorator to allow pydantic parsing of action parameters. 205 | 206 | Args: 207 | cls: Pydantic class representing the model to be used for parsing the content of the 208 | action parameter 209 | """ 210 | 211 | def decorator( 212 | f: Callable[[CharmBase, ActionEvent, Union[T, ValidationError]], G] 213 | ) -> Callable[[CharmBase, ActionEvent], G]: 214 | @wraps(f) 215 | def event_wrapper(self: CharmBase, event: ActionEvent): 216 | try: 217 | params = cls( 218 | **{key.replace("-", "_"): value for key, value in event.params.items()} 219 | ) 220 | except ValidationError as e: 221 | params = e 222 | return f(self, event, params) 223 | 224 | return event_wrapper 225 | 226 | return decorator 227 | 228 | 229 | def write(relation_data: RelationDataContent, model: BaseModel): 230 | """Write the data contained in a domain object to the relation databag. 231 | 232 | Args: 233 | relation_data: pointer to the relation databag 234 | model: instance of pydantic model to be written 235 | """ 236 | for key, value in model.dict(exclude_none=False).items(): 237 | if value: 238 | relation_data[key.replace("_", "-")] = ( 239 | str(value) 240 | if any(isinstance(value, _type) for _type in DataBagNativeTypes) 241 | else json.dumps(value) 242 | ) 243 | else: 244 | relation_data[key.replace("_", "-")] = "" 245 | 246 | 247 | def read(relation_data: MutableMapping[str, str], obj: Type[T]) -> T: 248 | """Read data from a relation databag and parse it into a domain object. 
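
    Field names on the model are looked up in the databag by their kebab-case
    form; values whose declared type is not a native str/int/float are decoded
    from JSON, and empty values are skipped.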
249 | 250 | Args: 251 | relation_data: pointer to the relation databag 252 | obj: pydantic class representing the model to be used for parsing 253 | """ 254 | return obj( 255 | **{ 256 | field_name: ( 257 | relation_data[parsed_key] 258 | if field.outer_type_ in DataBagNativeTypes 259 | else json.loads(relation_data[parsed_key]) 260 | ) 261 | for field_name, field in obj.__fields__.items() 262 | # pyright: ignore[reportGeneralTypeIssues] 263 | if (parsed_key := field_name.replace("_", "-")) in relation_data 264 | if relation_data[parsed_key] 265 | } 266 | ) 267 | 268 | 269 | def parse_relation_data( 270 | app_model: Optional[Type[AppModel]] = None, unit_model: Optional[Type[UnitModel]] = None 271 | ): 272 | """Return a decorator to allow pydantic parsing of the app and unit databags. 273 | 274 | Args: 275 | app_model: Pydantic class representing the model to be used for parsing the content of the 276 | app databag. None if no parsing ought to be done. 277 | unit_model: Pydantic class representing the model to be used for parsing the content of the 278 | unit databag. None if no parsing ought to be done. 279 | """ 280 | 281 | def decorator( 282 | f: Callable[ 283 | [ 284 | CharmBase, 285 | RelationEvent, 286 | Optional[Union[AppModel, ValidationError]], 287 | Optional[Union[UnitModel, ValidationError]], 288 | ], 289 | G, 290 | ] 291 | ) -> Callable[[CharmBase, RelationEvent], G]: 292 | @wraps(f) 293 | def event_wrapper(self: CharmBase, event: RelationEvent): 294 | try: 295 | app_data = ( 296 | read(event.relation.data[event.app], app_model) 297 | if app_model is not None and event.app 298 | else None 299 | ) 300 | except pydantic.ValidationError as e: 301 | app_data = e 302 | 303 | try: 304 | unit_data = ( 305 | read(event.relation.data[event.unit], unit_model) 306 | if unit_model is not None and event.unit 307 | else None 308 | ) 309 | except pydantic.ValidationError as e: 310 | unit_data = e 311 | 312 | return f(self, event, app_data, unit_data) 313 | 314 | return event_wrapper 315 | 316 | return decorator 317 | 318 | 319 | class RelationDataModel(BaseModel): 320 | """Base class to be used for creating data models to be used for relation databags.""" 321 | 322 | def write(self, relation_data: RelationDataContent): 323 | """Write data to a relation databag. 324 | 325 | Args: 326 | relation_data: pointer to the relation databag 327 | """ 328 | return write(relation_data, self) 329 | 330 | @classmethod 331 | def read(cls, relation_data: RelationDataContent) -> "RelationDataModel": 332 | """Read data from a relation databag and parse it as an instance of the pydantic class. 333 | 334 | Args: 335 | relation_data: pointer to the relation databag 336 | """ 337 | return read(relation_data, cls) 338 | 339 | 340 | def get_relation_data_as( 341 | model_type: Type[AppModel], 342 | *relation_data: RelationDataContent, 343 | ) -> Union[AppModel, ValidationError]: 344 | """Return a merged representation of the provider and requirer databag into a single object. 
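
    Databags are merged in the order given, with later databags taking
    precedence on duplicate keys; on parsing failure the ValidationError is
    returned rather than raised.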
345 | 
346 |     Args:
347 |         model_type: pydantic class representing the merged databag
348 |         relation_data: list of RelationDataContent of provider/requirer/unit sides
349 |     """
350 |     try:
351 |         app_data = read(reduce(lambda x, y: dict(x) | dict(y), relation_data, {}), model_type)
352 |     except pydantic.ValidationError as e:
353 |         app_data = e
354 |     return app_data
355 | 
--------------------------------------------------------------------------------
/tests/integration/helpers.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # Copyright 2024 Canonical Ltd.
3 | # See LICENSE file for licensing details.
4 | 
5 | """Charm integration test helpers."""
6 | 
7 | import logging
8 | import time
9 | from pathlib import Path
10 | 
11 | import requests
12 | import yaml
13 | from pytest_operator.plugin import OpsTest
14 | from temporal_client.activities import say_hello
15 | from temporal_client.workflows import SayHello
16 | from temporalio.client import Client
17 | from temporalio.worker import Worker
18 | 
19 | logger = logging.getLogger(__name__)
20 | 
21 | METADATA = yaml.safe_load(Path("./charmcraft.yaml").read_text())
22 | APP_NAME_AIRBYTE_SERVER = METADATA["name"]
23 | APP_NAME_TEMPORAL_SERVER = "temporal-k8s"
24 | APP_NAME_TEMPORAL_ADMIN = "temporal-admin-k8s"
25 | APP_NAME_TEMPORAL_UI = "temporal-ui-k8s"
26 | 
27 | GET_HEADERS = {"accept": "application/json"}
28 | POST_HEADERS = {"accept": "application/json", "content-type": "application/json"}
29 | 
30 | 
31 | async def run_sample_workflow(ops_test: OpsTest):
32 |     """Connect a client and run a basic Temporal workflow.
33 | 
34 |     Args:
35 |         ops_test: PyTest object.
36 |     """
37 |     url = await get_application_url(ops_test, application=APP_NAME_TEMPORAL_SERVER, port=7233)
38 |     logger.info("running workflow on app address: %s", url)
39 | 
40 |     client = await Client.connect(url)
41 | 
42 |     # Run a worker for the workflow
43 |     async with Worker(client, task_queue="my-task-queue", workflows=[SayHello], activities=[say_hello]):
44 |         name = "Jean-luc"
45 |         result = await client.execute_workflow(SayHello.run, name, id="my-workflow-id", task_queue="my-task-queue")
46 |         logger.info(f"result: {result}")
47 |         assert result == f"Hello, {name}!"
48 | 
49 | 
50 | async def create_default_namespace(ops_test: OpsTest):
51 |     """Create default namespace on Temporal server using tctl.
52 | 
53 |     Args:
54 |         ops_test: PyTest object.
55 |     """
56 |     # Register default namespace from admin charm.
57 |     action = (
58 |         await ops_test.model.applications[APP_NAME_TEMPORAL_ADMIN]
59 |         .units[0]
60 |         .run_action("tctl", args="--ns default namespace register -rd 3")
61 |     )
62 |     result = (await action.wait()).results
63 |     logger.info(f"tctl result: {result}")
64 |     assert "result" in result and result["result"] == "command succeeded"
65 | 
66 | 
67 | async def get_application_url(ops_test: OpsTest, application, port):
68 |     """Return application URL from the model.
69 | 
70 |     Args:
71 |         ops_test: PyTest object.
72 |         application: Name of the application.
73 |         port: Port number of the URL.
74 | 
75 |     Returns:
76 |         Application URL of the form {address}:{port}
77 |     """
78 |     status = await ops_test.model.get_status()  # noqa: F821
79 |     address = status["applications"][application].public_address
80 |     return f"{address}:{port}"
81 | 
82 | 
83 | async def get_unit_url(ops_test: OpsTest, application, unit, port, protocol="http"):
84 |     """Return unit URL from the model.
85 | 
86 |     Args:
87 |         ops_test: PyTest object.
88 | application: Name of the application. 89 | unit: Number of the unit. 90 | port: Port number of the URL. 91 | protocol: Transfer protocol (default: http). 92 | 93 | Returns: 94 | Unit URL of the form {protocol}://{address}:{port} 95 | """ 96 | status = await ops_test.model.get_status() # noqa: F821 97 | address = status["applications"][application]["units"][f"{application}/{unit}"]["address"] 98 | return f"{protocol}://{address}:{port}" 99 | 100 | 101 | async def perform_temporal_integrations(ops_test: OpsTest): 102 | """Integrate Temporal charm with postgresql, admin and ui charms. 103 | 104 | Args: 105 | ops_test: PyTest object. 106 | """ 107 | await ops_test.model.integrate(f"{APP_NAME_TEMPORAL_SERVER}:db", "postgresql-k8s:database") 108 | await ops_test.model.integrate(f"{APP_NAME_TEMPORAL_SERVER}:visibility", "postgresql-k8s:database") 109 | await ops_test.model.integrate(f"{APP_NAME_TEMPORAL_SERVER}:admin", f"{APP_NAME_TEMPORAL_ADMIN}:admin") 110 | await ops_test.model.wait_for_idle( 111 | apps=[APP_NAME_TEMPORAL_SERVER, "postgresql-k8s"], status="active", raise_on_blocked=False, timeout=180 112 | ) 113 | 114 | assert ops_test.model.applications[APP_NAME_TEMPORAL_SERVER].units[0].workload_status == "active" 115 | 116 | 117 | async def perform_airbyte_integrations(ops_test: OpsTest): 118 | """Perform Airbyte charm integrations. 119 | 120 | Args: 121 | ops_test: PyTest object. 122 | """ 123 | await ops_test.model.integrate(APP_NAME_AIRBYTE_SERVER, "postgresql-k8s") 124 | await ops_test.model.integrate(APP_NAME_AIRBYTE_SERVER, "minio") 125 | await ops_test.model.wait_for_idle( 126 | apps=[APP_NAME_AIRBYTE_SERVER, "postgresql-k8s", "minio"], 127 | status="active", 128 | raise_on_blocked=False, 129 | wait_for_active=True, 130 | idle_period=60, 131 | timeout=300, 132 | ) 133 | 134 | assert ops_test.model.applications[APP_NAME_AIRBYTE_SERVER].units[0].workload_status == "active" 135 | 136 | 137 | def get_airbyte_workspace_id(api_url): 138 | """Get Airbyte default workspace ID. 139 | 140 | Args: 141 | api_url: Airbyte API base URL. 142 | 143 | Returns: 144 | Airbyte workspace ID. 145 | """ 146 | url = f"{api_url}/api/public/v1/workspaces?includeDeleted=false&limit=20&offset=0" 147 | logger.info("fetching Airbyte workspace ID") 148 | response = requests.get(url, headers=GET_HEADERS, timeout=300) 149 | 150 | assert response.status_code == 200 151 | return response.json().get("data")[0]["workspaceId"] 152 | 153 | 154 | def create_airbyte_source(api_url, workspace_id): 155 | """Create Airbyte sample source. 156 | 157 | Args: 158 | api_url: Airbyte API base URL. 159 | workspace_id: default workspace ID. 160 | 161 | Returns: 162 | Created source ID. 163 | """ 164 | url = f"{api_url}/api/public/v1/sources" 165 | payload = { 166 | "configuration": {"sourceType": "pokeapi", "pokemon_name": "pikachu"}, 167 | "name": "API Test", 168 | "workspaceId": workspace_id, 169 | } 170 | 171 | logger.info("creating Airbyte source") 172 | response = requests.post(url, json=payload, headers=POST_HEADERS, timeout=300) 173 | logger.info(response.json()) 174 | 175 | assert response.status_code == 200 176 | return response.json().get("sourceId") 177 | 178 | 179 | def create_airbyte_destination(api_url, model_name, workspace_id, db_password): 180 | """Create Airbyte sample destination. 181 | 182 | Args: 183 | api_url: Airbyte API base URL. 184 | model_name: name of the juju model. 185 | workspace_id: default workspace ID. 186 | db_password: database password. 187 | 188 | Returns: 189 | Created destination ID. 
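
    Example (sketch; the unit address is illustrative):
        destination_id = create_airbyte_destination(
            "http://10.1.25.100:8001", ops_test.model.name, workspace_id, db_password
        )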
190 | """ 191 | url = f"{api_url}/api/public/v1/destinations" 192 | payload = { 193 | "configuration": { 194 | "destinationType": "postgres", 195 | "port": 5432, 196 | "schema": "pokeapi", 197 | "ssl_mode": {"mode": "disable"}, 198 | "tunnel_method": {"tunnel_method": "NO_TUNNEL"}, 199 | "host": f"postgresql-k8s-primary.{model_name}.svc.cluster.local", 200 | "database": "airbyte-k8s_db", 201 | "username": "operator", 202 | "password": db_password, 203 | }, 204 | "workspaceId": workspace_id, 205 | "name": "Postgres", 206 | } 207 | 208 | logger.info("creating Airbyte destination") 209 | response = requests.post(url, json=payload, headers=POST_HEADERS, timeout=300) 210 | logger.info(response.json()) 211 | 212 | assert response.status_code == 200 213 | return response.json().get("destinationId") 214 | 215 | 216 | def create_airbyte_connection(api_url, source_id, destination_id): 217 | """Create Airbyte connection. 218 | 219 | Args: 220 | api_url: Airbyte API base URL. 221 | source_id: Airbyte source ID. 222 | destination_id: Airbyte destination ID. 223 | 224 | Returns: 225 | Created connection ID. 226 | """ 227 | url = f"{api_url}/api/public/v1/connections" 228 | payload = { 229 | "schedule": {"scheduleType": "manual"}, 230 | "dataResidency": "auto", 231 | "namespaceDefinition": "destination", 232 | "nonBreakingSchemaUpdatesBehavior": "ignore", 233 | "sourceId": source_id, 234 | "destinationId": destination_id, 235 | } 236 | 237 | logger.info("creating Airbyte connection") 238 | response = requests.post(url, json=payload, headers=POST_HEADERS, timeout=300) 239 | logger.info(response.json()) 240 | 241 | assert response.status_code == 200 242 | return response.json().get("connectionId") 243 | 244 | 245 | def trigger_airbyte_connection(api_url, connection_id): 246 | """Trigger Airbyte connection. 247 | 248 | Args: 249 | api_url: Airbyte API base URL. 250 | connection_id: Airbyte connection ID. 251 | 252 | Returns: 253 | Created job ID. 254 | """ 255 | url = f"{api_url}/api/public/v1/jobs" 256 | payload = {"jobType": "sync", "connectionId": connection_id} 257 | logger.info("triggering Airbyte connection") 258 | response = requests.post(url, json=payload, headers=POST_HEADERS, timeout=300) 259 | logger.info(response.json()) 260 | 261 | assert response.status_code == 200 262 | return response.json().get("jobId") 263 | 264 | 265 | def check_airbyte_job_status(api_url, job_id): 266 | """Get Airbyte sync job status. 267 | 268 | Args: 269 | api_url: Airbyte API base URL. 270 | job_id: Sync job ID. 271 | 272 | Returns: 273 | Job status. 274 | """ 275 | url = f"{api_url}/api/public/v1/jobs/{job_id}" 276 | logger.info("fetching Airbyte job status") 277 | response = requests.get(url, headers=GET_HEADERS, timeout=120) 278 | logger.info(response.json()) 279 | 280 | return response.json().get("status") 281 | 282 | 283 | def cancel_airbyte_job(api_url, job_id): 284 | """Cancel Airbyte sync job. 285 | 286 | Args: 287 | api_url: Airbyte API base URL. 288 | job_id: Sync job ID. 289 | 290 | Returns: 291 | Job status. 292 | """ 293 | url = f"{api_url}/api/public/v1/jobs/{job_id}" 294 | logger.info("cancelling Airbyte job") 295 | response = requests.delete(url, headers=GET_HEADERS, timeout=120) 296 | logger.info(response.json()) 297 | 298 | return response.json().get("status") 299 | 300 | 301 | async def get_db_password(ops_test): 302 | """Get PostgreSQL DB admin password. 303 | 304 | Args: 305 | ops_test: PyTest object. 306 | 307 | Returns: 308 | PostgreSQL DB admin password. 
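
    Example (sketch, mirroring the usage in run_test_sync_job):
        db_password = await get_db_password(ops_test)
        assert db_password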
309 |     """
310 |     postgresql_unit = ops_test.model.applications["postgresql-k8s"].units[0]
311 |     for i in range(10):
312 |         action = await postgresql_unit.run_action("get-password")
313 |         result = await action.wait()
314 |         logger.info(f"attempt {i} -> action result {result.status} {result.results}")
315 |         if "password" in result.results:
316 |             return result.results["password"]
317 |         time.sleep(2)
318 | 
319 | 
320 | async def run_test_sync_job(ops_test):
321 |     """Run a test Airbyte sync job end to end.
322 | 
323 |     Args:
324 |         ops_test: PyTest object.
325 |     """
326 |     # Get the Airbyte API URL
327 |     api_url = await get_unit_url(ops_test, application=APP_NAME_AIRBYTE_SERVER, unit=0, port=8001)
328 | 
329 |     # Get model status
330 |     status = await ops_test.model.get_status()  # noqa: F821
331 | 
332 |     logger.info("curling app address: %s", api_url)
333 |     workspace_id = get_airbyte_workspace_id(api_url)
334 |     db_password = await get_db_password(ops_test)
335 |     assert db_password
336 | 
337 |     # Create Source
338 |     source_id = create_airbyte_source(api_url, workspace_id)
339 | 
340 |     # Create destination
341 |     destination_id = create_airbyte_destination(api_url, ops_test.model.name, workspace_id, db_password)
342 | 
343 |     # Create connection
344 |     connection_id = create_airbyte_connection(api_url, source_id, destination_id)
345 | 
346 |     # Trigger sync job
347 |     for i in range(4):
348 |         logger.info(f"attempt {i + 1} to trigger new job")
349 |         job_id = trigger_airbyte_connection(api_url, connection_id)
350 | 
351 |         # Wait until job is successful
352 |         job_successful = False
353 |         for j in range(15):
354 |             logger.info(f"job {i + 1} attempt {j + 1}: getting job status")
355 |             status = check_airbyte_job_status(api_url, job_id)
356 | 
357 |             if status == "failed":
358 |                 break
359 | 
360 |             if status == "succeeded":
361 |                 logger.info(f"job {i + 1} attempt {j + 1}: job successful!")
362 |                 job_successful = True
363 |                 break
364 | 
365 |             logger.info(f"job {i + 1} attempt {j + 1}: job still running, retrying in 10 seconds")
366 |             time.sleep(10)
367 | 
368 |         if job_successful:
369 |             break
370 | 
371 |         cancel_airbyte_job(api_url, job_id)
372 | 
373 |     assert job_successful
374 | 
--------------------------------------------------------------------------------
/src/charm_helpers.py:
--------------------------------------------------------------------------------
1 | # Copyright 2024 Canonical Ltd.
2 | # See LICENSE file for licensing details.
3 | 
4 | """Charm helpers."""
5 | 
6 | import os
7 | from urllib.parse import urlparse
8 | 
9 | from literals import (
10 |     AIRBYTE_API_PORT,
11 |     BASE_ENV,
12 |     CONNECTOR_BUILDER_SERVER_API_PORT,
13 |     INTERNAL_API_PORT,
14 |     WORKLOAD_API_PORT,
15 | )
16 | from structured_config import StorageType
17 | from utils import use_feature_flags
18 | 
19 | 
20 | def create_env(model_name, app_name, container_name, config, state):
21 |     """Create a set of environment variables for the application.
22 | 
23 |     Args:
24 |         model_name: Name of the juju model.
25 |         app_name: Name of the application.
26 |         container_name: Name of Airbyte container.
27 |         config: Charm config.
28 |         state: Charm state.
29 | 
30 |     Returns:
31 |         environment variables dict.
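
    Example (sketch of how the charm is expected to call this helper; the
    container name is one of the workload containers from charmcraft.yaml):
        env = create_env(
            self.model.name, self.app.name, "airbyte-server", self.config, self._state
        )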
32 |     """
33 |     db_conn = state.database_connection
34 | 
35 |     host = db_conn["host"]
36 |     port = db_conn["port"]
37 |     db_name = db_conn["dbname"]
38 |     db_url = f"jdbc:postgresql://{host}:{port}/{db_name}"
39 |     secret_persistence = config["secret-persistence"]
40 |     if secret_persistence:
41 |         secret_persistence = config["secret-persistence"].value
42 | 
43 |     feature_flags_enabled = use_feature_flags(config)
44 | 
45 |     # Some defaults are extracted from Helm chart:
46 |     # https://github.com/airbytehq/airbyte-platform/tree/v1.5.0/charts/airbyte
47 |     env = {
48 |         **BASE_ENV,
49 |         # Airbyte services config
50 |         "LOG_LEVEL": config["log-level"].value,
51 |         "TEMPORAL_HOST": config["temporal-host"],
52 |         "WEBAPP_URL": config["webapp-url"],
53 |         # Flags config - point to the mounted flags.yaml file if any flag is set
54 |         # Airbyte 1.7 uses configfile by default
55 |         "FEATURE_FLAG_PATH": "/flags" if feature_flags_enabled else None,
56 |         "FEATURE_FLAG_CLIENT": "configfile" if feature_flags_enabled else None,
57 |         # Secrets config
58 |         "SECRET_PERSISTENCE": secret_persistence,
59 |         "SECRET_STORE_GCP_PROJECT_ID": config["secret-store-gcp-project-id"],
60 |         "SECRET_STORE_GCP_CREDENTIALS": config["secret-store-gcp-credentials"],
61 |         "VAULT_ADDRESS": config["vault-address"],
62 |         "VAULT_PREFIX": config["vault-prefix"],
63 |         "VAULT_AUTH_TOKEN": config["vault-auth-token"],
64 |         "VAULT_AUTH_METHOD": config["vault-auth-method"].value,
65 |         "AWS_ACCESS_KEY": config["aws-access-key"],
66 |         "AWS_SECRET_ACCESS_KEY": config["aws-secret-access-key"],
67 |         "AWS_KMS_KEY_ARN": config["aws-kms-key-arn"],
68 |         "AWS_SECRET_MANAGER_SECRET_TAGS": config["aws-secret-manager-secret-tags"],
69 |         # Jobs config
70 |         "SYNC_JOB_RETRIES_COMPLETE_FAILURES_MAX_SUCCESSIVE": config[
71 |             "sync-job-retries-complete-failures-max-successive"
72 |         ],
73 |         "SYNC_JOB_RETRIES_COMPLETE_FAILURES_MAX_TOTAL": config["sync-job-retries-complete-failures-max-total"],
74 |         "SYNC_JOB_RETRIES_COMPLETE_FAILURES_BACKOFF_MIN_INTERVAL_S": config[
75 |             "sync-job-retries-complete-failures-backoff-min-interval-s"
76 |         ],
77 |         "SYNC_JOB_RETRIES_COMPLETE_FAILURES_BACKOFF_MAX_INTERVAL_S": config[
78 |             "sync-job-retries-complete-failures-backoff-max-interval-s"
79 |         ],
80 |         "SYNC_JOB_RETRIES_COMPLETE_FAILURES_BACKOFF_BASE": config["sync-job-retries-complete-failures-backoff-base"],
81 |         "SYNC_JOB_RETRIES_PARTIAL_FAILURES_MAX_SUCCESSIVE": config["sync-job-retries-partial-failures-max-successive"],
82 |         "SYNC_JOB_RETRIES_PARTIAL_FAILURES_MAX_TOTAL": config["sync-job-retries-partial-failures-max-total"],
83 |         "SYNC_JOB_MAX_TIMEOUT_DAYS": config["sync-job-max-timeout-days"],
84 |         "JOB_MAIN_CONTAINER_CPU_REQUEST": config["job-main-container-cpu-request"],
85 |         "JOB_MAIN_CONTAINER_CPU_LIMIT": config["job-main-container-cpu-limit"],
86 |         "JOB_MAIN_CONTAINER_MEMORY_REQUEST": config["job-main-container-memory-request"],
87 |         "JOB_MAIN_CONTAINER_MEMORY_LIMIT": config["job-main-container-memory-limit"],
88 |         # Connections config
89 |         "MAX_FIELDS_PER_CONNECTION": config["max-fields-per-connections"],
90 |         "MAX_DAYS_OF_ONLY_FAILED_JOBS_BEFORE_CONNECTION_DISABLE": config[
91 |             "max-days-of-only-failed-jobs-before-connection-disable"
92 |         ],
93 |         "MAX_FAILED_JOBS_IN_A_ROW_BEFORE_CONNECTION_DISABLE": config[
94 |             "max-failed-jobs-in-a-row-before-connection-disable"
95 |         ],
96 |         # Worker config
97 |         "MAX_SPEC_WORKERS": config["max-spec-workers"],
98 |         "MAX_CHECK_WORKERS": config["max-check-workers"],
99 |         "MAX_SYNC_WORKERS": config["max-sync-workers"],
100 |         "MAX_DISCOVER_WORKERS": 
config["max-discover-workers"], 101 | # Data retention config 102 | "TEMPORAL_HISTORY_RETENTION_IN_DAYS": config["temporal-history-retention-in-days"], 103 | # Kubernetes config 104 | "JOB_KUBE_TOLERATIONS": config["job-kube-tolerations"], 105 | "JOB_KUBE_NODE_SELECTORS": config["job-kube-node-selectors"], 106 | "JOB_KUBE_ANNOTATIONS": config["job-kube-annotations"], 107 | "JOB_KUBE_MAIN_CONTAINER_IMAGE_PULL_POLICY": config["job-kube-main-container-image-pull-policy"].value, 108 | "JOB_KUBE_MAIN_CONTAINER_IMAGE_PULL_SECRET": config["job-kube-main-container-image-pull-secret"], 109 | "JOB_KUBE_SIDECAR_CONTAINER_IMAGE_PULL_POLICY": config["job-kube-sidecar-container-image-pull-policy"].value, 110 | "JOB_KUBE_SOCAT_IMAGE": config["job-kube-socat-image"], 111 | "JOB_KUBE_BUSYBOX_IMAGE": config["job-kube-busybox-image"], 112 | "JOB_KUBE_CURL_IMAGE": config["job-kube-curl-image"], 113 | "JOB_KUBE_NAMESPACE": config["job-kube-namespace"] or model_name, 114 | # Jobs config 115 | "SPEC_JOB_KUBE_NODE_SELECTORS": config["spec-job-kube-node-selectors"], 116 | "CHECK_JOB_KUBE_NODE_SELECTORS": config["check-job-kube-node-selectors"], 117 | "DISCOVER_JOB_KUBE_NODE_SELECTORS": config["discover-job-kube-node-selectors"], 118 | "SPEC_JOB_KUBE_ANNOTATIONS": config["spec-job-kube-annotations"], 119 | "CHECK_JOB_KUBE_ANNOTATIONS": config["check-job-kube-annotations"], 120 | "DISCOVER_JOB_KUBE_ANNOTATIONS": config["discover-job-kube-annotations"], 121 | # Logging config 122 | "WORKER_LOGS_STORAGE_TYPE": config["storage-type"].value, 123 | "WORKER_STATE_STORAGE_TYPE": config["storage-type"].value, 124 | "STORAGE_TYPE": config["storage-type"].value, 125 | "STORAGE_BUCKET_LOG": config["storage-bucket-logs"], 126 | "S3_LOG_BUCKET": config["storage-bucket-logs"], 127 | "STORAGE_BUCKET_STATE": config["storage-bucket-state"], 128 | "STORAGE_BUCKET_WORKLOAD_OUTPUT": config["storage-bucket-workload-output"], 129 | "STORAGE_BUCKET_ACTIVITY_PAYLOAD": config["storage-bucket-activity-payload"], 130 | # Database config 131 | "DATABASE_URL": db_url, 132 | "DATABASE_USER": db_conn["user"], 133 | "DATABASE_PASSWORD": db_conn["password"], 134 | "DATABASE_DB": db_name, 135 | "DATABASE_HOST": host, 136 | "DATABASE_PORT": port, 137 | "KEYCLOAK_DATABASE_URL": db_url + "?currentSchema=keycloak", 138 | "JOB_KUBE_SERVICEACCOUNT": app_name, 139 | "RUNNING_TTL_MINUTES": config["pod-running-ttl-minutes"], 140 | "SUCCEEDED_TTL_MINUTES": config["pod-successful-ttl-minutes"], 141 | "UNSUCCESSFUL_TTL_MINUTES": config["pod-unsuccessful-ttl-minutes"], 142 | "INTERNAL_API_HOST": f"http://{app_name}:{INTERNAL_API_PORT}", 143 | "AIRBYTE_SERVER_HOST": f"{app_name}:{INTERNAL_API_PORT}", 144 | "CONFIG_API_HOST": f"{app_name}:{INTERNAL_API_PORT}", 145 | "CONNECTOR_BUILDER_SERVER_API_HOST": f"{app_name}:{CONNECTOR_BUILDER_SERVER_API_PORT}", 146 | "CONNECTOR_BUILDER_API_HOST": f"{app_name}:{CONNECTOR_BUILDER_SERVER_API_PORT}", 147 | "AIRBYTE_API_HOST": f"{app_name}:{AIRBYTE_API_PORT}/api/public", 148 | "WORKLOAD_API_HOST": f"{app_name}:{WORKLOAD_API_PORT}", 149 | "WORKLOAD_API_BEARER_TOKEN": ".Values.workload-api.bearerToken", 150 | "CONTROL_PLANE_TOKEN_ENDPOINT": f"http://{app_name}:{INTERNAL_API_PORT}/api/v1/dataplanes/token", 151 | } 152 | 153 | # https://github.com/airbytehq/airbyte/issues/29506#issuecomment-1775148609 154 | if container_name in ["airbyte-workload-launcher", "airbyte-workers", "airbyte-cron"]: 155 | env.update( 156 | { 157 | "INTERNAL_API_HOST": f"http://{app_name}:{INTERNAL_API_PORT}", 158 | "WORKLOAD_API_HOST": 
f"http://{app_name}:{WORKLOAD_API_PORT}", 159 | } 160 | ) 161 | 162 | if config["storage-type"].value == StorageType.minio and state.minio: 163 | minio_endpoint = construct_svc_endpoint( 164 | state.minio["service"], 165 | state.minio["namespace"], 166 | state.minio["port"], 167 | state.minio["secure"], 168 | ) 169 | env.update( 170 | { 171 | "MINIO_ENDPOINT": minio_endpoint, 172 | "AWS_ACCESS_KEY_ID": state.minio["access-key"], 173 | "AWS_SECRET_ACCESS_KEY": state.minio["secret-key"], 174 | "STATE_STORAGE_MINIO_ENDPOINT": minio_endpoint, 175 | "STATE_STORAGE_MINIO_ACCESS_KEY": state.minio["access-key"], 176 | "STATE_STORAGE_MINIO_SECRET_ACCESS_KEY": state.minio["secret-key"], 177 | "STATE_STORAGE_MINIO_BUCKET_NAME": config["storage-bucket-state"], 178 | "S3_PATH_STYLE_ACCESS": "true", 179 | } 180 | ) 181 | 182 | if config["storage-type"].value == StorageType.s3 and state.s3: 183 | env.update( 184 | { 185 | "AWS_ACCESS_KEY_ID": state.s3["access-key"], 186 | "AWS_SECRET_ACCESS_KEY": state.s3["secret-key"], 187 | "S3_LOG_BUCKET_REGION": state.s3["region"], 188 | "AWS_DEFAULT_REGION": state.s3["region"], 189 | } 190 | ) 191 | 192 | http_proxy = os.environ.get("JUJU_CHARM_HTTP_PROXY") 193 | https_proxy = os.environ.get("JUJU_CHARM_HTTPS_PROXY") 194 | no_proxy = os.environ.get("JUJU_CHARM_NO_PROXY") 195 | java_tool_options = _get_java_tool_options(http_proxy, https_proxy, no_proxy) 196 | 197 | if http_proxy: 198 | env.update( 199 | { 200 | "HTTP_PROXY": http_proxy, 201 | "http_proxy": http_proxy, 202 | "JAVA_TOOL_OPTIONS": java_tool_options, 203 | "JOB_DEFAULT_ENV_http_proxy": http_proxy, 204 | "JOB_DEFAULT_ENV_HTTP_PROXY": http_proxy, 205 | "JOB_DEFAULT_ENV_JAVA_TOOL_OPTIONS": java_tool_options, 206 | } 207 | ) 208 | 209 | if https_proxy: 210 | env.update( 211 | { 212 | "HTTPS_PROXY": https_proxy, 213 | "https_proxy": https_proxy, 214 | "JOB_DEFAULT_ENV_https_proxy": https_proxy, 215 | "JOB_DEFAULT_ENV_HTTPS_PROXY": https_proxy, 216 | } 217 | ) 218 | 219 | if no_proxy: 220 | env.update( 221 | { 222 | "NO_PROXY": no_proxy, 223 | "no_proxy": no_proxy, 224 | "JOB_DEFAULT_ENV_no_proxy": no_proxy, 225 | "JOB_DEFAULT_ENV_NO_PROXY": no_proxy, 226 | } 227 | ) 228 | 229 | return env 230 | 231 | 232 | def _get_java_tool_options(http_proxy, https_proxy, no_proxy): 233 | """Generate Java tool options for configuring HTTP and HTTPS proxies. 234 | 235 | Args: 236 | http_proxy: The HTTP proxy URL. 237 | https_proxy: The HTTPS proxy URL. 238 | no_proxy: A comma-separated string of hosts that should bypass the proxy. 239 | 240 | Returns: 241 | A string of Java tool options for the provided proxy settings. 242 | 243 | Raises: 244 | ValueError: If any provided proxy URL is invalid or cannot be parsed. 245 | """ 246 | options = "" 247 | try: 248 | if http_proxy: 249 | _, host, port = _split_url(http_proxy) 250 | options += f"-Dhttp.proxyHost={host} -Dhttp.proxyPort={port}" 251 | if https_proxy: 252 | _, host, port = _split_url(https_proxy) 253 | options += f" -Dhttps.proxyHost={host} -Dhttps.proxyPort={port}" 254 | if no_proxy: 255 | options += f" -Dhttp.nonProxyHosts={no_proxy.replace(',', '|')}" 256 | except Exception as e: 257 | raise ValueError(f"Invalid proxy URL: {e}") from e 258 | 259 | return options 260 | 261 | 262 | def _split_url(url): 263 | """Split the given URL into its components: protocol, host, and port. 264 | 265 | Args: 266 | url: The URL to be split. 267 | 268 | Returns: 269 | tuple: A tuple containing the protocol, host, and port. 
270 | 271 | Raises: 272 | ValueError: If the URL is invalid or cannot be parsed. 273 | """ 274 | try: 275 | parsed_url = urlparse(url) 276 | protocol = parsed_url.scheme 277 | host = parsed_url.hostname 278 | port = parsed_url.port 279 | 280 | if not protocol or not host: 281 | raise ValueError("Invalid URL: Protocol or host is missing") 282 | 283 | return protocol, host, port 284 | except Exception as e: 285 | raise ValueError(f"Invalid URL: {e}") from e 286 | 287 | 288 | def construct_svc_endpoint(service_name, namespace, port, secure=False): 289 | """Construct the endpoint for a Kubernetes service. 290 | 291 | Args: 292 | service_name (str): The name of the Kubernetes service. 293 | namespace (str): The namespace of the Kubernetes service. 294 | port (int): The port number of the Kubernetes service. 295 | secure (bool): Whether to use HTTPS (true) or HTTP (false). 296 | 297 | Returns: 298 | str: The constructed S3 service endpoint. 299 | """ 300 | protocol = "https" if secure else "http" 301 | return f"{protocol}://{service_name}.{namespace}.svc.cluster.local:{port}" 302 | -------------------------------------------------------------------------------- /charmcraft.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Canonical Ltd. 2 | # See LICENSE file for licensing details. 3 | 4 | # This file configures Charmcraft. 5 | # See https://juju.is/docs/sdk/charmcraft-config for guidance. 6 | 7 | name: airbyte-k8s 8 | type: charm 9 | title: Airbyte Server 10 | summary: Airbyte Server operator 11 | description: | 12 | Airbyte is an open-source data integration platform designed to centralize and 13 | streamline the process of extracting and loading data from various sources into 14 | data warehouses, lakes, or other destinations. 15 | 16 | links: 17 | documentation: https://discourse.charmhub.io/t/charmed-airbyte-k8s-overview/14530 18 | source: 19 | - https://github.com/canonical/airbyte-k8s-operator 20 | issues: 21 | - https://github.com/canonical/airbyte-k8s-operator/issues 22 | 23 | # (Required for 'charm' type) 24 | bases: 25 | - build-on: 26 | - name: ubuntu 27 | channel: "22.04" 28 | run-on: 29 | - name: ubuntu 30 | channel: "22.04" 31 | 32 | assumes: 33 | - juju >= 3.1 34 | - k8s-api 35 | 36 | # Metadata 37 | peers: 38 | airbyte-peer: 39 | interface: airbyte 40 | 41 | requires: 42 | db: 43 | interface: postgresql_client 44 | limit: 1 45 | 46 | object-storage: 47 | interface: object-storage 48 | limit: 1 49 | schema: 50 | v1: 51 | provides: 52 | type: object 53 | properties: 54 | access-key: 55 | type: string 56 | namespace: 57 | type: 58 | - string 59 | - 'null' 60 | port: 61 | type: number 62 | secret-key: 63 | type: string 64 | secure: 65 | type: boolean 66 | service: 67 | type: string 68 | required: 69 | - access-key 70 | - port 71 | - secret-key 72 | - secure 73 | - service 74 | versions: [v1] 75 | __schema_source: https://raw.githubusercontent.com/canonical/operator-schemas/master/object-storage.yaml 76 | 77 | s3-parameters: 78 | interface: s3 79 | limit: 1 80 | optional: true 81 | 82 | provides: 83 | airbyte-server: 84 | interface: airbyte-server 85 | optional: true 86 | limit: 1 87 | 88 | # (Optional) Configuration options for the charm 89 | # This config section defines charm config options, and populates the Configure 90 | # tab on Charmhub. 
91 | # More information on this section at https://juju.is/docs/sdk/charmcraft-yaml#heading--config 92 | # General configuration documentation: https://juju.is/docs/sdk/config 93 | config: 94 | options: 95 | log-level: 96 | description: | 97 | Configures the log level. 98 | 99 | Acceptable values are: "INFO", "DEBUG", "WARNING", "ERROR" and "FATAL" 100 | default: "INFO" 101 | type: string 102 | 103 | ##### Airbyte services config ##### 104 | temporal-host: 105 | description: Temporal server host. 106 | default: "temporal-k8s:7233" 107 | type: string 108 | 109 | webapp-url: 110 | description: URL for the Airbyte webapp. 111 | default: "http://airbyte-ui-k8s:8080" 112 | type: string 113 | 114 | ##### Secrets config ##### 115 | secret-persistence: 116 | description: | 117 | Defines the Secret Persistence type. 118 | 119 | If set, acceptable values are: "GOOGLE_SECRET_MANAGER", "AWS_SECRET_MANAGER", "TESTING_CONFIG_DB_TABLE", "VAULT" 120 | type: string 121 | 122 | secret-store-gcp-project-id: 123 | description: Defines the GCP Project to store secrets in. Alpha support. 124 | type: string 125 | 126 | secret-store-gcp-credentials: 127 | description: | 128 | Defines the JSON credentials used to read/write Airbyte Configuration to Google Secret Manager. 129 | 130 | These credentials must have Secret Manager Read/Write access. Alpha support. 131 | type: string 132 | 133 | vault-address: 134 | description: Defines the vault address to read/write Airbyte Configuration to Hashicorp Vault. Alpha Support. 135 | type: string 136 | 137 | vault-prefix: 138 | description: Defines the vault path prefix. Empty by default. Alpha Support. 139 | type: string 140 | 141 | vault-auth-token: 142 | description: The token used for vault authentication. Alpha Support. 143 | type: string 144 | 145 | vault-auth-method: 146 | description: How vault will perform authentication. Currently, only supports Token auth. Defaults to "token". Alpha Support. 147 | default: "token" 148 | type: string 149 | 150 | aws-access-key: 151 | description: Defines the aws_access_key_id from the AWS credentials to use for AWS Secret Manager. 152 | type: string 153 | 154 | aws-secret-access-key: 155 | description: Defines aws_secret_access_key to use for the AWS Secret Manager. 156 | type: string 157 | 158 | aws-kms-key-arn: 159 | description: Optional param that defines the KMS Encryption key used for the AWS Secret Manager. 160 | type: string 161 | 162 | aws-secret-manager-secret-tags: 163 | description: | 164 | Defines the tags that will be included to all writes to the AWS Secret Manager. 165 | 166 | The format should be "key1=value1,key2=value2". 167 | type: string 168 | 169 | ##### Jobs config ##### 170 | sync-job-retries-complete-failures-max-successive: 171 | description: Max number of successive attempts in which no data was synchronized before failing the job. 172 | default: 5 173 | type: int 174 | 175 | sync-job-retries-complete-failures-max-total: 176 | description: Max number of attempts in which no data was synchronized before failing the job. 177 | default: 10 178 | type: int 179 | 180 | sync-job-retries-complete-failures-backoff-min-interval-s: 181 | description: Minimum backoff interval in seconds between failed attempts in which no data was synchronized. 182 | default: 10 183 | type: int 184 | 185 | sync-job-retries-complete-failures-backoff-max-interval-s: 186 | description: Maximum backoff interval in seconds between failed attempts in which no data was synchronized. 
187 |       default: 1800
188 |       type: int
189 | 
190 |     sync-job-retries-complete-failures-backoff-base:
191 |       description: Exponential base of the backoff interval between failed attempts in which no data was synchronized.
192 |       default: 3
193 |       type: int
194 | 
195 |     sync-job-retries-partial-failures-max-successive:
196 |       description: Max number of successive attempts in which some data was synchronized before failing the job.
197 |       default: 1000
198 |       type: int
199 | 
200 |     sync-job-retries-partial-failures-max-total:
201 |       description: Max number of attempts in which some data was synchronized before failing the job.
202 |       default: 20
203 |       type: int
204 | 
205 |     sync-job-max-timeout-days:
206 |       description: Number of days a sync job will execute for before timing out.
207 |       default: 3
208 |       type: int
209 | 
210 |     job-main-container-cpu-request:
211 |       description: Job container's minimum CPU usage. Defaults to none.
212 |       type: string
213 | 
214 |     job-main-container-cpu-limit:
215 |       description: Job container's maximum CPU usage. Defaults to none.
216 |       type: string
217 | 
218 |     job-main-container-memory-request:
219 |       description: Job container's minimum RAM usage. Defaults to none.
220 |       type: string
221 | 
222 |     job-main-container-memory-limit:
223 |       description: Job container's maximum RAM usage. Defaults to none.
224 |       type: string
225 | 
226 |     ##### Connections config #####
227 |     max-fields-per-connections:
228 |       description: Maximum number of fields able to be selected for a single connection.
229 |       default: 20000
230 |       type: int
231 | 
232 |     max-days-of-only-failed-jobs-before-connection-disable:
233 |       description: Number of consecutive days of only failed jobs before the connection is disabled.
234 |       default: 14
235 |       type: int
236 | 
237 |     max-failed-jobs-in-a-row-before-connection-disable:
238 |       description: Number of consecutive failed jobs before the connection is disabled.
239 |       default: 20
240 |       type: int
241 | 
242 |     ##### Worker config #####
243 |     max-spec-workers:
244 |       description: Maximum number of Spec workers each Airbyte Worker container can support. Defaults to 5.
245 |       default: 5
246 |       type: int
247 | 
248 |     max-check-workers:
249 |       description: Maximum number of Check workers each Airbyte Worker container can support. Defaults to 5.
250 |       default: 5
251 |       type: int
252 | 
253 |     max-sync-workers:
254 |       description: Maximum number of Sync workers each Airbyte Worker container can support. Defaults to 5.
255 |       default: 5
256 |       type: int
257 | 
258 |     max-discover-workers:
259 |       description: Maximum number of Discover workers each Airbyte Worker container can support. Defaults to 5.
260 |       default: 5
261 |       type: int
262 | 
263 |     ##### Data retention config #####
264 |     temporal-history-retention-in-days:
265 |       description: Retention period of the job history in Temporal, defaults to 30 days.
266 |       default: 30
267 |       type: int
268 | 
269 |     ##### Kubernetes config #####
270 |     job-kube-tolerations:
271 |       description: |
272 |         Defines one or more Job pod tolerations.
273 | 
274 |         Tolerations are separated by ';'. Each toleration contains k=v pairs for some or all
275 |         of key, effect, operator and value, separated by ','.
276 |       type: string
277 | 
278 |     job-kube-node-selectors:
279 |       description: |
280 |         Defines one or more Job pod node selectors.
281 | 
282 |         Each k=v pair is separated by a ','. For example: key1=value1,key2=value2.
283 |         It is the pod node selectors of the "sync" job. It also serves as the
284 |         default pod node selectors fallback for other jobs.
285 | type: string 286 | 287 | job-kube-annotations: 288 | description: | 289 | Defines one or more Job pod annotations. 290 | 291 | Each k=v pair is separated by a ','. For example: key1=value1,key2=value2. 292 | It is the pod annotations of the "sync" job. It also serves as the 293 | default pod annotations fallback for other jobs. 294 | type: string 295 | 296 | job-kube-main-container-image-pull-policy: 297 | description: Defines the Job pod connector image pull policy. 298 | default: "IfNotPresent" 299 | type: string 300 | 301 | job-kube-main-container-image-pull-secret: 302 | description: Defines the Job pod connector image pull secret. Useful when hosting private images. 303 | type: string 304 | 305 | job-kube-sidecar-container-image-pull-policy: 306 | description: | 307 | Defines the image pull policy on the sidecar containers in the Job pod. 308 | 309 | Useful when there are cluster policies enforcing to always pull. 310 | default: "IfNotPresent" 311 | type: string 312 | 313 | job-kube-socat-image: 314 | description: Defines the Job pod socat image. e.g. alpine/socat:1.7.4.3-r0 315 | type: string 316 | 317 | job-kube-busybox-image: 318 | description: Defines the Job pod busybox image. e.g. busybox:1.28 319 | type: string 320 | 321 | job-kube-curl-image: 322 | description: Defines the Job pod curl image. e.g. curlimages/curl:7.83.1 323 | type: string 324 | 325 | job-kube-namespace: 326 | description: | 327 | Defines the Kubernetes namespace Job pods are created in. 328 | 329 | Defaults to the current namespace. 330 | type: string 331 | 332 | ##### Jobs config ##### 333 | spec-job-kube-node-selectors: 334 | description: | 335 | Defines one or more pod node selectors for the spec job. 336 | 337 | Each k=v pair is separated by a ','. For example: key1=value1,key2=value2. 338 | type: string 339 | 340 | check-job-kube-node-selectors: 341 | description: | 342 | Defines one or more pod node selectors for the check job. 343 | 344 | Each k=v pair is separated by a ','. For example: key1=value1,key2=value2. 345 | type: string 346 | 347 | discover-job-kube-node-selectors: 348 | description: | 349 | Defines one or more pod node selectors for the discover job. 350 | 351 | Each k=v pair is separated by a ','. For example: key1=value1,key2=value2. 352 | type: string 353 | 354 | spec-job-kube-annotations: 355 | description: | 356 | Defines one or more pod annotations for the spec job. 357 | 358 | Each k=v pair is separated by a ','. For example: key1=value1,key2=value2 359 | type: string 360 | 361 | check-job-kube-annotations: 362 | description: | 363 | Defines one or more pod annotations for the check job. 364 | 365 | Each k=v pair is separated by a ','. For example: key1=value1,key2=value2 366 | type: string 367 | 368 | discover-job-kube-annotations: 369 | description: | 370 | Defines one or more pod annotations for the discover job. 371 | 372 | Each k=v pair is separated by a ','. For example: key1=value1,key2=value2 373 | type: string 374 | 375 | ##### Logging config ##### 376 | storage-type: 377 | description: | 378 | Storage type for logs. 379 | 380 | Acceptable values are: "MINIO", "S3" (AWS) 381 | default: "MINIO" 382 | type: string 383 | 384 | storage-bucket-logs: 385 | description: Name of logs storage bucket. 386 | default: "airbyte-dev-logs" 387 | type: string 388 | 389 | logs-ttl: 390 | description: | 391 | Number of days until logs are purged from object storage. 392 | default: 30 393 | type: int 394 | 395 | storage-bucket-state: 396 | description: Name of state storage bucket. 
397 | default: "airbyte-state-storage" 398 | type: string 399 | 400 | storage-bucket-activity-payload: 401 | description: Name of activity payload storage bucket. 402 | default: "airbyte-payload-storage" 403 | type: string 404 | 405 | storage-bucket-workload-output: 406 | description: Name of workload output storage bucket. 407 | default: "airbyte-state-storage" 408 | type: string 409 | 410 | ##### Miscellaneous config ##### 411 | pod-running-ttl-minutes: 412 | description: Number of minutes until a running job pod is removed. 413 | default: 240 414 | type: int 415 | 416 | pod-successful-ttl-minutes: 417 | description: Number of minutes until a successful job pod is removed. 418 | default: 30 419 | type: int 420 | 421 | pod-unsuccessful-ttl-minutes: 422 | description: Number of minutes until an unsuccessful job pod is removed. 423 | default: 1440 424 | type: int 425 | 426 | heartbeat-max-seconds-between-messages: 427 | description: | 428 | Maximum seconds allowed between heartbeat messages. 429 | type: int 430 | 431 | heartbeat-fail-sync: 432 | description: | 433 | If set to true, the platform will fail the sync when a heartbeat timeout is detected. 434 | type: boolean 435 | 436 | destination-timeout-max-seconds: 437 | description: | 438 | Maximum seconds the platform waits before deciding that a destination has timed out. 439 | type: int 440 | 441 | destination-timeout-fail-sync: 442 | description: | 443 | If set to true and the platform detects a destination timeout, it will fail the sync. 444 | type: boolean 445 | 446 | # The containers and resources metadata apply to Kubernetes charms only. 447 | # See https://juju.is/docs/sdk/metadata-reference for a checklist and guidance. 448 | 449 | # Your workload’s containers. 450 | containers: 451 | airbyte-workload-api-server: 452 | resource: airbyte-image 453 | airbyte-workload-launcher: 454 | resource: airbyte-image 455 | airbyte-bootloader: 456 | resource: airbyte-image 457 | airbyte-connector-builder-server: 458 | resource: airbyte-image 459 | airbyte-cron: 460 | resource: airbyte-image 461 | airbyte-pod-sweeper: 462 | resource: airbyte-image 463 | airbyte-server: 464 | resource: airbyte-image 465 | airbyte-workers: 466 | resource: airbyte-image 467 | 468 | # This field populates the Resources tab on Charmhub. 469 | resources: 470 | airbyte-image: 471 | type: oci-image 472 | description: OCI image for Airbyte 473 | -------------------------------------------------------------------------------- /src/charm.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright 2024 Canonical Ltd. 3 | # See LICENSE file for licensing details. 
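# Relation wiring sketch (illustrative; mirrors the integrations exercised in
# tests/integration/helpers.py):
#
#   juju deploy airbyte-k8s
#   juju integrate airbyte-k8s postgresql-k8s
#   juju integrate airbyte-k8s minio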
4 | 5 | """Charm the application.""" 6 | import base64 7 | import hashlib 8 | import logging 9 | 10 | import kubernetes.client 11 | import ops 12 | from botocore.exceptions import ClientError 13 | from charms.data_platform_libs.v0.data_models import TypedCharmBase 14 | from charms.data_platform_libs.v0.database_requires import DatabaseRequires 15 | from charms.data_platform_libs.v0.s3 import S3Requirer 16 | from kubernetes.client.exceptions import ApiException 17 | from ops.model import ActiveStatus, BlockedStatus, MaintenanceStatus, WaitingStatus 18 | from ops.pebble import CheckStatus 19 | 20 | from charm_helpers import create_env 21 | from literals import ( 22 | AIRBYTE_API_PORT, 23 | AIRBYTE_AUTH_K8S_SECRET_NAME, 24 | AIRBYTE_VERSION, 25 | BUCKET_CONFIGS, 26 | CONNECTOR_BUILDER_SERVER_API_PORT, 27 | CONTAINER_HEALTH_CHECK_MAP, 28 | FLAGS_FILE_PATH, 29 | INTERNAL_API_PORT, 30 | LOGS_BUCKET_CONFIG, 31 | REQUIRED_S3_PARAMETERS, 32 | WORKLOAD_API_PORT, 33 | WORKLOAD_LAUNCHER_PORT, 34 | ) 35 | from log import log_event_handler 36 | from relations.airbyte_ui import AirbyteServerProvider 37 | from relations.minio import MinioRelation 38 | from relations.postgresql import PostgresqlRelation 39 | from relations.s3 import S3Integrator 40 | from s3_helpers import S3Client 41 | from state import State 42 | from structured_config import CharmConfig, StorageType 43 | from utils import render_template, use_feature_flags 44 | 45 | logger = logging.getLogger(__name__) 46 | 47 | 48 | def get_pebble_layer(application_name, context): 49 | """Create pebble layer based on application. 50 | 51 | Args: 52 | application_name: Name of Airbyte application. 53 | context: environment to include with the pebble plan. 54 | 55 | Returns: 56 | pebble plan dict. 57 | """ 58 | pebble_layer = { 59 | "summary": "airbyte layer", 60 | "services": { 61 | application_name: { 62 | "summary": application_name, 63 | "command": f"/bin/bash {application_name}/airbyte-app/bin/{application_name}", 64 | "startup": "enabled", 65 | "override": "replace", 66 | # Including config values here so that a change in the 67 | # config forces replanning to restart the service. 68 | "environment": context, 69 | }, 70 | }, 71 | } 72 | 73 | if application_name == "airbyte-bootloader": 74 | pebble_layer["services"][application_name].update( 75 | { 76 | "on-success": "ignore", 77 | } 78 | ) 79 | 80 | application_info = CONTAINER_HEALTH_CHECK_MAP[application_name] 81 | if application_info is not None: 82 | pebble_layer["services"][application_name].update( 83 | { 84 | "on-check-failure": {"up": "ignore"}, 85 | } 86 | ) 87 | pebble_layer.update( 88 | { 89 | "checks": { 90 | "up": { 91 | "override": "replace", 92 | "period": "10s", 93 | "http": { 94 | "url": f"http://localhost:{application_info['port']}{application_info['health_endpoint']}" 95 | }, 96 | } 97 | } 98 | } 99 | ) 100 | 101 | return pebble_layer 102 | 103 | 104 | class AirbyteK8SOperatorCharm(TypedCharmBase[CharmConfig]): 105 | """Airbyte Server charm. 106 | 107 | Attrs: 108 | _state: used to store data that is persisted across invocations. 109 | config_type: the charm structured config 110 | """ 111 | 112 | config_type = CharmConfig 113 | 114 | def __init__(self, *args): 115 | """Construct. 116 | 117 | Args: 118 | args: Ignore. 
119 | """ 120 | super().__init__(*args) 121 | kubernetes.config.load_incluster_config() 122 | self._k8s_client = kubernetes.client.CoreV1Api() 123 | 124 | self._state = State(self.app, lambda: self.model.get_relation("airbyte-peer")) 125 | 126 | self.framework.observe(self.on.config_changed, self._on_config_changed) 127 | self.framework.observe(self.on.airbyte_peer_relation_changed, self._on_peer_relation_changed) 128 | self.framework.observe(self.on.update_status, self._on_update_status) 129 | 130 | # Handle postgresql relation. 131 | self.db = DatabaseRequires(self, relation_name="db", database_name="airbyte-k8s_db", extra_user_roles="admin") 132 | self.postgresql = PostgresqlRelation(self) 133 | 134 | self.minio = MinioRelation(self) 135 | 136 | # Handle S3 integrator relation 137 | self.s3_client = S3Requirer(self, "s3-parameters") 138 | self.s3_relation = S3Integrator(self) 139 | 140 | # Handle UI relation 141 | self.airbyte_ui = AirbyteServerProvider(self) 142 | 143 | for container_name in CONTAINER_HEALTH_CHECK_MAP: 144 | self.framework.observe(self.on[container_name].pebble_ready, self._on_pebble_ready) 145 | 146 | @log_event_handler(logger) 147 | def _on_pebble_ready(self, event: ops.PebbleReadyEvent): 148 | """Handle pebble ready event. 149 | 150 | Args: 151 | event: The event triggered. 152 | """ 153 | self._update(event) 154 | 155 | @log_event_handler(logger) 156 | def _on_peer_relation_changed(self, event): 157 | """Handle peer relation changed event. 158 | 159 | Args: 160 | event: The event triggered when the relation changed. 161 | """ 162 | self._update(event) 163 | 164 | @log_event_handler(logger) 165 | def _on_update_status(self, event): 166 | """Handle `update-status` events. 167 | 168 | Args: 169 | event: The `update-status` event triggered at intervals. 170 | """ 171 | try: 172 | self._validate() 173 | except ValueError: 174 | return 175 | 176 | all_valid_plans = True 177 | for container_name, settings in CONTAINER_HEALTH_CHECK_MAP.items(): 178 | if not settings: 179 | continue 180 | 181 | container = self.unit.get_container(container_name) 182 | valid_pebble_plan = self._validate_pebble_plan(container, container_name) 183 | logger.info(f"validating pebble plan for {container_name}") 184 | if not valid_pebble_plan: 185 | logger.debug(f"failed to validate pebble plan for {container_name}, attempting creation again") 186 | all_valid_plans = False 187 | continue 188 | 189 | logger.info(f"performing up check for {container_name}") 190 | check = container.get_check("up") 191 | if check.status != CheckStatus.UP: 192 | logger.error(f"check failed for {container_name}") 193 | self.unit.status = MaintenanceStatus(f"Status check: {container_name!r} DOWN") 194 | return 195 | 196 | if not all_valid_plans: 197 | self._update(event) 198 | return 199 | 200 | self.unit.set_workload_version(f"v{AIRBYTE_VERSION}") 201 | self.unit.status = ActiveStatus() 202 | if self.unit.is_leader(): 203 | self.airbyte_ui._provide_server_status() 204 | 205 | def _validate_pebble_plan(self, container, container_name): 206 | """Validate pebble plan. 
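
        A plan is considered valid when the service entry exists and declares
        the "on-check-failure" handler that get_pebble_layer adds for
        health-checked containers.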
207 | 208 | Args: 209 | container: application container 210 | container_name: name of container to check 211 | 212 | Returns: 213 | bool of pebble plan validity 214 | """ 215 | try: 216 | plan = container.get_plan().to_dict() 217 | return bool(plan["services"][container_name]["on-check-failure"]) 218 | except (KeyError, ops.pebble.ConnectionError): 219 | return False 220 | 221 | @log_event_handler(logger) 222 | def _on_config_changed(self, event): 223 | """Handle changed configuration. 224 | 225 | Args: 226 | event: The event triggered when the configuration changed. 227 | """ 228 | self.unit.status = WaitingStatus("configuring application") 229 | self._update(event) 230 | 231 | def _check_missing_params(self, params, required_params): 232 | """Validate that all required properties were extracted. 233 | 234 | Args: 235 | params: dictionary of parameters extracted from relation. 236 | required_params: list of required parameters. 237 | 238 | Returns: 239 | list: List of required parameters that are not set in state. 240 | """ 241 | missing_params = [] 242 | for key in required_params: 243 | if params.get(key) is None: 244 | missing_params.append(key) 245 | return missing_params 246 | 247 | def _generate_flags_yaml_content(self): 248 | """Generate flags.yaml content from opinionated config using Jinja2 template. 249 | 250 | Returns: 251 | str or None: The flags.yaml content as a string, or None if no flags are configured. 252 | """ 253 | # Check if any flags are configured 254 | if not use_feature_flags(self.config): 255 | return None 256 | 257 | # Prepare template context 258 | context = { 259 | "heartbeat_max_seconds_between_messages": self.config["heartbeat-max-seconds-between-messages"], 260 | "heartbeat_fail_sync": self.config["heartbeat-fail-sync"], 261 | "destination_timeout_max_seconds": self.config["destination-timeout-max-seconds"], 262 | "destination_timeout_fail_sync": self.config["destination-timeout-fail-sync"], 263 | } 264 | 265 | # Render template 266 | return render_template("flags.jinja", context) 267 | 268 | def _push_flags_to_container(self, container, container_name, flags_yaml_content, env): 269 | """Push generated flags content to container if available. 270 | 271 | Args: 272 | container: The container to push flags to. 273 | container_name: Name of the container. 274 | flags_yaml_content: The flags YAML content to push, or None. 275 | env: Environment dictionary (not modified here; the flags hash is added separately by _add_flags_hash_to_env). 276 | """ 277 | if not flags_yaml_content: 278 | return 279 | 280 | try: 281 | # Airbyte ConfigFileClient reads a file at FEATURE_FLAG_PATH. 282 | # We set FEATURE_FLAG_PATH=/flags 283 | # so write the YAML directly to the file path '/flags' (no extension). 284 | container.push(FLAGS_FILE_PATH, flags_yaml_content) 285 | logger.info("Pushed flags to %s at %s", container_name, FLAGS_FILE_PATH) 286 | except Exception as e: 287 | logger.error("Failed to push flags file to %s: %s", container_name, e) 288 | self.unit.status = BlockedStatus(f"failed to push flags file: {str(e)}") 289 | return 290 | 291 | def _add_flags_hash_to_env(self, flags_yaml_content, container_name, env): 292 | """Add a hash of flags content to env to force replan+restart when flags change. 293 | 294 | Args: 295 | flags_yaml_content: The flags YAML content to hash, or None. 296 | container_name: Name of the container. 297 | env: Environment dictionary to update with flags hash.
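
        The hash lands in the service environment, so any change to the
        rendered flags content changes the Pebble plan, and the subsequent
        replan restarts the service with the new flags file.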
298 | """ 299 | if not flags_yaml_content: 300 | return 301 | 302 | try: 303 | flags_hash = hashlib.sha256(flags_yaml_content.encode("utf-8")).hexdigest() 304 | env.update({"FEATURE_FLAG_HASH": flags_hash}) 305 | except Exception as e: 306 | logger.warning("Failed to compute flags hash for %s: %s", container_name, e) 307 | 308 | def _validate(self): 309 | """Validate that configuration and relations are valid and ready. 310 | 311 | Raises: 312 | ValueError: in case of invalid configuration. 313 | """ 314 | # Validate peer relation 315 | if not self._state.is_ready(): 316 | raise ValueError("peer relation not ready") 317 | 318 | # Validate db relation 319 | if self._state.database_connection is None: 320 | raise ValueError("database relation not ready") 321 | 322 | # Validate minio relation 323 | if self.config["storage-type"] == StorageType.minio and self._state.minio is None: 324 | raise ValueError("minio relation not ready") 325 | 326 | # Validate S3 relation 327 | if self.config["storage-type"] == StorageType.s3 and self._state.s3 is None: 328 | raise ValueError("s3 relation not ready") 329 | 330 | # Validate S3 relation. 331 | if self._state.s3: 332 | missing_params = self._check_missing_params(self._state.s3, REQUIRED_S3_PARAMETERS) 333 | if len(missing_params) > 0: 334 | raise ValueError(f"s3:missing parameters {missing_params!r}") 335 | 336 | def _update(self, event): # noqa: C901 337 | """Update configuration and replan its execution. 338 | 339 | Args: 340 | event: The event triggered when the relation changed. 341 | """ 342 | try: 343 | self._validate() 344 | except ValueError as err: 345 | self.unit.status = BlockedStatus(str(err)) 346 | return 347 | 348 | s3_parameters = self._state.s3 349 | if self.config["storage-type"] == StorageType.minio: 350 | s3_parameters = self._state.minio 351 | 352 | try: 353 | s3_client = S3Client(s3_parameters) 354 | 355 | for bucket_config in BUCKET_CONFIGS: 356 | bucket = self.config[bucket_config] 357 | s3_client.create_bucket_if_not_exists(bucket) 358 | 359 | logs_ttl = int(self.config["logs-ttl"]) 360 | s3_client.set_bucket_lifecycle_policy(bucket_name=self.config[LOGS_BUCKET_CONFIG], ttl=logs_ttl) 361 | except (ClientError, ValueError) as e: 362 | logger.error(f"Error creating bucket and setting lifecycle policy: {e}") 363 | self.unit.status = BlockedStatus(f"failed to create buckets: {str(e)}") 364 | return 365 | 366 | self.model.unit.set_ports( 367 | AIRBYTE_API_PORT, 368 | INTERNAL_API_PORT, 369 | CONNECTOR_BUILDER_SERVER_API_PORT, 370 | WORKLOAD_API_PORT, 371 | WORKLOAD_LAUNCHER_PORT, 372 | ) 373 | 374 | flags_yaml_content = self._generate_flags_yaml_content() 375 | 376 | for container_name in CONTAINER_HEALTH_CHECK_MAP: 377 | container = self.unit.get_container(container_name) 378 | if not container.can_connect(): 379 | event.defer() 380 | return 381 | 382 | env = create_env(self.model.name, self.app.name, container_name, self.config, self._state) 383 | env = {k: v for k, v in env.items() if v is not None} 384 | 385 | self._push_flags_to_container(container, container_name, flags_yaml_content, env) 386 | self._add_flags_hash_to_env(flags_yaml_content, container_name, env) 387 | 388 | # Read values from k8s secret created by airbyte-bootloader and add 389 | # them to the pebble plan. 
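            # The secret is expected to carry base64-encoded
            # "dataplane-client-id" and "dataplane-client-secret" entries; a
            # missing secret (HTTP 404) is tolerated, and the charm simply
            # proceeds without dataplane credentials.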
390 | try: 391 | secret = self._k8s_client.read_namespaced_secret(AIRBYTE_AUTH_K8S_SECRET_NAME, self.model.name) 392 | decoded_data = {k: base64.b64decode(v).decode("utf-8") for k, v in secret.data.items()} 393 | 394 | if decoded_data.get("dataplane-client-id"): 395 | env.update({"DATAPLANE_CLIENT_ID": decoded_data["dataplane-client-id"]}) 396 | if decoded_data.get("dataplane-client-secret"): 397 | env.update({"DATAPLANE_CLIENT_SECRET": decoded_data["dataplane-client-secret"]}) 398 | 399 | except ApiException as e: 400 | if e.status == 404: 401 | logger.info( 402 | "Secret '%s' not found in namespace '%s'.", AIRBYTE_AUTH_K8S_SECRET_NAME, self.model.name 403 | ) 404 | else: 405 | logger.error("Error reading secret '%s': %s", AIRBYTE_AUTH_K8S_SECRET_NAME, str(e)) 406 | 407 | pebble_layer = get_pebble_layer(container_name, env) 408 | container.add_layer(container_name, pebble_layer, combine=True) 409 | container.replan() 410 | 411 | self.unit.status = MaintenanceStatus("replanning application") 412 | 413 | 414 | if __name__ == "__main__": # pragma: nocover 415 | ops.main(AirbyteK8SOperatorCharm) # type: ignore 416 | -------------------------------------------------------------------------------- /lib/charms/data_platform_libs/v0/database_requires.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Canonical Ltd. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | r"""[DEPRECATED] Relation 'requires' side abstraction for database relation. 16 | 17 | This library is a uniform interface to a selection of common database 18 | metadata, with added custom events that add convenience to database management, 19 | and methods to consume the application related data. 20 | 21 | Following an example of using the DatabaseCreatedEvent, in the context of the 22 | application charm code: 23 | 24 | ```python 25 | 26 | from charms.data_platform_libs.v0.database_requires import ( 27 | DatabaseCreatedEvent, 28 | DatabaseRequires, 29 | ) 30 | 31 | class ApplicationCharm(CharmBase): 32 | # Application charm that connects to database charms. 33 | 34 | def __init__(self, *args): 35 | super().__init__(*args) 36 | 37 | # Charm events defined in the database requires charm library.
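        # (relation_name must match a "requires" endpoint declared in the
        # charm's metadata; the library looks it up via self.charm.meta.requires.)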
38 | self.database = DatabaseRequires(self, relation_name="database", database_name="database") 39 | self.framework.observe(self.database.on.database_created, self._on_database_created) 40 | 41 | def _on_database_created(self, event: DatabaseCreatedEvent) -> None: 42 | # Handle the created database 43 | 44 | # Create configuration file for app 45 | config_file = self._render_app_config_file( 46 | event.username, 47 | event.password, 48 | event.endpoints, 49 | ) 50 | 51 | # Start application with rendered configuration 52 | self._start_application(config_file) 53 | 54 | # Set active status 55 | self.unit.status = ActiveStatus("received database credentials") 56 | ``` 57 | 58 | As shown above, the library provides some custom events to handle specific situations, 59 | which are listed below: 60 | 61 | - database_created: event emitted when the requested database is created. 62 | - endpoints_changed: event emitted when the read/write endpoints of the database have changed. 63 | - read_only_endpoints_changed: event emitted when the read-only endpoints of the database 64 | have changed. Event is not triggered if read/write endpoints changed too. 65 | 66 | If it is needed to connect multiple database clusters to the same relation endpoint 67 | the application charm can implement the same code as if it would connect to only 68 | one database cluster (like the above code example). 69 | 70 | To differentiate multiple clusters connected to the same relation endpoint 71 | the application charm can use the name of the remote application: 72 | 73 | ```python 74 | 75 | def _on_database_created(self, event: DatabaseCreatedEvent) -> None: 76 | # Get the remote app name of the cluster that triggered this event 77 | cluster = event.relation.app.name 78 | ``` 79 | 80 | It is also possible to provide an alias for each different database cluster/relation. 81 | 82 | So, it is possible to differentiate the clusters in two ways. 83 | The first is to use the remote application name, i.e., `event.relation.app.name`, as above. 84 | 85 | The second way is to use different event handlers to handle each cluster events. 86 | The implementation would be something like the following code: 87 | 88 | ```python 89 | 90 | from charms.data_platform_libs.v0.database_requires import ( 91 | DatabaseCreatedEvent, 92 | DatabaseRequires, 93 | ) 94 | 95 | class ApplicationCharm(CharmBase): 96 | # Application charm that connects to database charms. 97 | 98 | def __init__(self, *args): 99 | super().__init__(*args) 100 | 101 | # Define the cluster aliases and one handler for each cluster database created event. 102 | self.database = DatabaseRequires( 103 | self, 104 | relation_name="database", 105 | database_name="database", 106 | relations_aliases = ["cluster1", "cluster2"], 107 | ) 108 | self.framework.observe( 109 | self.database.on.cluster1_database_created, self._on_cluster1_database_created 110 | ) 111 | self.framework.observe( 112 | self.database.on.cluster2_database_created, self._on_cluster2_database_created 113 | ) 114 | 115 | def _on_cluster1_database_created(self, event: DatabaseCreatedEvent) -> None: 116 | # Handle the created database on the cluster named cluster1 117 | 118 | # Create configuration file for app 119 | config_file = self._render_app_config_file( 120 | event.username, 121 | event.password, 122 | event.endpoints, 123 | ) 124 | ...
125 | 126 | def _on_cluster2_database_created(self, event: DatabaseCreatedEvent) -> None: 127 | # Handle the created database on the cluster named cluster2 128 | 129 | # Create configuration file for app 130 | config_file = self._render_app_config_file( 131 | event.username, 132 | event.password, 133 | event.endpoints, 134 | ) 135 | ... 136 | 137 | ``` 138 | """ 139 | 140 | import json 141 | import logging 142 | from collections import namedtuple 143 | from datetime import datetime 144 | from typing import List, Optional 145 | 146 | from ops.charm import ( 147 | CharmEvents, 148 | RelationChangedEvent, 149 | RelationEvent, 150 | RelationJoinedEvent, 151 | ) 152 | from ops.framework import EventSource, Object 153 | from ops.model import Relation 154 | 155 | # The unique Charmhub library identifier, never change it 156 | LIBID = "0241e088ffa9440fb4e3126349b2fb62" 157 | 158 | # Increment this major API version when introducing breaking changes 159 | LIBAPI = 0 160 | 161 | # Increment this PATCH version before using `charmcraft publish-lib` or reset 162 | # to 0 if you are raising the major API version. 163 | LIBPATCH = 6 164 | 165 | logger = logging.getLogger(__name__) 166 | 167 | 168 | class DatabaseEvent(RelationEvent): 169 | """Base class for database events.""" 170 | 171 | @property 172 | def endpoints(self) -> Optional[str]: 173 | """Returns a comma separated list of read/write endpoints.""" 174 | if not self.relation.app: 175 | return None 176 | 177 | return self.relation.data[self.relation.app].get("endpoints") 178 | 179 | @property 180 | def password(self) -> Optional[str]: 181 | """Returns the password for the created user.""" 182 | if not self.relation.app: 183 | return None 184 | 185 | return self.relation.data[self.relation.app].get("password") 186 | 187 | @property 188 | def read_only_endpoints(self) -> Optional[str]: 189 | """Returns a comma separated list of read only endpoints.""" 190 | if not self.relation.app: 191 | return None 192 | 193 | return self.relation.data[self.relation.app].get("read-only-endpoints") 194 | 195 | @property 196 | def replset(self) -> Optional[str]: 197 | """Returns the replicaset name. 198 | 199 | MongoDB only. 200 | """ 201 | if not self.relation.app: 202 | return None 203 | 204 | return self.relation.data[self.relation.app].get("replset") 205 | 206 | @property 207 | def tls(self) -> Optional[str]: 208 | """Returns whether TLS is configured.""" 209 | if not self.relation.app: 210 | return None 211 | 212 | return self.relation.data[self.relation.app].get("tls") 213 | 214 | @property 215 | def tls_ca(self) -> Optional[str]: 216 | """Returns TLS CA.""" 217 | if not self.relation.app: 218 | return None 219 | 220 | return self.relation.data[self.relation.app].get("tls-ca") 221 | 222 | @property 223 | def uris(self) -> Optional[str]: 224 | """Returns the connection URIs. 225 | 226 | MongoDB, Redis, OpenSearch and Kafka only. 227 | """ 228 | if not self.relation.app: 229 | return None 230 | 231 | return self.relation.data[self.relation.app].get("uris") 232 | 233 | @property 234 | def username(self) -> Optional[str]: 235 | """Returns the created username.""" 236 | if not self.relation.app: 237 | return None 238 | 239 | return self.relation.data[self.relation.app].get("username") 240 | 241 | @property 242 | def version(self) -> Optional[str]: 243 | """Returns the version of the database. 244 | 245 | Version as informed by the database daemon. 
246 | """ 247 | if not self.relation.app: 248 | return None 249 | 250 | return self.relation.data[self.relation.app].get("version") 251 | 252 | 253 | class DatabaseCreatedEvent(DatabaseEvent): 254 | """Event emitted when a new database is created for use on this relation.""" 255 | 256 | 257 | class DatabaseEndpointsChangedEvent(DatabaseEvent): 258 | """Event emitted when the read/write endpoints are changed.""" 259 | 260 | 261 | class DatabaseReadOnlyEndpointsChangedEvent(DatabaseEvent): 262 | """Event emitted when the read only endpoints are changed.""" 263 | 264 | 265 | class DatabaseEvents(CharmEvents): 266 | """Database events. 267 | 268 | This class defines the events that the database can emit. 269 | """ 270 | 271 | database_created = EventSource(DatabaseCreatedEvent) 272 | endpoints_changed = EventSource(DatabaseEndpointsChangedEvent) 273 | read_only_endpoints_changed = EventSource(DatabaseReadOnlyEndpointsChangedEvent) 274 | 275 | 276 | Diff = namedtuple("Diff", "added changed deleted") 277 | Diff.__doc__ = """ 278 | A tuple for storing the diff between two data mappings. 279 | 280 | - added - keys that were added. 281 | - changed - keys that still exist but have new values. 282 | - deleted - keys that were deleted. 283 | """ 284 | 285 | 286 | class DatabaseRequires(Object): 287 | """Requires-side of the database relation.""" 288 | 289 | on = DatabaseEvents()  # pyright: ignore [reportGeneralTypeIssues] 290 | 291 | def __init__( 292 | self, 293 | charm, 294 | relation_name: str, 295 | database_name: str, 296 | extra_user_roles: Optional[str] = None, 297 | relations_aliases: Optional[List[str]] = None, 298 | ): 299 | """Manager of database client relations.""" 300 | super().__init__(charm, relation_name) 301 | self.charm = charm 302 | self.database = database_name 303 | self.extra_user_roles = extra_user_roles 304 | self.local_app = self.charm.model.app 305 | self.local_unit = self.charm.unit 306 | self.relation_name = relation_name 307 | self.relations_aliases = relations_aliases 308 | self.framework.observe( 309 | self.charm.on[relation_name].relation_joined, self._on_relation_joined_event 310 | ) 311 | self.framework.observe( 312 | self.charm.on[relation_name].relation_changed, self._on_relation_changed_event 313 | ) 314 | 315 | # Define custom event names for each alias. 316 | if relations_aliases: 317 | # Ensure the number of aliases does not exceed the maximum 318 | # of connections allowed in the specific relation. 319 | relation_connection_limit = self.charm.meta.requires[relation_name].limit 320 | if len(relations_aliases) != relation_connection_limit: 321 | raise ValueError( 322 | f"The number of aliases must match the maximum number of connections allowed in the relation. " 323 | f"Expected {relation_connection_limit}, got {len(relations_aliases)}" 324 | ) 325 | 326 | for relation_alias in relations_aliases: 327 | self.on.define_event(f"{relation_alias}_database_created", DatabaseCreatedEvent) 328 | self.on.define_event( 329 | f"{relation_alias}_endpoints_changed", DatabaseEndpointsChangedEvent 330 | ) 331 | self.on.define_event( 332 | f"{relation_alias}_read_only_endpoints_changed", 333 | DatabaseReadOnlyEndpointsChangedEvent, 334 | ) 335 | 336 | def _assign_relation_alias(self, relation_id: int) -> None: 337 | """Assigns an alias to a relation. 338 | 339 | This function writes in the unit data bag. 340 | 341 | Args: 342 | relation_id: the identifier for a particular relation. 343 | """ 344 | # If no aliases were provided, return immediately.
345 | if not self.relations_aliases: 346 | return 347 | 348 | # Return if an alias was already assigned to this relation 349 | # (like when there are more than one unit joining the relation). 350 | if ( 351 | self.charm.model.get_relation(self.relation_name, relation_id) 352 | .data[self.local_unit] 353 | .get("alias") 354 | ): 355 | return 356 | 357 | # Retrieve the available aliases (the ones that weren't assigned to any relation). 358 | available_aliases = self.relations_aliases[:] 359 | for relation in self.charm.model.relations[self.relation_name]: 360 | alias = relation.data[self.local_unit].get("alias") 361 | if alias: 362 | logger.debug("Alias %s was already assigned to relation %d", alias, relation.id) 363 | available_aliases.remove(alias) 364 | 365 | # Set the alias in the unit relation databag of the specific relation. 366 | relation = self.charm.model.get_relation(self.relation_name, relation_id) 367 | relation.data[self.local_unit].update({"alias": available_aliases[0]}) 368 | 369 | def _diff(self, event: RelationChangedEvent) -> Diff: 370 | """Retrieves the diff of the data in the relation changed databag. 371 | 372 | Args: 373 | event: relation changed event. 374 | 375 | Returns: 376 | a Diff instance containing the added, deleted and changed 377 | keys from the event relation databag. 378 | """ 379 | # Retrieve the old data from the data key in the local unit relation databag. 380 | old_data = json.loads(event.relation.data[self.local_unit].get("data", "{}")) 381 | # Retrieve the new data from the event relation databag. 382 | new_data = ( 383 | {key: value for key, value in event.relation.data[event.app].items() if key != "data"} 384 | if event.app 385 | else {} 386 | ) 387 | 388 | # These are the keys that were added to the databag and triggered this event. 389 | added = new_data.keys() - old_data.keys() 390 | # These are the keys that were removed from the databag and triggered this event. 391 | deleted = old_data.keys() - new_data.keys() 392 | # These are the keys that already existed in the databag, 393 | # but had their values changed. 394 | changed = { 395 | key for key in old_data.keys() & new_data.keys() if old_data[key] != new_data[key] 396 | } 397 | 398 | # TODO: evaluate the possibility of losing the diff if some error 399 | # happens in the charm before the diff is completely checked (DPE-412). 400 | # Convert the new_data to a serializable format and save it for a next diff check. 401 | event.relation.data[self.local_unit].update({"data": json.dumps(new_data)}) 402 | 403 | # Return the diff with all possible changes. 404 | return Diff(added, changed, deleted) 405 | 406 | def _emit_aliased_event(self, event: RelationChangedEvent, event_name: str) -> None: 407 | """Emit an aliased event to a particular relation if it has an alias. 408 | 409 | Args: 410 | event: the relation changed event that was received. 411 | event_name: the name of the event to emit. 412 | """ 413 | alias = self._get_relation_alias(event.relation.id) 414 | if alias: 415 | getattr(self.on, f"{alias}_{event_name}").emit( 416 | event.relation, app=event.app, unit=event.unit 417 | ) 418 | 419 | def _get_relation_alias(self, relation_id: int) -> Optional[str]: 420 | """Returns the relation alias. 421 | 422 | Args: 423 | relation_id: the identifier for a particular relation. 424 | 425 | Returns: 426 | the relation alias or None if the relation was not found. 
427 | """ 428 | for relation in self.charm.model.relations[self.relation_name]: 429 | if relation.id == relation_id: 430 | return relation.data[self.local_unit].get("alias") 431 | return None 432 | 433 | def fetch_relation_data(self) -> dict: 434 | """Retrieves data from relation. 435 | 436 | This function can be used to retrieve data from a relation 437 | in the charm code when outside an event callback. 438 | 439 | Returns: 440 | a dict of the values stored in the relation data bag 441 | for all relation instances (indexed by the relation ID). 442 | """ 443 | data = {} 444 | for relation in self.relations: 445 | data[relation.id] = ( 446 | {key: value for key, value in relation.data[relation.app].items() if key != "data"} 447 | if relation.app 448 | else {} 449 | ) 450 | return data 451 | 452 | def _update_relation_data(self, relation_id: int, data: dict) -> None: 453 | """Updates a set of key-value pairs in the relation. 454 | 455 | This function writes in the application data bag, therefore, 456 | only the leader unit can call it. 457 | 458 | Args: 459 | relation_id: the identifier for a particular relation. 460 | data: dict containing the key-value pairs 461 | that should be updated in the relation. 462 | """ 463 | if self.local_unit.is_leader(): 464 | relation = self.charm.model.get_relation(self.relation_name, relation_id) 465 | relation.data[self.local_app].update(data) 466 | 467 | def _on_relation_joined_event(self, event: RelationJoinedEvent) -> None: 468 | """Event emitted when the application joins the database relation.""" 469 | # If relations aliases were provided, assign one to the relation. 470 | self._assign_relation_alias(event.relation.id) 471 | 472 | # Sets both database and extra user roles in the relation 473 | # if the roles are provided. Otherwise, sets only the database. 474 | if self.extra_user_roles: 475 | self._update_relation_data( 476 | event.relation.id, 477 | { 478 | "database": self.database, 479 | "extra-user-roles": self.extra_user_roles, 480 | }, 481 | ) 482 | else: 483 | self._update_relation_data(event.relation.id, {"database": self.database}) 484 | 485 | def _on_relation_changed_event(self, event: RelationChangedEvent) -> None: 486 | """Event emitted when the database relation has changed.""" 487 | # Check which data has changed to emit customs events. 488 | diff = self._diff(event) 489 | 490 | # Check if the database is created 491 | # (the database charm shared the credentials). 492 | if "username" in diff.added and "password" in diff.added: 493 | # Emit the default event (the one without an alias). 494 | logger.info("database created at %s", datetime.now()) 495 | getattr(self.on, "database_created").emit( 496 | event.relation, app=event.app, unit=event.unit 497 | ) 498 | 499 | # Emit the aliased event (if any). 500 | self._emit_aliased_event(event, "database_created") 501 | 502 | # To avoid unnecessary application restarts do not trigger 503 | # “endpoints_changed“ event if “database_created“ is triggered. 504 | return 505 | 506 | # Emit an endpoints changed event if the database 507 | # added or changed this info in the relation databag. 508 | if "endpoints" in diff.added or "endpoints" in diff.changed: 509 | # Emit the default event (the one without an alias). 510 | logger.info("endpoints changed on %s", datetime.now()) 511 | getattr(self.on, "endpoints_changed").emit( 512 | event.relation, app=event.app, unit=event.unit 513 | ) 514 | 515 | # Emit the aliased event (if any). 
516 | self._emit_aliased_event(event, "endpoints_changed") 517 | 518 | # To avoid unnecessary application restarts do not trigger 519 | # “read_only_endpoints_changed“ event if “endpoints_changed“ is triggered. 520 | return 521 | 522 | # Emit a read only endpoints changed event if the database 523 | # added or changed this info in the relation databag. 524 | if "read-only-endpoints" in diff.added or "read-only-endpoints" in diff.changed: 525 | # Emit the default event (the one without an alias). 526 | logger.info("read-only-endpoints changed on %s", datetime.now()) 527 | getattr(self.on, "read_only_endpoints_changed").emit( 528 | event.relation, app=event.app, unit=event.unit 529 | ) 530 | 531 | # Emit the aliased event (if any). 532 | self._emit_aliased_event(event, "read_only_endpoints_changed") 533 | 534 | @property 535 | def relations(self) -> List[Relation]: 536 | """The list of Relation instances associated with this relation_name.""" 537 | return list(self.charm.model.relations[self.relation_name]) 538 | --------------------------------------------------------------------------------
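As a usage sketch for the two requirer endpoints wired up in src/charm.py above ("db" via DatabaseRequires, "s3-parameters" via S3Requirer), the relations could be established roughly as follows; the application names, channels, and provider-side endpoint names are assumptions, not taken from this repository:

```bash
# Hypothetical wiring; application and provider endpoint names are assumptions.
juju deploy airbyte-k8s --trust
juju deploy postgresql-k8s --trust
juju relate airbyte-k8s:db postgresql-k8s:database

juju deploy s3-integrator
juju relate airbyte-k8s:s3-parameters s3-integrator:s3-credentials
```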