├── tests ├── __init__.py ├── conftest.py ├── fixtures │ ├── __init__.py │ └── kube.py ├── test_config.py ├── test_kube.py ├── test_snapshots.py ├── test_deltas.py └── test_volume_from_pvc.py ├── k8s_snapshots ├── __init__.py ├── logging.py ├── backends │ ├── __init__.py │ ├── abstract.py │ ├── digitalocean.py │ ├── aws.py │ └── google.py ├── serialize.py ├── context.py ├── events.py ├── config.py ├── errors.py ├── __main__.py ├── asyncutils.py ├── kube.py ├── rule.py ├── logconf.py ├── snapshot.py └── core.py ├── .github └── FUNDING.yml ├── .gitignore ├── examples ├── snapshotrule-volumeclaim.yml ├── snapshotrule-aws.yml ├── snapshotrule-google.yml └── backup-kops-etcd.yml ├── manifests ├── third-party-resource.yml ├── custom-resource-definition.yml └── rbac.yaml ├── docs ├── digitalocean.md ├── google-cloud.md └── aws.md ├── setup.py ├── Dockerfile ├── pyproject.toml ├── DEVELOPMENT.md ├── .circleci └── config.yml ├── LICENSE ├── CHANGES └── README.md /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /k8s_snapshots/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | issuehunt: miracle2k/k8s-snapshots 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.egg-info/ 2 | build/ 3 | dist/ 4 | .cache/ 5 | .mypy_cache/ 6 | .vscode 7 | -------------------------------------------------------------------------------- /examples/snapshotrule-volumeclaim.yml: -------------------------------------------------------------------------------- 1 | apiVersion: "k8s-snapshots.elsdoerfer.com/v1" 2 | kind: SnapshotRule 3 | metadata: 4 | name: mysql 5 | spec: 6 | deltas: P1D P30D 7 | persistentVolumeClaim: my-mysql-disk 8 | -------------------------------------------------------------------------------- /manifests/third-party-resource.yml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: ThirdPartyResource 3 | metadata: 4 | name: snapshot-rule.k8s-snapshots.elsdoerfer.com 5 | description: "Defines snapshot management rules for a disk." 6 | versions: 7 | - name: v1 8 | -------------------------------------------------------------------------------- /docs/digitalocean.md: -------------------------------------------------------------------------------- 1 | ### Configure access permissions on DigitalOcean 2 | 3 | To create volume snapshots on DigitalOcean, you have to provide the 4 | `DIGITALOCEAN_ACCESS_TOKEN` env var. 5 | 6 | Note that, DO limits the number of snapshots for a single volume to 25. 
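If you want to double-check the token outside the cluster first, here is a minimal sketch using the `python-digitalocean` library (already a dependency of this project); listing volumes is the same call the DigitalOcean backend itself makes:

```python
import os
import digitalocean

# Assumes the DIGITALOCEAN_ACCESS_TOKEN variable described above is exported;
# the DigitalOcean backend relies on this environment variable being set.
token = os.environ["DIGITALOCEAN_ACCESS_TOKEN"]

manager = digitalocean.Manager(token=token)
for volume in manager.get_all_volumes():
    print(volume.name, volume.id)
```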
7 | -------------------------------------------------------------------------------- /examples/snapshotrule-aws.yml: -------------------------------------------------------------------------------- 1 | apiVersion: "k8s-snapshots.elsdoerfer.com/v1" 2 | kind: SnapshotRule 3 | metadata: 4 | name: mysql 5 | spec: 6 | deltas: P1D P30D 7 | backend: aws 8 | disk: 9 | region: eu-west-1 10 | volumeId: vol-0aa6f44aad0daf9f2 11 | -------------------------------------------------------------------------------- /examples/snapshotrule-google.yml: -------------------------------------------------------------------------------- 1 | apiVersion: "k8s-snapshots.elsdoerfer.com/v1" 2 | kind: SnapshotRule 3 | metadata: 4 | name: mysql 5 | spec: 6 | deltas: P1D P30D 7 | backend: google 8 | disk: 9 | name: my-mysql-disk 10 | zone: europe-west1-c 11 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | setup( 4 | name='k8s-snapshots', 5 | packages=find_packages(exclude=['tests']), 6 | entry_points={ 7 | 'console_scripts': [ 8 | 'k8s-snapshots=k8s_snapshots.__main__:main' 9 | ] 10 | } 11 | ) 12 | -------------------------------------------------------------------------------- /k8s_snapshots/logging.py: -------------------------------------------------------------------------------- 1 | import attr 2 | 3 | 4 | class Loggable: 5 | def __structlog__(self): 6 | if attr.has(self.__class__): 7 | return attr.asdict(self) 8 | 9 | if hasattr(self, 'to_dict') and callable(self.to_dict): 10 | return self.to_dict() 11 | 12 | return self 13 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from k8s_snapshots.logconf import configure_logging 4 | 5 | 6 | @pytest.fixture(scope='session', autouse=True) 7 | def configured_logging(): 8 | configure_logging( 9 | level_name='DEBUG', 10 | for_humans=True, 11 | ) 12 | 13 | from .fixtures import * # noqa 14 | from .fixtures.kube import * # noqa 15 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.9-alpine 2 | 3 | ADD . 
/app 4 | WORKDIR /app 5 | RUN apk add --no-cache --virtual .build_deps gcc musl-dev libffi-dev 6 | RUN pip3 install poetry 7 | RUN poetry config virtualenvs.create false 8 | RUN poetry install --no-dev 9 | RUN apk del .build_deps gcc musl-dev libffi-dev 10 | 11 | ENV TZ UTC 12 | 13 | CMD ["python", "-m", "k8s_snapshots"] 14 | -------------------------------------------------------------------------------- /manifests/custom-resource-definition.yml: -------------------------------------------------------------------------------- 1 | apiVersion: apiextensions.k8s.io/v1beta1 2 | kind: CustomResourceDefinition 3 | metadata: 4 | name: snapshotrules.k8s-snapshots.elsdoerfer.com 5 | spec: 6 | group: k8s-snapshots.elsdoerfer.com 7 | version: v1 8 | scope: Namespaced 9 | names: 10 | plural: snapshotrules 11 | singular: snapshotrule 12 | kind: SnapshotRule 13 | shortNames: 14 | - sr 15 | -------------------------------------------------------------------------------- /examples/backup-kops-etcd.yml: -------------------------------------------------------------------------------- 1 | apiVersion: "k8s-snapshots.elsdoerfer.com/v1" 2 | kind: SnapshotRule 3 | metadata: 4 | name: etcd-main 5 | namespace: kube-system 6 | spec: 7 | deltas: P1D P30D 8 | backend: aws 9 | disk: 10 | region: eu-west-2 11 | volumeId: vol-0c9f96dd263e10067 12 | 13 | --- 14 | apiVersion: "k8s-snapshots.elsdoerfer.com/v1" 15 | kind: SnapshotRule 16 | metadata: 17 | name: etcd-events 18 | namespace: kube-system 19 | spec: 20 | deltas: P1D P30D 21 | backend: aws 22 | disk: 23 | region: eu-west-2 24 | volumeId: vol-070121e34012404fd 25 | -------------------------------------------------------------------------------- /tests/fixtures/__init__.py: -------------------------------------------------------------------------------- 1 | from unittest import mock 2 | 3 | import pytest 4 | 5 | from k8s_snapshots import errors 6 | from k8s_snapshots.context import Context 7 | from tests.fixtures.kube import make_resource, KUBE_CONFIG 8 | 9 | 10 | @pytest.fixture 11 | def fx_context(request): 12 | request.getfixturevalue('fx_mock_context_kube_config') 13 | request.getfixturevalue('fx_mock_context_kube_client') 14 | ctx = Context({ 15 | 'deltas_annotation_key': 'test.k8s-snapshots.example/deltas' 16 | }) 17 | return ctx 18 | 19 | 20 | @pytest.fixture 21 | def fx_deltas(request): 22 | return 'PT10S PT40S' 23 | -------------------------------------------------------------------------------- /tests/test_config.py: -------------------------------------------------------------------------------- 1 | import os 2 | import contextlib 3 | import datetime 4 | 5 | 6 | 7 | @contextlib.contextmanager 8 | def set_env(**environ): 9 | """ 10 | Temporarily set the process environment variables. 11 | 12 | >>> with set_env(PLUGINS_DIR=u'test/plugins'): 13 | ... 
"PLUGINS_DIR" in os.environ 14 | True 15 | 16 | >>> "PLUGINS_DIR" in os.environ 17 | False 18 | 19 | :type environ: dict[str, unicode] 20 | :param environ: Environment variables to set 21 | """ 22 | old_environ = dict(os.environ) 23 | os.environ.update(environ) 24 | try: 25 | yield 26 | finally: 27 | os.environ.clear() 28 | os.environ.update(old_environ) 29 | 30 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "k8s-snapshots" 3 | version = "0.1.0" 4 | description = "" 5 | authors = ["Michael Elsdorfer "] 6 | 7 | [tool.poetry.dependencies] 8 | python = "^3.9" 9 | google-api-python-client = "^1.6.2" 10 | requests = "^2.27.1" 11 | pykube = "^0.14.0" 12 | tarsnapper = "^0.4.0" 13 | aiochannel = "^1.0.1" 14 | structlog = "^21.5.0" 15 | attrs = "^17.3.0" 16 | pendulum = "^0.8.0" 17 | confcollect = "^0.2.3" 18 | isodate = "^0.6.1" 19 | python-dateutil = "^2.6.0" 20 | aiohttp = "^3.5.4" 21 | aiostream = "^0.4.4" 22 | boto3 = "^1.21.10" 23 | yarl = "^1.1.1" 24 | python-digitalocean = "^1.15.0" 25 | 26 | [tool.poetry.dev-dependencies] 27 | 28 | [build-system] 29 | requires = ["poetry-core>=1.0.0"] 30 | build-backend = "poetry.core.masonry.api" 31 | -------------------------------------------------------------------------------- /DEVELOPMENT.md: -------------------------------------------------------------------------------- 1 | Development 2 | =========== 3 | 4 | For local development, you can still connect to an existing Google 5 | Cloud Project and Kubernetes cluster using the config options 6 | available. If you are lucky, your local workstation is already setup 7 | the way you need it. If we can find credentials for Google Cloud 8 | or Kubernetes, they will be used automatically. 9 | 10 | However, depending on the backend, you need to provide some options that 11 | otherwise would be read from the instance metadata: 12 | 13 | 14 | For AWS: 15 | 16 | $ AWS_REGION=eu-west-1 python -m k8s_snapshots 17 | 18 | 19 | For Google Cloud: 20 | 21 | $ GCLOUD_PROJECT=revolving-randy python -m k8s_snapshots 22 | 23 | 24 | ## Releasing a new version 25 | 26 | - Update CHANGES. 27 | - Tag with a v-prefix, which will cause a tag on Docker hub. 28 | 29 | -------------------------------------------------------------------------------- /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | jobs: 3 | build: 4 | docker: 5 | - image: circleci/python:3.6.1 6 | 7 | working_directory: ~/k8s-snapshots 8 | 9 | steps: 10 | - checkout 11 | 12 | - restore_cache: 13 | keys: 14 | - v1-dependencies-{{ checksum "requirements.txt" }} 15 | - v1-dependencies- 16 | 17 | - run: 18 | name: install dependencies 19 | command: | 20 | python3 -m venv venv 21 | . venv/bin/activate 22 | pip install -r requirements.txt 23 | 24 | - save_cache: 25 | paths: 26 | - ./venv 27 | key: v1-dependencies-{{ checksum "requirements.txt" }} 28 | 29 | # run tests! 30 | - run: 31 | name: run tests 32 | command: | 33 | . 
venv/bin/activate 34 | pip install pytest 35 | py.test tests 36 | -------------------------------------------------------------------------------- /k8s_snapshots/backends/__init__.py: -------------------------------------------------------------------------------- 1 | from importlib import import_module 2 | import pykube.objects 3 | from ..errors import ConfigurationError 4 | 5 | 6 | BACKENDS = ['google', 'aws', 'digitalocean'] 7 | 8 | 9 | def get_backends(): 10 | for name in BACKENDS: 11 | try: 12 | backend = import_module('k8s_snapshots.backends.%s' % name) 13 | except ImportError: 14 | continue 15 | yield name, backend 16 | 17 | 18 | def get_backend(name: str): 19 | try: 20 | return import_module('k8s_snapshots.backends.%s' % name) 21 | except ImportError as e: 22 | raise ConfigurationError(f'No such backed: "{name}"', error=e) 23 | 24 | 25 | def find_backend_for_volume(volume: pykube.objects.PersistentVolume): 26 | """ 27 | See if we have a provider that supports this volume. 28 | """ 29 | for name, backend in get_backends(): 30 | if backend.supports_volume(volume): 31 | return name, backend 32 | 33 | return None, None 34 | 35 | -------------------------------------------------------------------------------- /manifests/rbac.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ServiceAccount 3 | metadata: 4 | labels: 5 | app: k8s-snapshots 6 | name: k8s-snapshots 7 | namespace: kube-system 8 | --- 9 | apiVersion: rbac.authorization.k8s.io/v1 10 | kind: ClusterRole 11 | metadata: 12 | labels: 13 | app: k8s-snapshots 14 | name: k8s-snapshots 15 | namespace: kube-system 16 | rules: 17 | - apiGroups: 18 | - "k8s-snapshots.elsdoerfer.com" 19 | resources: 20 | - snapshotrules 21 | verbs: 22 | - get 23 | - list 24 | - watch 25 | - apiGroups: 26 | - "" 27 | resources: 28 | - namespaces 29 | - pods 30 | - persistentvolumeclaims 31 | - persistentvolumes 32 | verbs: 33 | - get 34 | - list 35 | - watch 36 | --- 37 | apiVersion: rbac.authorization.k8s.io/v1 38 | kind: ClusterRoleBinding 39 | metadata: 40 | labels: 41 | app: k8s-snapshots 42 | name: k8s-snapshots 43 | namespace: kube-system 44 | roleRef: 45 | apiGroup: rbac.authorization.k8s.io 46 | kind: ClusterRole 47 | name: k8s-snapshots 48 | subjects: 49 | - kind: ServiceAccount 50 | name: k8s-snapshots 51 | namespace: kube-system 52 | 53 | -------------------------------------------------------------------------------- /tests/test_kube.py: -------------------------------------------------------------------------------- 1 | import pykube 2 | 3 | from k8s_snapshots import kube 4 | from tests.fixtures.kube import mock_kube 5 | 6 | 7 | def test_mock_kube(fx_context): 8 | n_resources = 5 9 | volume_names = [f'test-volume-{i}' for i in range(0, n_resources)] 10 | 11 | def _volume(name, namespace='default'): 12 | return pykube.objects.PersistentVolume( 13 | fx_context.kube_client(), 14 | { 15 | 'apiVersion': 'v1', 16 | 'kind': 'PersistentVolume', 17 | 'metadata': { 18 | 'name': name, 19 | }, 20 | } 21 | ) 22 | 23 | resources = [_volume(volume_name) for volume_name in volume_names] 24 | 25 | with mock_kube(resources) as _kube: 26 | for expected_resource, volume_name in zip(resources, volume_names): 27 | assert expected_resource.name == volume_name, \ 28 | 'Resources was not ceated properly' 29 | kube_resource = kube.get_resource_or_none_sync( 30 | fx_context.kube_client(), 31 | pykube.objects.PersistentVolume, 32 | name=volume_name 33 | ) 34 | assert kube_resource == expected_resource 35 | 
36 | assert len(kube_resource.name) 37 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2017, Michael Elsdörfer 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions 6 | are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above 12 | copyright notice, this list of conditions and the following 13 | disclaimer in the documentation and/or other materials 14 | provided with the distribution. 15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 19 | FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 20 | COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 21 | INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 22 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 23 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 24 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 26 | ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 27 | POSSIBILITY OF SUCH DAMAGE. 28 | -------------------------------------------------------------------------------- /k8s_snapshots/serialize.py: -------------------------------------------------------------------------------- 1 | from datetime import timedelta 2 | import json 3 | from typing import ( 4 | TypeVar, 5 | Mapping, 6 | Sequence, 7 | Optional, Callable) 8 | 9 | import isodate 10 | import pendulum 11 | from structlog.processors import _json_fallback_handler 12 | 13 | Serializable = TypeVar( 14 | 'Serializable', 15 | int, 16 | float, 17 | str, 18 | bool, 19 | Mapping, 20 | Sequence, 21 | ) 22 | 23 | _DEFAULT_FALLBACK_PROCESSOR = _json_fallback_handler 24 | 25 | 26 | def dumps(*args, **kwargs): 27 | kwargs['default'] = Processor() 28 | return json.dumps(*args, **kwargs) 29 | 30 | 31 | class Processor: 32 | def __init__(self, fallback_processor=_DEFAULT_FALLBACK_PROCESSOR): 33 | self.fallback_processor = fallback_processor 34 | 35 | def __call__(self, obj): 36 | return process(obj, fallback_processor=self.fallback_processor) 37 | 38 | 39 | def process( 40 | obj, 41 | fallback_processor: Optional[ 42 | Callable[..., Serializable] 43 | ]=_DEFAULT_FALLBACK_PROCESSOR, 44 | ) -> Serializable: 45 | if isinstance(obj, timedelta): 46 | return isodate.duration_isoformat(obj) 47 | 48 | if isinstance(obj, pendulum.Pendulum): 49 | return obj.isoformat() 50 | 51 | if fallback_processor is not None: 52 | return fallback_processor(obj) 53 | 54 | raise TypeError( 55 | f'Cannot process object of type {type(obj)}, no fallback_processor ' 56 | f'provided' 57 | ) 58 | -------------------------------------------------------------------------------- /k8s_snapshots/context.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pykube 3 | from importlib import import_module 4 | import structlog 5 | from .backends import get_backend 6 | 7 | 8 | _logger = structlog.get_logger() 9 | 10 | 
11 | class Context: 12 | def __init__(self, config=None): 13 | self.config = config 14 | self._kube_config = None 15 | self._backend = None 16 | 17 | @property 18 | def kube_config(self): 19 | if self._kube_config is None: 20 | self._kube_config = self.load_kube_config() 21 | 22 | return self._kube_config 23 | 24 | def get_backend(self): 25 | return get_backend(self.config.get('cloud_provider')) 26 | 27 | def load_kube_config(self): 28 | cfg = None 29 | 30 | kube_config_file = self.config.get('kube_config_file') 31 | 32 | if kube_config_file: 33 | _logger.info('kube-config.from-file', file=kube_config_file) 34 | cfg = pykube.KubeConfig.from_file(kube_config_file) 35 | 36 | if not cfg: 37 | # See where we can get it from. 38 | default_file = os.path.expanduser('~/.kube/config') 39 | if os.path.exists(default_file): 40 | _logger.info( 41 | 'kube-config.from-file.default', 42 | file=default_file) 43 | cfg = pykube.KubeConfig.from_file(default_file) 44 | 45 | # Maybe we are running inside Kubernetes. 46 | if not cfg: 47 | _logger.info('kube-config.from-service-account') 48 | cfg = pykube.KubeConfig.from_service_account() 49 | 50 | return cfg 51 | 52 | def kube_client(self): 53 | return pykube.HTTPClient(self.kube_config) 54 | 55 | 56 | -------------------------------------------------------------------------------- /CHANGES: -------------------------------------------------------------------------------- 1 | 2.0 (2017-08-26) 2 | ------------------ 3 | 4 | Adds support for cloud backends. 5 | 6 | IMPORTANT BREAKING CHANGES 7 | 8 | - The VOLUMES options has been replaced by a custom SnapshotRule 9 | resource. If you need to setup manual snapshot rules for disks not 10 | based on a PersistentVolume resource, see the readme for instructions. 11 | 12 | Other changes 13 | 14 | - Support an AWS backend. 15 | - Improve GCE disk detection; now based on actual data data, not on a 16 | provisioner label. 17 | - GLCOUD_PROJECT environment variable no longer required, is read from 18 | instance metadata. 19 | 20 | 21 | 1.0.1 (2017-08-16) 22 | ------------------ 23 | 24 | - Fix manual volume support via VOLUMES. 25 | - Fix race condition that caused PersistentVolume resources not be be 26 | watched. 27 | 28 | 29 | 1.0 (2017-08-10) 30 | ---------------- 31 | 32 | Vastly improved rewrite. Joar Wandborg contributed most of the changes 33 | in this release. 34 | 35 | 36 | IMPORTANT BREAKING CHANGES 37 | 38 | - *k8s-snapshots* now labels the snapshots it creates, and only looks 39 | at those snapshots that have this label when making decisions about 40 | when to create and delete snapshots. 41 | 42 | Thus, when you upgrade, all existing snapshots will be invisible to 43 | *k8s-snapshots*, and it will begin with a blank slate. 44 | 45 | We recommend that within the Google Cloud UI, you add the label to 46 | your existing snapshots you want the tool to consider. The label 47 | expected is: 48 | 49 | created-by=k8s-snapshots 50 | 51 | - The format used to define deltas has changed. It now uses ISO 8601 52 | durations, so the annotation string will look like this: 53 | 54 | backup.kubernetes.io/deltas: PT1H P30D P180D 55 | 56 | This is 1 hour, 30 days and 180 days. 
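  As a quick, illustrative check (not part of the tool itself), the same
  strings parse with the isodate library that k8s-snapshots uses internally:

      >>> import isodate
      >>> [isodate.parse_duration(d) for d in "PT1H P30D P180D".split()]
      [datetime.timedelta(seconds=3600), datetime.timedelta(days=30), datetime.timedelta(days=180)]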
57 | -------------------------------------------------------------------------------- /tests/test_snapshots.py: -------------------------------------------------------------------------------- 1 | from datetime import timedelta 2 | from pendulum import Pendulum 3 | from unittest import TestCase 4 | 5 | from k8s_snapshots.backends.abstract import Snapshot 6 | from k8s_snapshots.context import Context 7 | from k8s_snapshots.rule import Rule 8 | from k8s_snapshots.snapshot import snapshots_for_rule_are_outdated 9 | 10 | 11 | class TestSnapshotsAreUpToDate(TestCase): 12 | TEST_DISK = 'test-disk' 13 | 14 | def setUp(self): 15 | self.mock_context = Context({}) 16 | self.rule = Rule( 17 | name='test_rule', 18 | deltas=[timedelta(hours=1), timedelta(days=30)], 19 | backend='test_backend', 20 | disk=self.TEST_DISK) 21 | 22 | def test_snapshot_is_required_without_existing_snapshots(self): 23 | assert snapshots_for_rule_are_outdated(self.rule, []) 24 | 25 | def test_snapshot_not_required_with_recent_snapshots(self): 26 | assert not snapshots_for_rule_are_outdated(self.rule, [ 27 | Snapshot( 28 | created_at=Pendulum.now('utc') - timedelta(minutes=59), 29 | name='snapshot-1', 30 | disk=self.TEST_DISK) 31 | ]) 32 | 33 | def test_snapshot_required_with_outdated_snapshot(self): 34 | assert snapshots_for_rule_are_outdated(self.rule, [ 35 | Snapshot( 36 | created_at=Pendulum.now('utc') - timedelta(hours=1, minutes=1), 37 | name='snapshot-1', 38 | disk=self.TEST_DISK) 39 | ]) 40 | 41 | def test_snapshot_required_with_snapshot_for_different_disk(self): 42 | assert snapshots_for_rule_are_outdated(self.rule, [ 43 | Snapshot( 44 | created_at=Pendulum.now('utc') - timedelta(minutes=5), 45 | name='snapshot-1', 46 | disk='some-other-disk') 47 | ]) 48 | -------------------------------------------------------------------------------- /docs/google-cloud.md: -------------------------------------------------------------------------------- 1 | ### Configure access permissions to Google Cloud 2 | 3 | If there are no default credentials to Kubernetes and the Cloud 4 | snapshot API, or the default credentials do not have the required 5 | access scope, you may need to configure these. 6 | 7 | 8 | 9 | 10 | 14 | 15 | 16 | 17 | 23 | 24 | 25 | 26 | 34 | 35 | 36 | 37 | 40 | 41 | 42 | 43 | 47 | 48 |
CLOUD_PROVIDER 11 | Set to 'google' to use the Google Cloud backend exclusively. 12 | If not set, it can be detected from the volume spec (gcePersistentDisk). 13 | </td> 14 | </tr> 15 | 16 | <tr> 17 | <td> 
GCLOUD_PROJECT 18 | Name of the Google Cloud project. This is required to use the Google 19 | Cloud API, but if it's not given, we try to read the value from 20 | the [instance metadata service](https://cloud.google.com/compute/docs/storing-retrieving-metadata) 21 | which will usually work. 22 |
GCLOUD_CREDENTIALS_FILE 27 | Filename of the JSON gcloud credentials file used to authenticate. 28 | You'll want to mount it into the container. 29 | By default set to 30 | ~/.config/gcloud/application_default_credentials.json. 31 | PyKube doesn't use env vars to locate the config, but 32 | GOOGLE_APPLICATION_CREDENTIALS takes precedence. 33 | </td> 34 | </tr> 35 | 36 | <tr> 37 | <td> 
GOOGLE_APPLICATION_CREDENTIALS 38 | The contents of the JSON keyfile that is used to authenticate. 39 |
KUBE_CONFIG_FILE 44 | Authentification with the Kubernetes API. By default, the 45 | pod service account is used. 46 |
49 | 50 | When using a service account with a custom role to access the Google Cloud API, the following permissions are required: 51 | ``` 52 | compute.disks.createSnapshot 53 | compute.snapshots.create 54 | compute.snapshots.delete 55 | compute.snapshots.get 56 | compute.snapshots.list 57 | compute.snapshots.setLabels 58 | compute.zoneOperations.get 59 | ``` -------------------------------------------------------------------------------- /k8s_snapshots/events.py: -------------------------------------------------------------------------------- 1 | """ 2 | Here is a collection of logging ``event```values that are expected to be kept 3 | more stable. 4 | 5 | These events are provided as a reference for external logging metric tools. 6 | """ 7 | import enum 8 | 9 | 10 | class EventEnum(enum.Enum): 11 | """ Base class for Event Enums """ 12 | pass 13 | 14 | 15 | @enum.unique 16 | class Annotation(EventEnum): 17 | """ 18 | Events related to 'deltas' annotations. 19 | """ 20 | FOUND = 'annotation.found' 21 | NOT_FOUND = 'annotation.not-found' 22 | ERROR = 'annotation.error' 23 | INVALID = 'annotation.invalid' 24 | 25 | 26 | @enum.unique 27 | class VolumeEvent(EventEnum): 28 | """ 29 | Events related to Kubernetes PersistentVolume and PersistentVolumeClaim 30 | resource events. 31 | """ 32 | RECEIVED = 'volume-event.received' 33 | 34 | 35 | @enum.unique 36 | class Volume(EventEnum): 37 | """ 38 | Events related to Kubernetes PersistentVolumes 39 | """ 40 | UNSUPPORTED = 'volume.unsupported' 41 | NOT_FOUND = 'volume.not-found' 42 | 43 | 44 | @enum.unique 45 | class Snapshot(EventEnum): 46 | """ 47 | Events related to snapshots. 48 | """ 49 | SCHEDULED = 'snapshot.scheduled' 50 | START = 'snapshot.start' 51 | ERROR = 'snapshot.error' 52 | CREATED = 'snapshot.created' 53 | EXPIRED = 'snapshot.expired' 54 | 55 | 56 | @enum.unique 57 | class Rule(EventEnum): 58 | """ 59 | Events related to snapshot Rule()s. 60 | """ 61 | PENDING = 'rule.pending' 62 | ADDED_FROM_CONFIG = 'rule.from-config' 63 | ADDED = 'rule.added' 64 | UPDATED = 'rule.updated' 65 | REMOVED = 'rule.removed' 66 | HEARTBEAT = 'rule.heartbeat' 67 | 68 | 69 | @enum.unique 70 | class Expiration(EventEnum): 71 | """ 72 | Events related to snapshot expiration. 73 | """ 74 | STARTED = 'expire.started' 75 | KEPT = 'expire.kept' 76 | DELETE = 'expire.delete' 77 | COMPLETE = 'expire.complete' 78 | 79 | 80 | @enum.unique 81 | class Ping(EventEnum): 82 | """ 83 | Events related to sending pings. 84 | """ 85 | SENT = 'ping.sent' 86 | -------------------------------------------------------------------------------- /docs/aws.md: -------------------------------------------------------------------------------- 1 | ### Configure access permissions on AWS 2 | 3 | To be able to create snapshots, on AWS our pod will need the following permissions: 4 | 5 | ```json 6 | { 7 | "Version": "2012-10-17", 8 | "Statement": [ 9 | { 10 | "Effect": "Allow", 11 | "Action": [ 12 | "ec2:DescribeAvailabilityZones", 13 | "ec2:CreateTags", 14 | "ec2:DescribeTags", 15 | "ec2:DescribeVolumeAttribute", 16 | "ec2:DescribeVolumeStatus", 17 | "ec2:DescribeVolumes", 18 | "ec2:CreateSnapshot", 19 | "ec2:DeleteSnapshot", 20 | "ec2:DescribeSnapshots" 21 | ], 22 | "Resource": "*" 23 | } 24 | ] 25 | } 26 | ``` 27 | 28 | If there are no default credentials injected into your nodes, or the default 29 | credentials do not have the required access scope, you may need to 30 | configure these environment variables: 31 | 32 | 33 | 34 | 35 | 38 | 39 | 40 | 41 | 44 | 45 | 46 | 47 | 50 | 51 |
AWS_ACCESS_KEY_ID 36 | AWS IAM Access Key ID that is used to authenticate. 37 |
AWS_SECRET_ACCESS_KEY 42 | AWS IAM Secret Access Key that is used to authenticate. 43 |
AWS_REGION 48 | The region is usually detected via the meta data service. You can override the value. 49 |
52 | 53 | 54 | ### A tip for kops users 55 | 56 | On older versions of kops, master nodes did have the permissions required. A solution there 57 | is to just run `k8s-snapshots` on a master node. 58 | 59 | To run on a Master, we need to: 60 | * [Overcome a Taint](https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/) 61 | * [Specify that we require a Master](https://kubernetes.io/docs/concepts/configuration/assign-pod-node/) 62 | 63 | To do this, add the following to the above manifest for the k8s-snapshots 64 | Deployment: 65 | 66 | ``` 67 | spec: 68 | ... 69 | template: 70 | ... 71 | spec: 72 | ... 73 | tolerations: 74 | - key: "node-role.kubernetes.io/master" 75 | operator: "Equal" 76 | value: "" 77 | effect: "NoSchedule" 78 | nodeSelector: 79 | kubernetes.io/role: master 80 | ``` -------------------------------------------------------------------------------- /k8s_snapshots/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Dict 3 | import confcollect 4 | import structlog 5 | from k8s_snapshots.errors import ConfigurationError 6 | 7 | 8 | _logger = structlog.get_logger() 9 | 10 | 11 | DEFAULT_CONFIG = { 12 | #: Set to True to make logs more machine-readable 13 | 'json_log': False, 14 | #: If zero, prints one line of JSON per message, if set to a positive 15 | #: non-zero integer to get indented JSON output 16 | 'structlog_json_indent': 0, 17 | #: Anything [^a-z0-9-] will be replaced by '-', the timezone will always be 18 | #: UTC. 19 | 'snapshot_datetime_format': '%d%m%y-%H%M%S', 20 | 'log_level': 'INFO', 21 | 'kube_config_file': '', 22 | 'use_claim_name': False, 23 | 'ping_url': '', 24 | #: The key used when annotating PVs and PVCs with deltas 25 | 'deltas_annotation_key': 'backup.kubernetes.io/deltas', 26 | #: This label will be set on all snapshots created by k8s-snapshots 27 | 'snapshot_author_label': 'k8s-snapshots', 28 | 'snapshot_author_label_key': 'created-by', 29 | #: Number of seconds between Rule.HEARTBEAT events, ``0`` to disable. 30 | 'schedule_heartbeat_interval_seconds': 600, 31 | #: Turns debug mode on, not recommended in production 32 | 'debug': False, 33 | 34 | 'gcloud_project': '', 35 | 'gcloud_credentials_file': os.path.join( 36 | os.path.expanduser('~'), 37 | ".config/gcloud/application_default_credentials.json" 38 | ), 39 | 'google_application_credentials': '', 40 | 41 | 'aws_region': '' 42 | } 43 | 44 | 45 | def validate_config(config: Dict) -> bool: 46 | return True 47 | 48 | 49 | def from_environ_basic() -> Dict: 50 | config = DEFAULT_CONFIG.copy() 51 | config.update(confcollect.from_environ(by_defaults=DEFAULT_CONFIG)) 52 | # Backwards compatability 53 | if config.get('gcloud_json_keyfile_name') and not config.get('gcloud_credentials_file'): 54 | config['gcloud_credentials_file'] = config.get('gcloud_json_keyfile_name') 55 | if config.get('gcloud_json_keyfile_string') and not config.get('google_application_credentials'): 56 | config['google_application_credentials'] = config.get('gcloud_json_keyfile_string') 57 | 58 | return config 59 | 60 | 61 | def from_environ() -> Dict: 62 | config = from_environ_basic() 63 | 64 | if not validate_config(config): 65 | raise ConfigurationError( 66 | 'Invalid configuration. 
See log for more details', 67 | config=config 68 | ) 69 | 70 | return config 71 | -------------------------------------------------------------------------------- /tests/test_deltas.py: -------------------------------------------------------------------------------- 1 | from datetime import timedelta 2 | 3 | import isodate 4 | import pytest 5 | 6 | import pendulum 7 | 8 | from k8s_snapshots import errors 9 | from k8s_snapshots.rule import parse_deltas 10 | 11 | 12 | @pytest.mark.parametrize( 13 | [ 14 | 'deltas', 15 | 'expected_timedeltas', 16 | ], 17 | [ 18 | pytest.param( 19 | 'PT1M P1M', 20 | [ 21 | isodate.Duration(minutes=1), 22 | isodate.Duration(months=1), 23 | ] 24 | ), 25 | pytest.param( 26 | 'P7D P1D', 27 | [ 28 | isodate.Duration(days=7), 29 | isodate.Duration(days=1), 30 | ] 31 | ), 32 | pytest.param( 33 | 'PT1M PT7.5H P1M P5W P1Y', 34 | [ 35 | isodate.Duration(minutes=1), 36 | isodate.Duration(hours=7.5), 37 | isodate.Duration(months=1), 38 | isodate.Duration(weeks=5), 39 | isodate.Duration(years=1), 40 | ], 41 | ), 42 | pytest.param( 43 | 'PT1D PT1D', 44 | [], 45 | marks=pytest.mark.xfail( 46 | reason='T may only be used before time-based values such as ' 47 | 'minute, hour, second', 48 | raises=errors.DeltasParseError, 49 | strict=True, 50 | ) 51 | ), 52 | pytest.param( 53 | 'PT1M', 54 | [], 55 | marks=pytest.mark.xfail( 56 | raises=errors.DeltasParseError, 57 | reason='Two deltas are required', 58 | strict=True, 59 | ) 60 | ), 61 | pytest.param( 62 | 'P1S P2S', 63 | [], 64 | marks=pytest.mark.xfail( 65 | raises=errors.DeltasParseError, 66 | reason='PT is required', 67 | strict=True 68 | ) 69 | ), 70 | pytest.param( 71 | 'pt2m', 72 | [], 73 | marks=pytest.mark.xfail( 74 | raises=errors.DeltasParseError, 75 | reason='ISO 8601 does not allow lowercase characters', 76 | strict=True 77 | ) 78 | ), 79 | pytest.param( 80 | None, 81 | [], 82 | marks=pytest.mark.xfail( 83 | raises=errors.DeltasParseError, 84 | reason='deltas is None', 85 | strict=True, 86 | ) 87 | ) 88 | ] 89 | ) 90 | def test_parse_deltas(deltas, expected_timedeltas): 91 | parsed_deltas = parse_deltas(deltas) 92 | assert parsed_deltas == expected_timedeltas 93 | -------------------------------------------------------------------------------- /k8s_snapshots/errors.py: -------------------------------------------------------------------------------- 1 | import traceback 2 | from typing import Dict, List, Iterable 3 | 4 | 5 | class StructuredError(Exception): 6 | def __init__(self, message=None, **data): 7 | self.message = message 8 | self.data = data 9 | 10 | def __str__(self): 11 | return f'{self.__class__.__qualname__}: {self.message} {self.data!r}' 12 | 13 | def __repr__(self): 14 | return f'<{self.__class__.__name__}: {self.message} ' \ 15 | f'data={self.data!r}>' 16 | 17 | def __structlog__(self): 18 | return self._serializable_exc() 19 | 20 | def _exc_chain(self) -> Iterable[Exception]: 21 | chain = [] # reverse chronological order 22 | exc = self 23 | 24 | while exc is not None: 25 | chain.append(exc) 26 | exc = exc.__cause__ 27 | 28 | return reversed(chain) 29 | 30 | def _serializable_exc(self) -> List[Dict]: 31 | def serialize_exc(exc: Exception) -> Dict: 32 | if isinstance(exc, StructuredError): 33 | return exc.to_dict() 34 | else: 35 | exc_type = exc.__class__ 36 | exc_tb = exc.__traceback__ 37 | return { 38 | 'type': exc_type.__qualname__, 39 | 'message': str(exc), 40 | 'readable': traceback.format_exception( 41 | exc_type, 42 | exc, 43 | exc_tb, 44 | chain=False 45 | ) 46 | } 47 | 48 | return 
[serialize_exc(exc) for exc in self._exc_chain()] 49 | 50 | def to_dict(self) -> Dict: 51 | return { 52 | 'type': self.__class__.__qualname__, 53 | 'message': self.message, 54 | 'data': self.data, 55 | 'readable': traceback.format_exception( 56 | self.__class__, 57 | self, 58 | self.__traceback__, 59 | chain=False 60 | ) 61 | } 62 | 63 | 64 | class ConfigurationError(StructuredError): 65 | """ Raised for invalid configuration """ 66 | pass 67 | 68 | 69 | class DeltasParseError(StructuredError): 70 | """ 71 | Raised for invalid delta strings 72 | 73 | - In configuration. 74 | - In PV or PVC annotations. 75 | """ 76 | pass 77 | 78 | 79 | class RuleDependsOn(StructuredError): 80 | pass 81 | 82 | 83 | class VolumeNotFound(StructuredError): 84 | pass 85 | 86 | 87 | class UnsupportedVolume(StructuredError): 88 | """ Raised for PersistentVolumes we can't snapshot """ 89 | pass 90 | 91 | 92 | class SnapshotCreateError(StructuredError): 93 | pass 94 | 95 | 96 | class AnnotationError(StructuredError): 97 | pass 98 | 99 | 100 | class AnnotationNotFound(AnnotationError): 101 | pass 102 | -------------------------------------------------------------------------------- /k8s_snapshots/__main__.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import functools 3 | import signal 4 | import sys 5 | 6 | import confcollect 7 | import structlog 8 | 9 | import k8s_snapshots.config 10 | from k8s_snapshots.logconf import configure_from_config 11 | from k8s_snapshots.core import daemon 12 | 13 | 14 | def main(): 15 | # Read config initially just to setup logging 16 | configure_from_config(k8s_snapshots.config.from_environ_basic()) 17 | 18 | # Now with logging setup, read and validate the config. 19 | config = k8s_snapshots.config.from_environ() 20 | 21 | if config['debug']: 22 | sys.excepthook = debug_excepthook 23 | 24 | # Late import to keep module-level get_logger after configure_from_config 25 | _logger = structlog.get_logger(__name__) 26 | 27 | _logger.bind( 28 | gcloud_project=config['gcloud_project'], 29 | deltas_annotation_key=config['deltas_annotation_key'], 30 | ) 31 | 32 | loop = asyncio.get_event_loop() 33 | 34 | main_task = asyncio.ensure_future(daemon(config)) 35 | 36 | _log = _logger.new(loop=loop, main_task=main_task) 37 | 38 | def handle_signal(name, timeout=10): 39 | _log.info('Received signal', signal_name=name) 40 | 41 | if main_task.cancelled(): 42 | _log.info('main task already cancelled, forcing a quit') 43 | return 44 | 45 | _log.info( 46 | 'Cancelling main task', 47 | task_cancel=main_task.cancel() 48 | ) 49 | 50 | for sig_name in ['SIGINT', 'SIGTERM']: 51 | loop.add_signal_handler( 52 | getattr(signal, sig_name), 53 | functools.partial(handle_signal, sig_name)) 54 | 55 | loop.add_signal_handler(signal.SIGUSR1, print_tasks) 56 | 57 | try: 58 | loop.run_until_complete(main_task) 59 | except asyncio.CancelledError: 60 | _log.exception('main task cancelled') 61 | except Exception as exc: 62 | _log.exception('Unhandled exception in main task') 63 | raise 64 | finally: 65 | loop.run_until_complete(shutdown(loop=loop)) 66 | 67 | 68 | def debug_excepthook(exc_type, exc, exc_tb): 69 | import pdb 70 | loop = asyncio.get_event_loop() 71 | loop.stop() 72 | pdb.post_mortem(exc_tb) 73 | sys.__excepthook__(exc_type, exc, exc_tb) 74 | 75 | 76 | _shutdown = False 77 | 78 | 79 | async def shutdown(*, loop=None): 80 | _logger = structlog.get_logger() 81 | global _shutdown 82 | if _shutdown: 83 | _logger.warning('Already shutting down') 84 | return 85 
| 86 | _shutdown = True 87 | 88 | _logger.debug( 89 | 'shutting down', 90 | ) 91 | 92 | print_tasks() 93 | 94 | _logger.info('Shutdown complete') 95 | 96 | 97 | def print_tasks(): 98 | tasks = list(asyncio.all_tasks()) 99 | structlog.get_logger().debug('print tasks', tasks=tasks) 100 | 101 | 102 | if __name__ == '__main__': 103 | sys.exit(main() or 0) 104 | -------------------------------------------------------------------------------- /tests/test_volume_from_pvc.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | 3 | import pytest 4 | import pykube 5 | 6 | from k8s_snapshots import errors 7 | from k8s_snapshots.core import volume_from_pvc 8 | from tests.fixtures import make_resource 9 | from tests.fixtures.kube import mock_kube 10 | 11 | PV_RESOURCE = make_resource( 12 | pykube.objects.PersistentVolume, 13 | 'test-pv', 14 | ) 15 | 16 | 17 | @pytest.mark.parametrize( 18 | [ 19 | 'resource', # resource to get volume from 20 | 'resources', # resources in mocked kube 21 | 'expected_volume_index', # index in 'resources' for the expected volume 22 | ], 23 | [ 24 | pytest.param( 25 | make_resource( 26 | pykube.objects.PersistentVolumeClaim, 27 | 'test-pvc', 28 | spec={ 29 | 'volumeName': 'correct-pv' 30 | } 31 | ), 32 | [ 33 | make_resource( 34 | pykube.objects.PersistentVolume, 35 | 'incorrect-pv', 36 | ), 37 | make_resource( 38 | pykube.objects.PersistentVolume, 39 | 'correct-pv', 40 | ), 41 | ], 42 | 1, 43 | id='valid_from_volume_claim' 44 | ), 45 | pytest.param( 46 | make_resource( 47 | pykube.objects.PersistentVolumeClaim, 48 | 'test-pvc', 49 | spec={ 50 | 'volumeName': 'nonexistent-pv' 51 | } 52 | ), 53 | [ 54 | make_resource( 55 | pykube.objects.PersistentVolume, 56 | 'existing-but-different-pv' 57 | ) 58 | ], 59 | None, 60 | id='no_volume_for_claim', 61 | marks=pytest.mark.xfail( 62 | reason='Volume referred by claim\'s .spec.volumeName does not ' 63 | 'exist', 64 | raises=errors.VolumeNotFound, 65 | strict=True, 66 | ) 67 | ), 68 | pytest.param( 69 | make_resource( 70 | pykube.objects.PersistentVolumeClaim, 71 | 'claim-without-spec-volumename', 72 | ), 73 | [], 74 | None, 75 | id='claim_without_spec_volumeName', 76 | marks=pytest.mark.xfail( 77 | reason='Invalid claim spec, missing .spec.volumeName', 78 | raises=errors.VolumeNotFound, 79 | strict=True, 80 | ) 81 | ) 82 | ] 83 | ) 84 | def test_volume_from_resource( 85 | fx_context, 86 | resource, 87 | resources, 88 | expected_volume_index, 89 | ): 90 | loop = asyncio.get_event_loop() 91 | 92 | with mock_kube(resources): 93 | result = loop.run_until_complete( 94 | volume_from_pvc( 95 | ctx=fx_context, 96 | resource=resource, 97 | ) 98 | ) 99 | 100 | if expected_volume_index is not None: 101 | assert result == resources[expected_volume_index] 102 | -------------------------------------------------------------------------------- /k8s_snapshots/backends/abstract.py: -------------------------------------------------------------------------------- 1 | import enum 2 | import pendulum 3 | from typing import Dict, List, NamedTuple, Any 4 | from ..context import Context 5 | import pykube.objects 6 | 7 | 8 | @enum.unique 9 | class SnapshotStatus(enum.Enum): 10 | PENDING = 'snapshot.pending' 11 | COMPLETE = 'snapshot.complete' 12 | 13 | 14 | # It's up to a backend to decide how a disk should be identified. 15 | # However, it does need to be something that is hashable, ideally 16 | # a tuple. 
17 | DiskIdentifier = Any 18 | 19 | 20 | class Snapshot(NamedTuple): 21 | """ 22 | Identifies an existing snapshot. 23 | """ 24 | name: str 25 | created_at: pendulum.Pendulum 26 | # A disk id that is known to Kubernetes. 27 | disk: DiskIdentifier 28 | 29 | 30 | # Snapshot creation is a multi-step process. This is an arbitrary value that 31 | # a Cloud provider backend can return to refer to the snapshot within the 32 | # cloud as it's being created. This is distinct from :class:`Snapshot`, which 33 | # represents a completed snapshot. 34 | NewSnapshotIdentifier = Any 35 | 36 | 37 | def get_disk_identifier(volume: pykube.objects.PersistentVolume) -> DiskIdentifier: 38 | """Return a DiskIdentifier from a PersistentVolume.""" 39 | raise NotImplementedError() 40 | 41 | 42 | def supports_volume(volume: pykube.objects.PersistentVolume): 43 | """Return either the given persistent volume is supported by 44 | the backend.""" 45 | raise NotImplementedError() 46 | 47 | 48 | def validate_disk_identifier(disk_id: Dict) -> DiskIdentifier: 49 | """Should take the user-specified dictionary, and convert it to 50 | it's own, local `DiskIdentifier`. If the disk_id is not valid, 51 | it should raise a `ValueError` with a suitable error message. 52 | """ 53 | raise NotImplementedError() 54 | 55 | 56 | def load_snapshots(ctx: Context, label_filters: Dict[str, str]) -> List[Snapshot]: 57 | """ 58 | Return the existing snapshots. Important!! This function must filter 59 | the list of returned snapshots by ``label_filters``. This is because 60 | usually cloud providers make filtering part of their API. 61 | """ 62 | raise NotImplementedError() 63 | 64 | 65 | def create_snapshot( 66 | ctx: Context, 67 | disk: DiskIdentifier, 68 | snapshot_name: str, 69 | snapshot_description: str 70 | ) -> NewSnapshotIdentifier: 71 | """ 72 | Create a snapshot for the given disk. 73 | 74 | This operation is expected to be asynchronous, so the value you return 75 | will identify the snapshot for the next call. 76 | """ 77 | raise NotImplementedError() 78 | 79 | 80 | def get_snapshot_status( 81 | ctx: Context, 82 | snapshot_identifier: NewSnapshotIdentifier 83 | ) -> SnapshotStatus: 84 | """ 85 | Should return the current status of the snapshot. 86 | """ 87 | raise NotImplementedError() 88 | 89 | 90 | def set_snapshot_labels( 91 | ctx: Context, 92 | snapshot_identifier: NewSnapshotIdentifier, 93 | labels: Dict 94 | ): 95 | """ 96 | Set labels on the snapshot. 97 | """ 98 | raise NotImplementedError() 99 | 100 | 101 | def delete_snapshot( 102 | ctx: Context, 103 | snapshot: Snapshot 104 | ): 105 | """ 106 | Delete the snapshot given. 
107 | """ 108 | raise NotImplementedError() 109 | -------------------------------------------------------------------------------- /k8s_snapshots/backends/digitalocean.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List, NamedTuple 2 | import digitalocean 3 | from digitalocean.baseapi import NotFoundError 4 | from .abstract import ( 5 | DiskIdentifier, Snapshot, NewSnapshotIdentifier, SnapshotStatus) 6 | import pendulum 7 | import pykube.objects 8 | from ..context import Context 9 | import structlog 10 | 11 | 12 | _logger = structlog.get_logger(__name__) 13 | 14 | 15 | class DODiskIdentifier(NamedTuple): 16 | volume_id: str 17 | 18 | 19 | class InvalidVolumeNameError(ValueError): 20 | def __init__(self, volume_name): 21 | super().__init__("DigitalOcean has no volume named %s.", volume_name) 22 | 23 | 24 | def get_disk_identifier( 25 | volume: pykube.objects.PersistentVolume 26 | ) -> DODiskIdentifier: 27 | volume_id = volume.obj['spec']['csi']['volumeHandle'] 28 | 29 | return DODiskIdentifier(volume_id=volume_id) 30 | 31 | 32 | def supports_volume(volume: pykube.objects.PersistentVolume): 33 | csi = volume.obj['spec'].get('csi') 34 | return csi is not None and csi.get('driver') == 'dobs.csi.digitalocean.com' 35 | 36 | 37 | def validate_disk_identifier(disk_id: Dict) -> DiskIdentifier: 38 | try: 39 | do_volumes = digitalocean.Manager().get_all_volumes() 40 | volume_name = disk_id['volumeName'] 41 | do_volume = next((volume for volume in do_volumes 42 | if volume.name == volume_name), 43 | None) 44 | 45 | if do_volume is None: 46 | raise InvalidVolumeNameError(volume_name) 47 | 48 | return DODiskIdentifier(volume_id=do_volume.id) 49 | except InvalidVolumeNameError as err: 50 | raise err 51 | except: 52 | raise ValueError(disk_id) 53 | 54 | 55 | def load_snapshots( 56 | ctx: Context, label_filters: Dict[str, str] 57 | ) -> List[Snapshot]: 58 | snapshots = digitalocean.Manager().get_volume_snapshots() 59 | 60 | tag_filters = set(k+':'+v for k, v in label_filters.items()) 61 | filtered = [snapshot 62 | for snapshot in snapshots 63 | if tag_filters.intersection(snapshot.tags)] 64 | 65 | _logger.debug('digitalocean.load_snaphots', label_filters=label_filters, 66 | tag_filters=tag_filters, snapshots_count=len(snapshots), 67 | filtered=filtered) 68 | 69 | return list(map(lambda snapshot: Snapshot( 70 | name=snapshot.id, 71 | created_at=pendulum.parse(snapshot.created_at), 72 | disk=DODiskIdentifier(volume_id=snapshot.resource_id), 73 | ), filtered)) 74 | 75 | 76 | def create_snapshot( 77 | ctx: Context, 78 | disk: DODiskIdentifier, 79 | snapshot_name: str, 80 | snapshot_description: str 81 | ) -> NewSnapshotIdentifier: 82 | volume = digitalocean.Volume(id=disk.volume_id) 83 | 84 | snapshot = volume.snapshot(snapshot_name) 85 | 86 | return snapshot['snapshot']['id'] 87 | 88 | 89 | def get_snapshot_status( 90 | ctx: Context, 91 | snapshot_identifier: NewSnapshotIdentifier 92 | ) -> SnapshotStatus: 93 | # DO provides no way to know if a snapshost has finished 94 | return SnapshotStatus.COMPLETE 95 | 96 | 97 | def set_snapshot_labels( 98 | ctx: Context, 99 | snapshot_identifier: NewSnapshotIdentifier, 100 | labels: Dict 101 | ): 102 | for label, value in labels.items(): 103 | tag_name = label + ":" + value 104 | tag = digitalocean.Tag(name=tag_name) 105 | 106 | # Create the tag if it does not exist yet. 
107 | _create_missing_tag(tag) 108 | 109 | tag.add_snapshots(snapshot_identifier) 110 | 111 | 112 | def _create_missing_tag(tag: digitalocean.Tag): 113 | # If the tag does not exist, load() raise NotFoundError so we create it. 114 | try: 115 | tag.load() 116 | return 117 | except NotFoundError: 118 | tag.create() 119 | 120 | 121 | def delete_snapshot( 122 | ctx: Context, 123 | snapshot: Snapshot 124 | ): 125 | do_snapshot = digitalocean.Manager().get_snapshot(snapshot.name) 126 | do_snapshot.destroy() 127 | -------------------------------------------------------------------------------- /k8s_snapshots/asyncutils.py: -------------------------------------------------------------------------------- 1 | # Consider: https://github.com/vxgmichel/aiostream 2 | 3 | import asyncio 4 | 5 | import structlog 6 | from aiochannel import Channel 7 | 8 | _logger = structlog.get_logger() 9 | 10 | 11 | async def run_in_executor(func): 12 | return await asyncio.get_event_loop().run_in_executor(None, func) 13 | 14 | 15 | async def combine(**generators): 16 | """Given a bunch of async generators, merges the events from 17 | all of them. Each should have a name, i.e. `foo=gen, bar=gen`. 18 | """ 19 | combined = Channel() 20 | 21 | async def listen_and_forward(name, generator): 22 | async for value in generator: 23 | await combined.put({name: value}) 24 | 25 | tasks = [] 26 | for name, generator in generators.items(): 27 | task = asyncio.ensure_future(listen_and_forward(name, generator)) 28 | 29 | # When task one or fails, close channel so that later our 30 | # iterator stops reading. 31 | def cb(task): 32 | if task.exception(): 33 | combined.close() 34 | task.add_done_callback(cb) 35 | tasks.append(task) 36 | 37 | # This one will stop when either all generators are exhaused, 38 | # or any one of the fails. 39 | async for item in combined: 40 | yield item 41 | 42 | # TODO: gather() can hang, and the task cancellation doesn't 43 | # really work. Happens if one of the generators has an error. 44 | # It seem that is because once we attach a done callback to 45 | # the task, gather() doesn't handle the exception anymore?? 46 | # Any tasks that are still running at this point, cancel them. 47 | for task in tasks: 48 | task.cancel() 49 | # Will consume any task exceptions 50 | await asyncio.gather(*tasks) 51 | 52 | 53 | async def combine_latest(defaults=None, **generators): 54 | """Like "combine", but always includes the latest value from 55 | every generator. 56 | """ 57 | current = defaults.copy() if defaults else {} 58 | async for value in combine(**generators): 59 | current.update(value) 60 | yield current 61 | 62 | 63 | async def debounce(stream, delay): 64 | debounced = Channel() 65 | loop = asyncio.get_event_loop() 66 | 67 | async def iterator(): 68 | scheduled_call = None 69 | async for item in stream: 70 | if scheduled_call: 71 | scheduled_call.cancel() 72 | scheduled_call = loop.call_later( 73 | delay, 74 | lambda: asyncio.ensure_future(debounced.put(item)) 75 | ) 76 | 77 | # Read the incoming iterator in a task. If the task fails, close the 78 | # channel so the iterator below will stop reading. 79 | task = asyncio.ensure_future(iterator()) 80 | def cb(task): 81 | if task.exception(): 82 | debounced.close() 83 | task.add_done_callback(cb) 84 | 85 | async for item in debounced: 86 | yield item 87 | 88 | task.cancel() 89 | await asyncio.gather(task) 90 | 91 | 92 | class StreamReader: 93 | """Allows iterating over the same iterable multiple times, at the same 94 | time. 
That is, while the source iterable is only running multiple times, 95 | you can consume it with more than one iterator. 96 | 97 | We begin reading from the source when the first iterator starts, and we 98 | stop once the later iterator leaves. 99 | """ 100 | 101 | def __init__(self, iterable): 102 | self._task = None 103 | self.iterable = iterable 104 | self.channels = [] 105 | 106 | async def _iterate_task(self): 107 | async for item in self.iterable: 108 | for channel in self.channels: 109 | await channel.put(item) 110 | 111 | def _ensure_running(self): 112 | if self._task: 113 | return 114 | 115 | self._task = asyncio.ensure_future(self._iterate_task()) 116 | def cb(task): 117 | if task.exception(): 118 | for channel in self.channels: 119 | channel.close() 120 | # Can we fail the channels here, propagate the 121 | # exception to the readers? 122 | raise task.exception() 123 | self._task.add_done_callback(cb) 124 | 125 | def _end(self): 126 | self._task.cancel() 127 | 128 | def iter(self): 129 | # Return a new channel that will receive all the events 130 | channel = Channel() 131 | self.channels.append(channel) 132 | self._ensure_running() 133 | return channel 134 | -------------------------------------------------------------------------------- /k8s_snapshots/kube.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import threading 3 | from typing import (Optional, Iterable, AsyncGenerator, TypeVar, Type, 4 | NamedTuple, Callable) 5 | 6 | import pykube 7 | import structlog 8 | from aiochannel import Channel 9 | 10 | from k8s_snapshots.context import Context 11 | 12 | _logger = structlog.get_logger(__name__) 13 | 14 | Resource = TypeVar( 15 | 'Resource', 16 | bound=pykube.objects.APIObject, 17 | ) 18 | 19 | ClientFactory = Callable[[], pykube.HTTPClient] 20 | 21 | # Copy of a locally-defined namedtuple in 22 | # pykube.query.WatchQuery.object_stream() 23 | _WatchEvent = NamedTuple('_WatchEvent', [ 24 | ('type', str), 25 | ('object', Resource), 26 | ]) 27 | 28 | 29 | class SnapshotRule(pykube.objects.APIObject): 30 | version = "k8s-snapshots.elsdoerfer.com/v1" 31 | endpoint = "snapshotrules" 32 | kind = "SnapshotRule" 33 | 34 | 35 | class Kubernetes: 36 | """ 37 | Allows for easier mocking of Kubernetes resources. 38 | """ 39 | 40 | def __init__(self, client_factory: Optional[ClientFactory] = None): 41 | """ 42 | 43 | Parameters 44 | ---------- 45 | client_factory 46 | Used in threaded operations to create a local 47 | :any:`pykube.HTTPClient` instance. 
48 | """ 49 | # Used for threaded operations 50 | self.client_factory = client_factory 51 | 52 | def get_or_none(self, 53 | resource_type: Type[Resource], 54 | name: str, 55 | namespace: Optional[str] = None) -> Optional[Resource]: 56 | """ 57 | Sync wrapper for :any:`pykube.query.Query().get_or_none` 58 | """ 59 | resource_query = resource_type.objects(self.client_factory()) 60 | if namespace is not None: 61 | resource_query = resource_query.filter(namespace=namespace) 62 | 63 | return resource_query.get_or_none(name=name) 64 | 65 | def watch( 66 | self, 67 | resource_type: Type[Resource], 68 | ) -> Iterable[_WatchEvent]: 69 | """ 70 | Sync wrapper for :any:`pykube.query.Query().watch().object_stream()` 71 | """ 72 | return resource_type.objects(self.client_factory())\ 73 | .filter(namespace=pykube.all).watch().object_stream() 74 | 75 | 76 | def get_resource_or_none_sync( 77 | client_factory: ClientFactory, 78 | resource_type: Type[Resource], 79 | name: str, 80 | namespace: Optional[str] = None) -> Optional[Resource]: 81 | return Kubernetes(client_factory).get_or_none( 82 | resource_type, 83 | name, 84 | namespace, 85 | ) 86 | 87 | 88 | async def get_resource_or_none(client_factory: ClientFactory, 89 | resource_type: Type[Resource], 90 | name: str, 91 | namespace: Optional[str] = None, 92 | *, 93 | loop=None) -> Optional[Resource]: 94 | loop = loop or asyncio.get_event_loop() 95 | 96 | def _get(): 97 | return get_resource_or_none_sync( 98 | client_factory=client_factory, 99 | resource_type=resource_type, 100 | name=name, 101 | namespace=namespace, 102 | ) 103 | 104 | return await loop.run_in_executor( 105 | None, 106 | _get, 107 | ) 108 | 109 | 110 | def watch_resources_sync( 111 | client_factory: ClientFactory, 112 | resource_type: pykube.objects.APIObject, 113 | ) -> Iterable: 114 | return Kubernetes(client_factory).watch(resource_type=resource_type) 115 | 116 | 117 | async def watch_resources(ctx: Context, 118 | resource_type: Resource, 119 | *, 120 | delay: int, 121 | allow_missing: bool = False, 122 | loop=None) -> AsyncGenerator[_WatchEvent, None]: 123 | """ Asynchronously watch Kubernetes resources """ 124 | async_gen = _watch_resources_thread_wrapper( 125 | ctx.kube_client, resource_type, allow_missing=allow_missing, loop=loop) 126 | 127 | # Workaround a race condition in pykube: 128 | # https: // github.com / kelproject / pykube / issues / 138 129 | await asyncio.sleep(delay) 130 | 131 | async for item in async_gen: 132 | yield item 133 | 134 | 135 | async def _watch_resources_thread_wrapper( 136 | client_factory: Callable[[], pykube.HTTPClient], 137 | resource_type: Type[Resource], 138 | allow_missing: bool = False, 139 | *, 140 | loop=None) -> AsyncGenerator[_WatchEvent, None]: 141 | """ Async wrapper for pykube.watch().object_stream() """ 142 | loop = loop or asyncio.get_event_loop() 143 | _log = _logger.bind(resource_type_name=resource_type.__name__, ) 144 | channel = Channel() 145 | 146 | def worker(): 147 | try: 148 | _log.debug('watch-resources.worker.start') 149 | while True: 150 | sync_iterator = watch_resources_sync( 151 | client_factory=client_factory, resource_type=resource_type) 152 | _log.debug('watch-resources.worker.watch-opened') 153 | for event in sync_iterator: 154 | # only put_nowait seems to cause SIGSEGV 155 | loop.call_soon_threadsafe(channel.put_nowait, event) 156 | _log.debug('watch-resources.worker.watch-closed') 157 | except pykube.exceptions.HTTPError as e: 158 | # TODO: It's possible that the user creates the resource 159 | # while we are already 
running. We should pick this up 160 |             # automatically, i.e. watch ThirdPartyResource, or just 161 |             # check every couple of seconds. 162 |             if e.code == 404 and allow_missing: 163 |                 _log.info('watch-resources.worker.skipped') 164 |             else: 165 |                 _log.exception('watch-resources.worker.error') 166 |         except Exception: 167 |             _log.exception('watch-resources.worker.error') 168 |         finally: 169 |             _log.debug('watch-resources.worker.finalized') 170 |             channel.close() 171 | 172 |     thread = threading.Thread( 173 |         target=worker, 174 |         daemon=True, 175 |     ) 176 |     thread.start() 177 | 178 |     async for channel_event in channel: 179 |         yield channel_event 180 | 181 |     _log.debug('watch-resources.done') 182 | -------------------------------------------------------------------------------- /k8s_snapshots/rule.py: -------------------------------------------------------------------------------- 1 | from datetime import timedelta 2 | from typing import Dict, Any, List, Union, Iterable 3 | 4 | import attr 5 | import isodate 6 | import pykube 7 | import structlog 8 | 9 | from k8s_snapshots import kube 10 | from k8s_snapshots.context import Context 11 | from k8s_snapshots.errors import ( 12 |     UnsupportedVolume, 13 |     AnnotationNotFound, 14 |     AnnotationError, 15 |     DeltasParseError 16 | ) 17 | from k8s_snapshots.kube import SnapshotRule 18 | from k8s_snapshots.logging import Loggable 19 | from k8s_snapshots.backends import find_backend_for_volume, get_backend 20 | from k8s_snapshots.backends.abstract import DiskIdentifier 21 | 22 | _logger = structlog.get_logger(__name__) 23 | 24 | 25 | @attr.s(slots=True) 26 | class Rule(Loggable): 27 |     """ 28 |     A rule describes how and when to make backups. 29 |     """ 30 |     name = attr.ib() 31 |     deltas = attr.ib() 32 |     backend = attr.ib() 33 |     disk = attr.ib() 34 | 35 |     #: For Kubernetes resources: The selfLink of the source 36 |     source = attr.ib(default=None) 37 | 38 |     def to_dict(self) -> Dict[str, Any]: 39 |         return attr.asdict(self) 40 | 41 | 42 | def get_backend_for_rule(ctx: Context, rule: Rule): 43 |     return get_backend(rule.backend) 44 | 45 | 46 | def rule_name_from_k8s_source( 47 |         source: Union[ 48 |             pykube.objects.PersistentVolumeClaim, 49 |             pykube.objects.PersistentVolume, 50 |             SnapshotRule 51 |         ], 52 |         name: str = '' 53 | ) -> str: 54 |     """Generates a name for a rule based on a Kubernetes resource; 55 |     the name is derived from the resource's kind, namespace and name. 56 |     """ 57 | 58 |     short_kind = { 59 |         'PersistentVolume': 'pv', 60 |         'PersistentVolumeClaim': 'pvc', 61 |         'SnapshotRule': 'rule' 62 |     }.pop(source.kind) 63 | 64 |     source_namespace = source.namespace 65 | 66 |     # PV's have a namespace set to an empty string '' 67 |     if source_namespace == 'default' or not source_namespace: 68 |         namespace = '' 69 |     else: 70 |         namespace = f'{source.namespace}-' 71 | 72 |     if not name: 73 |         name = source.name 74 |     rule_name = f'{namespace}{short_kind}-{name}' 75 | 76 |     _logger.debug( 77 |         'rule-name-from-k8s', 78 |         key_hints=[ 79 |             'source_namespace', 80 |             'source.kind', 81 |             'source.metadata.namespace', 82 |             'source.metadata.name', 83 |             'rule_name', 84 |         ], 85 |         source_namespace=source_namespace, 86 |         source=source.obj, 87 |         rule_name=rule_name, 88 |     ) 89 |     return rule_name 90 | 91 | 92 | def parse_deltas( 93 |         delta_string: str 94 | ) -> List[Union[timedelta, isodate.Duration]]: 95 |     """Parse the given string into a list of ``timedelta`` instances.
96 |     """ 97 |     if delta_string is None: 98 |         raise DeltasParseError( 99 |             'Delta string is None', 100 |         ) 101 | 102 |     deltas = [] 103 |     for item in delta_string.split(' '): 104 |         item = item.strip() 105 |         if not item: 106 |             continue 107 |         try: 108 |             deltas.append(isodate.parse_duration(item)) 109 |         except ValueError as exc: 110 |             raise DeltasParseError( 111 |                 f'Could not parse duration: {item!r}', 112 |                 error=exc, 113 |                 item=item, 114 |                 deltas=deltas, 115 |                 delta_string=delta_string, 116 |             ) from exc 117 | 118 |     if deltas and len(deltas) < 2: 119 |         raise DeltasParseError( 120 |             'At least two deltas are required', 121 |             deltas=deltas, 122 |             delta_string=delta_string, 123 |         ) 124 | 125 |     return deltas 126 | 127 | 128 | def serialize_deltas(deltas: Iterable[timedelta]) -> str: 129 |     delta_strs = [ 130 |         isodate.duration_isoformat(delta) 131 |         for delta in deltas 132 |     ] 133 |     return ' '.join(delta_strs) 134 | 135 | 136 | async def rule_from_pv( 137 |         ctx: Context, 138 |         volume: pykube.objects.PersistentVolume, 139 |         deltas: List[timedelta], 140 |         *, 141 |         source: Union[ 142 |             pykube.objects.PersistentVolumeClaim, 143 |             pykube.objects.PersistentVolume, 144 |             SnapshotRule 145 |         ] 146 | ) -> Rule: 147 |     """Given a persistent volume object, create a backup rule 148 |     object. Can return None if this volume is not configured for 149 |     backups, or is not suitable. 150 | 151 |     The configuration for the rule will either come from the volume, 152 |     or its claim, if one is associated. 153 |     """ 154 |     _log = _logger.new(volume=volume.obj) 155 | 156 |     # Do we have a backend that supports this disk? 157 |     backend_name, backend_module = find_backend_for_volume(volume) 158 |     if not backend_module: 159 |         raise UnsupportedVolume( 160 |             'Unsupported volume', 161 |             volume=volume 162 |         ) 163 | 164 |     # Let the backend parse and validate this volume.
165 | disk = backend_module.get_disk_identifier(volume) 166 | _log.debug('Volume supported by backend', 167 | volume=volume, backend=backend_module, disk=disk) 168 | 169 | # If configured, use the name from the claim 170 | claim_name = "" 171 | if ctx.config.get('use_claim_name'): 172 | claim_ref = volume.obj['spec'].get('claimRef') 173 | if claim_ref: 174 | claim_name = claim_ref.get('name') 175 | 176 | return Rule( 177 | name=rule_name_from_k8s_source(source, claim_name), 178 | backend=backend_name, 179 | source=source, 180 | deltas=deltas, 181 | disk=disk 182 | ) 183 | 184 | 185 | def get_deltas(annotations: Dict, deltas_annotation_key: str) -> List[timedelta]: 186 | """ 187 | Helper annotation-deltas-getter 188 | 189 | Parameters 190 | ---------- 191 | annotations 192 | 193 | Returns 194 | ------- 195 | 196 | """ 197 | try: 198 | deltas_str = annotations[deltas_annotation_key] 199 | except KeyError as exc: 200 | raise AnnotationNotFound( 201 | 'No such annotation key', 202 | key=deltas_annotation_key 203 | ) from exc 204 | 205 | if not deltas_str: 206 | raise AnnotationError('Invalid delta string', deltas_str=deltas_str) 207 | 208 | try: 209 | deltas = parse_deltas(deltas_str) 210 | except DeltasParseError as exc: 211 | raise AnnotationError( 212 | 'Invalid delta string', 213 | deltas_str=deltas_str 214 | ) from exc 215 | 216 | if deltas is None or not deltas: 217 | raise AnnotationError( 218 | 'parse_deltas returned invalid deltas', 219 | deltas_str=deltas_str, 220 | deltas=deltas, 221 | ) 222 | 223 | return deltas 224 | -------------------------------------------------------------------------------- /k8s_snapshots/backends/aws.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List, NamedTuple 2 | import pykube.objects 3 | import requests 4 | import pendulum 5 | import boto3 6 | from urllib.parse import urlparse 7 | from ..context import Context 8 | from k8s_snapshots.snapshot import Snapshot 9 | from .abstract import NewSnapshotIdentifier, SnapshotStatus 10 | from ..errors import SnapshotCreateError 11 | 12 | 13 | def validate_config(config): 14 | """Ensure the config of this backend is correct. 15 | 16 | manual volumes are validated by the backend 17 | - for aws, google cloud, need different data, say, region or zone. 18 | """ 19 | pass 20 | 21 | 22 | def supports_volume(volume: pykube.objects.PersistentVolume): 23 | """Returns `True` if the AWS backend can handle the given volume. 24 | 25 | Currently, this responds to volumes using the in-tree "awsElasticBlockStore" driver, 26 | as well as CSI volumes using `ebs.csi.aws.com`. 27 | """ 28 | if 'csi' in volume.obj['spec']: 29 | if volume.obj['spec'].get('csi')['driver'] == 'ebs.csi.aws.com': 30 | return True 31 | return bool(volume.obj['spec'].get('awsElasticBlockStore')) 32 | 33 | 34 | class AWSDiskIdentifier(NamedTuple): 35 | """An AWS volume id (e.g. `vol-07c6ffacaac8cf641`) + region (e.g. `eu-west-1`). 36 | """ 37 | region: str 38 | volume_id: str 39 | 40 | 41 | def get_current_region(ctx): 42 | """Get the current region from the metadata service. 
43 | """ 44 | if not ctx.config['aws_region']: 45 | response = requests.get( 46 | 'http://169.254.169.254/latest/meta-data/placement/availability-zone', 47 | timeout=5) 48 | response.raise_for_status() 49 | ctx.config['aws_region'] = response.text[:-1] 50 | 51 | return ctx.config['aws_region'] 52 | 53 | 54 | def get_disk_identifier(volume: pykube.objects.PersistentVolume) -> AWSDiskIdentifier: 55 | """Parses the AWS volume id, and the region the volume is in, from the given `PersistentVolume`, 56 | and returns them as a `AWSDiskIdentifier` tuple. 57 | 58 | This information is not encoded in a standard way and differs between Kubernetes versions and 59 | storage backends. 60 | """ 61 | 62 | csi = volume.obj['spec'].get('csi') 63 | if csi and csi['driver'] == 'ebs.csi.aws.com': 64 | volume_url = csi['volumeHandle'] 65 | else: 66 | volume_url = volume.obj['spec'].get('awsElasticBlockStore')['volumeID'] 67 | 68 | # A url such as `aws://eu-west-1a/vol-00292b2da3d4ed1e4`. The region is included. 69 | if volume_url.startswith('aws://'): 70 | parts = urlparse(volume_url) 71 | zone = parts.netloc 72 | volume_id = parts.path[1:] 73 | 74 | return AWSDiskIdentifier(region=zone[:-1], volume_id=volume_id) 75 | 76 | # We then assume the volume id is given directly, e.g. `vol-00292b2da3d4ed1e4`. 77 | volume_id = volume_url 78 | 79 | # We still need the region. Sometimes there is a label: 80 | region = volume.obj.get('metadata').get('labels', {}).get('failure-domain.beta.kubernetes.io/region') 81 | if region: 82 | return AWSDiskIdentifier(region=region, volume_id=volume_id) 83 | 84 | # Or we would expect there to be a nodeAffinity selector 85 | nodeSelectorTerms = volume.obj['spec']['nodeAffinity']['required']['nodeSelectorTerms'] 86 | for term in nodeSelectorTerms: 87 | matchExpressions = term.get('matchExpressions') 88 | if matchExpressions: 89 | for expression in matchExpressions: 90 | if expression.get('key') in ("failure-domain.beta.kubernetes.io/region",): 91 | region = expression.get('values')[0] 92 | if expression.get('key') in ('topology.ebs.csi.aws.com/zone',): 93 | region = expression.get('values')[0][:-1] 94 | 95 | return AWSDiskIdentifier(region=region, volume_id=volume_id) 96 | 97 | 98 | def parse_timestamp(date) -> pendulum.Pendulum: 99 | return pendulum.instance(date) 100 | 101 | 102 | def validate_disk_identifier(disk_id: Dict): 103 | try: 104 | return AWSDiskIdentifier( 105 | region=disk_id['region'], 106 | volume_id=disk_id['volumeId'] 107 | ) 108 | except: 109 | raise ValueError(disk_id) 110 | 111 | # AWS can filter by volume-id, which means we wouldn't have to match in Python. 112 | # In any case, it might be easier to let the backend handle the matching. Then 113 | # it relies less on the DiskIdentifier object always matching. 
114 | #filters={'volume-id': volume.id} 115 | def load_snapshots(ctx: Context, label_filters: Dict[str, str]) -> List[Snapshot]: 116 |     connection = get_connection(ctx, region=get_current_region(ctx)) 117 | 118 |     snapshots = connection.describe_snapshots( 119 |         OwnerIds=['self'], 120 |         Filters=[{'Name': f'tag:{k}', 'Values': [v]} for k, v in label_filters.items()] 121 |     ) 122 | 123 |     return list(map(lambda snapshot: Snapshot( 124 |         name=snapshot['SnapshotId'], 125 |         created_at=parse_timestamp(snapshot['StartTime']), 126 |         disk=AWSDiskIdentifier( 127 |             volume_id=snapshot['VolumeId'], 128 |             region=ctx.config['aws_region'] 129 |         ) 130 |     ), snapshots['Snapshots'])) 131 | 132 | 133 | def create_snapshot( 134 |         ctx: Context, 135 |         disk: AWSDiskIdentifier, 136 |         snapshot_name: str, 137 |         snapshot_description: str 138 | ) -> NewSnapshotIdentifier: 139 | 140 |     connection = get_connection(ctx, disk.region) 141 | 142 |     # TODO: Seems like the API doesn't actually allow us to set a snapshot 143 |     # name, although it's possible in the UI. 144 |     snapshot = connection.create_snapshot( 145 |         VolumeId=disk.volume_id, 146 |         Description=snapshot_name 147 |     ) 148 | 149 |     return { 150 |         'id': snapshot['SnapshotId'], 151 |         'region': disk.region 152 |     } 153 | 154 | 155 | def get_snapshot_status( 156 |         ctx: Context, 157 |         snapshot_identifier: NewSnapshotIdentifier 158 | ) -> SnapshotStatus: 159 |     connection = get_connection(ctx, snapshot_identifier['region']) 160 | 161 |     snapshots = connection.describe_snapshots( 162 |         SnapshotIds=[snapshot_identifier['id']] 163 |     ) 164 |     snapshot = snapshots['Snapshots'][0] 165 | 166 |     # Can be pending | completed | error 167 |     if snapshot['State'] == 'pending': 168 |         return SnapshotStatus.PENDING 169 |     elif snapshot['State'] == 'completed': 170 |         return SnapshotStatus.COMPLETE 171 |     elif snapshot['State'] == 'error': 172 |         raise SnapshotCreateError(snapshot['State']) 173 |     else: 174 |         raise NotImplementedError() 175 | 176 | 177 | def set_snapshot_labels( 178 |         ctx: Context, 179 |         snapshot_identifier: NewSnapshotIdentifier, 180 |         labels: Dict 181 | ): 182 |     connection = get_connection(ctx, snapshot_identifier['region']) 183 |     connection.create_tags( 184 |         Resources=[snapshot_identifier['id']], 185 |         Tags=[{'Key': k, 'Value': v} for k, v in labels.items()] 186 |     ) 187 | 188 | 189 | def delete_snapshot( 190 |         ctx: Context, 191 |         snapshot: Snapshot 192 | ): 193 |     connection = get_connection(ctx, snapshot.disk.region) 194 |     connection.delete_snapshot(SnapshotId=snapshot.name) 195 | 196 | 197 | def get_connection(ctx: Context, region): 198 |     connection = boto3.client('ec2', region_name=region) 199 |     return connection 200 | -------------------------------------------------------------------------------- /k8s_snapshots/logconf.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import logging.config 3 | from collections import OrderedDict 4 | from typing import Optional, List, Any, Dict 5 | 6 | import structlog 7 | import sys 8 | 9 | from k8s_snapshots import serialize 10 | 11 | 12 | class ProcessStructuredErrors: 13 |     def __init__(self): 14 |         pass 15 | 16 |     def __call__(self, logger, method_name, event_dict): 17 |         exc_info = event_dict.pop('exc_info', None) 18 | 19 |         if exc_info is None: 20 |             return event_dict 21 | 22 |         exc_type, exc, exc_tb = structlog.processors._figure_out_exc_info( 23 |             exc_info) 24 | 25 |         __structlog__ = getattr(exc, '__structlog__', None) 26 | 27 |         if not callable(__structlog__): 28 |             event_dict['exc_info']
= exc_info 29 | return event_dict 30 | 31 | structured_error = __structlog__() 32 | event_dict['structured_error'] = structured_error 33 | 34 | return event_dict 35 | 36 | 37 | def add_message(logger, method_name, event_dict): 38 | """ 39 | Creates a ``message`` value based on the ``hint`` and ``key_hint`` keys. 40 | 41 | ``key_hint`` : ``Optional[str]`` 42 | a '.'-separated path of dictionary keys. 43 | 44 | ``hint`` : ``Optional[str]`` 45 | will be formatted using ``.format(**event_dict)``. 46 | """ 47 | def from_hint(ed): 48 | hint = event_dict.pop('hint', None) 49 | if hint is None: 50 | return 51 | 52 | try: 53 | return hint.format(**event_dict) 54 | except Exception as exc: 55 | return f'! error formatting message: {exc!r}' 56 | 57 | def path_value(dict_: Dict[str, Any], key_path: str) -> Optional[Any]: 58 | value = dict_ 59 | 60 | for key in key_path.split('.'): 61 | if value is None: 62 | return 63 | 64 | __structlog__ = getattr(value, '__structlog__', None) 65 | if __structlog__ is not None: 66 | value = __structlog__() 67 | 68 | value = value.get(key) 69 | 70 | return value 71 | 72 | def from_key_hint(ed) -> Optional[str]: 73 | key_hint = ed.pop('key_hint', None) 74 | if key_hint is None: 75 | return 76 | 77 | value = path_value(ed, key_hint) 78 | 79 | return format_kv(key_hint, value) 80 | 81 | def from_key_hints(ed) -> List[str]: 82 | key_hints = ed.pop('key_hints', None) 83 | if key_hints is None: 84 | return [] 85 | 86 | return [ 87 | format_kv(key_hint, path_value(ed, key_hint)) 88 | for key_hint in key_hints 89 | ] 90 | 91 | def format_kv(key: str, value: Any) -> str: 92 | return f'{key}={serialize.process(value)}' 93 | 94 | hints = [ 95 | from_hint(event_dict), 96 | from_key_hint(event_dict) 97 | ] 98 | hints += from_key_hints(event_dict) 99 | 100 | if all(hint is None for hint in hints): 101 | if event_dict.get('message') is None: 102 | event_dict['message'] = event_dict.get('event') 103 | return event_dict 104 | 105 | prefix = event_dict['event'] 106 | hint = ', '.join(hint for hint in hints if hint is not None) 107 | 108 | message = event_dict.get('message') 109 | if message is not None: 110 | message = f'{prefix}: {message}, {hint}' 111 | else: 112 | message = f'{prefix}: {hint}' 113 | 114 | event_dict['message'] = message 115 | return event_dict 116 | 117 | 118 | def configure_from_config(config): 119 | configure_logging( 120 | level_name=config['log_level'], 121 | for_humans=not config['json_log'], 122 | json_indent=config['structlog_json_indent'] or None, 123 | ) 124 | 125 | 126 | def configure_logging( 127 | level_name: str='INFO', 128 | for_humans: bool=False, 129 | json_indent: Optional[int]=None, 130 | ): 131 | configure_structlog( 132 | for_humans=for_humans, 133 | json_indent=json_indent, 134 | level_name=level_name, 135 | ) 136 | 137 | 138 | def configure_structlog( 139 | for_humans: bool=False, 140 | json_indent: Optional[int]=None, 141 | level_name: str='INFO' 142 | ): 143 | key_order = ['message', 'event', 'level'] 144 | timestamper = structlog.processors.TimeStamper(fmt='ISO') 145 | 146 | processors = [ 147 | event_enum_to_str, 148 | ProcessStructuredErrors(), 149 | structlog.stdlib.add_logger_name, 150 | structlog.stdlib.add_log_level, 151 | rename_level_to_severity, 152 | timestamper, 153 | structlog.processors.StackInfoRenderer(), 154 | structlog.processors.format_exc_info, 155 | add_func_name, 156 | add_message, 157 | order_keys(key_order), 158 | structlog.stdlib.ProcessorFormatter.wrap_for_formatter, 159 | ] 160 | 161 | if for_humans: 162 | renderer 
= structlog.dev.ConsoleRenderer() # <=== 163 | else: 164 | # Make it so that 0 ⇒ None 165 | indent = json_indent or None 166 | renderer = structlog.processors.JSONRenderer( 167 | indent=indent, 168 | serializer=serialize.dumps 169 | ) 170 | 171 | foreign_pre_chain = [ 172 | # Add the log level and a timestamp to the event_dict if the log entry 173 | # is not from structlog. 174 | structlog.processors.StackInfoRenderer(), 175 | structlog.processors.format_exc_info, 176 | structlog.stdlib.add_log_level, 177 | structlog.stdlib.add_logger_name, 178 | foreign_event_to_message, 179 | rename_level_to_severity, 180 | timestamper, 181 | ] 182 | 183 | if level_name == 'DEBUG': 184 | root_logger_level = 'DEBUG' 185 | else: 186 | root_logger_level = 'ERROR' 187 | 188 | logging_config = { 189 | 'version': 1, 190 | 'disable_existing_loggers': False, 191 | 'formatters': { 192 | 'structlog': { 193 | '()': structlog.stdlib.ProcessorFormatter, 194 | 'processor': renderer, 195 | 'foreign_pre_chain': foreign_pre_chain, 196 | }, 197 | }, 198 | 'handlers': { 199 | 'default': { 200 | 'level': level_name, 201 | 'class': 'logging.StreamHandler', 202 | 'stream': sys.stdout, 203 | 'formatter': 'structlog', 204 | }, 205 | }, 206 | 'loggers': { 207 | '': { 208 | 'handlers': ['default'], 209 | 'level': root_logger_level, 210 | 'propagate': True, 211 | }, 212 | 'k8s_snapshots': { 213 | 'level': 'DEBUG', 214 | } 215 | } 216 | } 217 | 218 | logging.config.dictConfig(logging_config) 219 | 220 | structlog.configure( 221 | processors=processors, 222 | context_class=OrderedDict, 223 | logger_factory=structlog.stdlib.LoggerFactory(), 224 | wrapper_class=structlog.stdlib.BoundLogger, 225 | cache_logger_on_first_use=True, 226 | ) 227 | 228 | 229 | def foreign_event_to_message(logger, method_name, event_dict): 230 | event = event_dict.get('event') 231 | 232 | if event is not None and 'message' not in event_dict: 233 | event_dict['message'] = event 234 | event_dict['event'] = 'foreign' 235 | 236 | return event_dict 237 | 238 | 239 | def rename_level_to_severity(logger, method_name, event_dict): 240 | level = event_dict.pop('level', None) 241 | 242 | event_dict['severity'] = level.upper() 243 | 244 | return event_dict 245 | 246 | 247 | def add_func_name(logger, method_rame, event_dict): 248 | record = event_dict.get('_record') 249 | if record is None: 250 | return event_dict 251 | 252 | event_dict['function'] = record.funcName 253 | 254 | return event_dict 255 | 256 | 257 | def order_keys(order): 258 | """ 259 | Order keys for JSON readability when not using json_log=True 260 | """ 261 | def processor(logger, method_name, event_dict): 262 | if not isinstance(event_dict, OrderedDict): 263 | return event_dict 264 | 265 | for key in reversed(order): 266 | if key in event_dict: 267 | event_dict.move_to_end(key, last=False) 268 | 269 | return event_dict 270 | return processor 271 | 272 | 273 | def event_enum_to_str(logger, method_name, event_dict): 274 | from k8s_snapshots import events 275 | event = event_dict.get('event') 276 | if event is None: 277 | return event_dict 278 | 279 | if isinstance(event, events.EventEnum): 280 | event_dict['event'] = event.value 281 | 282 | return event_dict 283 | -------------------------------------------------------------------------------- /tests/fixtures/kube.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | from typing import Dict, Iterable, Optional, Tuple, List, Any, Type, Hashable, \ 3 | NamedTuple, Generator, Callable 4 | from 
unittest import mock 5 | from unittest.mock import MagicMock, Mock 6 | 7 | import structlog 8 | import pykube 9 | import pytest 10 | from _pytest.fixtures import FixtureRequest 11 | 12 | from k8s_snapshots import kube, errors 13 | from k8s_snapshots.context import Context 14 | 15 | _logger = structlog.get_logger(__name__) 16 | 17 | KUBE_SAFETY_CHECK_CONFIG_KEY = 'test-fixture-safety-check' 18 | 19 | KUBE_CONFIG = pykube.KubeConfig({ 20 | 'apiVersion': 'v1', 21 | 'kind': 'Config', 22 | 'clusters': [ 23 | { 24 | 'name': 'test-fixture-cluster', 25 | 'certificate-authority-data': 'From fixture fx_kube_config', 26 | 'server': 'http://test-fixture-server', 27 | }, 28 | ], 29 | 'contexts': [ 30 | { 31 | 'name': 'test-fixture-context', 32 | 'context': { 33 | 'cluster': 'test-fixture-cluster', 34 | 'user': 'test-fixture-user', 35 | }, 36 | }, 37 | ], 38 | 'current-context': 'test-fixture-context', 39 | KUBE_SAFETY_CHECK_CONFIG_KEY: 'I am present', 40 | }) 41 | 42 | LABEL_ZONE_VALUE = 'test-zone' 43 | 44 | LABEL_ZONE_KEY = 'failure-domain.beta.kubernetes.io/zone' 45 | LABEL_ZONE = {LABEL_ZONE_KEY: LABEL_ZONE_VALUE} 46 | 47 | DELTAS_ANNOTATION = 'PT1M PT2M' 48 | 49 | DEFAULT = object() 50 | 51 | 52 | @pytest.fixture(scope='session', autouse=True) 53 | def fx_mock_context_kube_config(): 54 | with mock.patch( 55 | 'k8s_snapshots.context.Context.load_kube_config', 56 | return_value=KUBE_CONFIG) as _mock: 57 | assert Context().load_kube_config() == KUBE_CONFIG 58 | yield _mock 59 | 60 | 61 | @pytest.fixture(scope='session', autouse=True) 62 | def fx_mock_context_kube_client(): 63 | def _fake_client(self: Context): 64 | return MagicMock( 65 | spec=pykube.HTTPClient, 66 | config=self.load_kube_config() 67 | ) 68 | with mock.patch( 69 | 'k8s_snapshots.context.Context.kube_client', 70 | _fake_client, 71 | ) as _mock: 72 | yield _mock 73 | 74 | 75 | @pytest.fixture 76 | def fx_kube_config(request: FixtureRequest) -> pykube.KubeConfig: 77 | """ 78 | Minimal fake pykube.HTTPClient config fixture. 
79 | """ 80 | return KUBE_CONFIG 81 | 82 | 83 | class MockKubernetes(kube.Kubernetes): 84 | def __init__(self, *args, **kwargs): 85 | super(MockKubernetes, self).__init__(*args, **kwargs) 86 | 87 | def get_or_none( 88 | self, 89 | resource_type: Type[kube.Resource], 90 | name: str, 91 | namespace: Optional[str]=None, 92 | ) -> Optional[kube.Resource]: 93 | return self.resource_map.get( 94 | self.make_key( 95 | resource_type, 96 | name, 97 | namespace 98 | ) 99 | ) 100 | 101 | def watch( 102 | self, 103 | resource_type: Type[kube.Resource], 104 | ): 105 | raise NotImplementedError 106 | 107 | # Mock-specific methods 108 | 109 | ResourceKey = NamedTuple( 110 | 'ResourceKey', 111 | [ 112 | ('namespace', str), 113 | ('resource_type', Type[kube.Resource]), 114 | ('name', str) 115 | ] 116 | ) 117 | 118 | resource_map: Dict[ResourceKey, kube.Resource] = {} 119 | 120 | # def filter_resources( 121 | # self, 122 | # namespace: Optional[str]=None, 123 | # resource_type: Optional[Type[kube.Resource]]=None, 124 | # name: Optional[str]=None 125 | # ) -> Generator[kube.Resource, None, None]: 126 | # tests: List[Callable[[self.ResourceKey, kube.Resource], bool]] 127 | # tests = [] 128 | # if namespace is not None: 129 | # tests.append(lambda k, v: k.namespace == namespace) 130 | # 131 | # if resource_type is not None: 132 | # tests.append(lambda k, v: k.resource_type == resource_type) 133 | # 134 | # if name is not None: 135 | # tests.append(lambda k, v: k.name == name) 136 | # 137 | # for key, resource in self.resource_map.items(): 138 | # if all(test(key, resource) for test in tests): 139 | # yield resource 140 | 141 | @classmethod 142 | def resource_key(cls, resource: kube.Resource) -> Hashable: 143 | return cls.make_key(type(resource), resource.name, resource.namespace) 144 | 145 | @classmethod 146 | def make_key( 147 | cls, 148 | resource_type: Type[kube.Resource], 149 | name: str, 150 | namespace: Any=DEFAULT, 151 | ) -> ResourceKey: 152 | if namespace is DEFAULT: 153 | namespace = 'default' 154 | return cls.ResourceKey(namespace, resource_type, name) 155 | 156 | @classmethod 157 | def add_resource(cls, resource, overwrite=False): 158 | key = cls.make_key(type(resource), resource.name, resource.namespace) 159 | if not overwrite and key in cls.resource_map: 160 | raise AssertionError( 161 | f'An object with the key {key!r} already exists in the ' 162 | f'resource map') 163 | _logger.debug('MockKubernetes.add_resource', resource=resource) 164 | cls.resource_map[key] = resource 165 | 166 | @classmethod 167 | @contextlib.contextmanager 168 | def patch(cls, resources: Iterable[kube.Resource]): 169 | try: 170 | _logger.debug( 171 | 'MockKubernetes.patch', 172 | message='Patching Kubernetes', 173 | resources=resources 174 | ) 175 | for resource in resources: 176 | cls.add_resource(resource) 177 | 178 | patch_kubernetes = mock.patch( 179 | 'k8s_snapshots.kube.Kubernetes', 180 | cls 181 | ) 182 | with patch_kubernetes: 183 | yield 184 | finally: 185 | _logger.debug( 186 | 'MockKubernetes.patch', 187 | message='Cleaning up after patch' 188 | ) 189 | cls.resource_map.clear() 190 | 191 | 192 | @contextlib.contextmanager 193 | def mock_kube(resources: Iterable[kube.Resource]): 194 | """ 195 | Mock the resources available through the `k8s_snapshots.kube.Kubernetes` 196 | abstraction. 
197 | 198 | Parameters 199 | ---------- 200 | resources 201 | 202 | Returns 203 | ------- 204 | The `k8s_snapshots.kube.Kubernetes` mock 205 | 206 | """ 207 | with MockKubernetes.patch(resources): 208 | yield 209 | 210 | 211 | def make_resource( 212 | resource_type: Type[kube.Resource], 213 | name, 214 | namespace=DEFAULT, 215 | labels=DEFAULT, 216 | annotations=DEFAULT, 217 | spec=DEFAULT, 218 | ) -> kube.Resource: 219 | """ 220 | Create a Kubernetes Resource. 221 | """ 222 | 223 | if namespace is DEFAULT: 224 | namespace = 'default' 225 | 226 | if annotations is DEFAULT: 227 | annotations = {} 228 | 229 | api = MagicMock( 230 | spec=pykube.HTTPClient, 231 | config=Mock() 232 | ) 233 | 234 | if spec is DEFAULT: 235 | spec = {} 236 | 237 | obj = { 238 | 'metadata': { 239 | 'name': name, 240 | 'annotations': annotations, 241 | 'selfLink': f'test/{namespace}/{resource_type.endpoint}/{name}' 242 | }, 243 | 'spec': spec, 244 | } 245 | 246 | if labels is not DEFAULT: 247 | obj['metadata']['labels'] = labels 248 | if namespace is not DEFAULT: 249 | obj['metadata']['namespace'] = namespace 250 | 251 | return resource_type(api, obj) 252 | 253 | 254 | def make_volume_and_claim( 255 | ctx, 256 | volume_name='test-pv', 257 | claim_name='test-pvc', 258 | volume_annotations=DEFAULT, 259 | claim_annotations=DEFAULT, 260 | claim_namespace=DEFAULT, 261 | volume_zone_label=DEFAULT, 262 | ) -> Tuple[ 263 | pykube.objects.PersistentVolume, 264 | pykube.objects.PersistentVolumeClaim 265 | ]: 266 | """ 267 | Creates 268 | 269 | """ 270 | if volume_zone_label is DEFAULT: 271 | volume_zone_label = {LABEL_ZONE_KEY: LABEL_ZONE_VALUE} 272 | 273 | pv = make_resource( 274 | pykube.objects.PersistentVolume, 275 | volume_name, 276 | annotations=volume_annotations, 277 | labels=volume_zone_label, 278 | spec={ 279 | 'claimRef': { 280 | 'name': claim_name, 281 | 'namespace': claim_namespace, 282 | }, 283 | 'gcePersistentDisk': { 284 | 'pdName': 'test-pd' 285 | } 286 | } 287 | ) 288 | 289 | pvc = make_resource( 290 | pykube.objects.PersistentVolumeClaim, 291 | claim_name, 292 | annotations=claim_annotations, 293 | namespace=claim_namespace, 294 | spec={ 295 | 'volumeName': volume_name, 296 | } 297 | ) 298 | 299 | return pv, pvc 300 | 301 | 302 | @pytest.fixture 303 | def fx_volume_zone_label(request): 304 | return {LABEL_ZONE_KEY: LABEL_ZONE_VALUE} 305 | 306 | 307 | @pytest.fixture 308 | def fx_annotation_deltas(request): 309 | deltas = request.getfixturevalue('fx_deltas') 310 | context = request.getfixturevalue('fx_context') 311 | return { 312 | context.config['deltas_annotation_key']: deltas 313 | } 314 | 315 | 316 | def spec_gce_persistent_disk(pd_name): 317 | return { 318 | 'gcePersistentDisk': { 319 | 'pdName': pd_name 320 | } 321 | } 322 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Interval-based Volume Snapshots and Expiry on Kubernetes 2 | ======================================================== 3 | 4 | **What you do:** Create a custom `SnapshotRule` resource which defines your desired snapshot intervals. 5 | **What I do:** Create snapshots of your volumes, and expire old ones using a Grandfather-father-son backup scheme. 6 | 7 | **Supported Environments**: 8 | 9 | - Google Compute Engine disks. 10 | - AWS EBS disks. 11 | - Digital Ocean. 12 | 13 | Want to help adding support for other backends? It's pretty straightforward. 
14 | Have a look at the [API that backends need to implement](https://github.com/miracle2k/k8s-snapshots/blob/master/k8s_snapshots/backends/abstract.py). 15 | 16 | 17 | Quickstart 18 | ---------- 19 | 20 | A persistent volume claim: 21 | 22 | ``` 23 | cat < 178 | spec: 179 | serviceAccountName: k8s-snapshots 180 | containers: 181 | - name: k8s-snapshots 182 | image: elsdoerfer/k8s-snapshots:v2.0 183 | 184 | ``` 185 | 186 | Further Configuration Options 187 | ----------------------------- 188 | 189 | 190 | ### Pinging a third party service 191 | 192 | 193 | 194 | 195 | 200 | 201 |
PING_URL 196 | We'll send a GET request to this url whenever a backup completes. 197 | This is useful for integrating with monitoring services like 198 | Cronitor or Dead Man's Snitch. 199 |
202 | 203 | 204 | ### Make snapshot names more readable 205 | 206 | If your persistent volumes are auto-provisioned by Kubernetes, then 207 | you'll end up with snapshot names such as 208 | ``pv-pvc-01f74065-8fe9-11e6-abdd-42010af00148``. If you want that 209 | prettier, set the enviroment variable ``USE_CLAIM_NAME=true``. Instead 210 | of the auto-generated name of the persistent volume, *k8s-snapshots* 211 | will instead use the name that you give to your 212 | ``PersistentVolumeClaim``. 213 | 214 | 215 | ### SnapshotRule resources 216 | 217 | It's possible to ask *k8s-snapshots* to create snapshots of volumes 218 | for which no `PersistentVolume` object exists within the Kubernetes 219 | cluster. For example, you might have a volume at your Cloud provider 220 | that you use within Kubernetes by referencing it directly. 221 | 222 | To do this, we use a custom Kubernetes resource, `SnapshotRule`. 223 | 224 | First, you need to create this custom resource. 225 | 226 | On Kubernetes 1.7 and higher: 227 | 228 | ``` 229 | cat < 310 | 311 | LOG_LEVEL 312 | **Default: INFO**. Possible values: DEBUG, INFO, WARNING, ERROR 313 | 314 | 315 | JSON_LOG 316 | **Default: False**. Output the log messages as JSON objects for 317 | easier processing. 318 | 319 | 320 | TZ 321 | **Default: UTC**. Used to change the timezone. ie. TZ=America/Montreal 322 | 323 | 324 | 325 | 326 | FAQ 327 | ---- 328 | 329 | **What if I manually create snapshots for the same volumes that 330 | *k8s-snapshots* manages?** 331 | 332 | Starting with v0.3, when *k8s-snapshots* decides when to create the 333 | next snapshot, and which snapshots it deletes, it no longer considers 334 | snapshots that are not correctly labeled by it. 335 | -------------------------------------------------------------------------------- /k8s_snapshots/snapshot.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import inspect 3 | from datetime import timedelta 4 | from typing import Dict, Tuple, List, Iterable, Callable, Union, \ 5 | Awaitable, Any, Set 6 | 7 | import aiohttp 8 | import pendulum 9 | import re 10 | import structlog 11 | from tarsnapper.expire import expire 12 | 13 | from k8s_snapshots import events, errors, serialize 14 | from k8s_snapshots.asyncutils import run_in_executor, combine_latest, debounce 15 | from k8s_snapshots.context import Context 16 | from k8s_snapshots.rule import Rule, get_backend_for_rule 17 | from .backends.abstract import Snapshot, NewSnapshotIdentifier, SnapshotStatus 18 | 19 | 20 | _logger = structlog.get_logger(__name__) 21 | 22 | 23 | async def expire_snapshots(ctx, rule: Rule): 24 | """ 25 | Expire existing snapshots for the rule. 
26 | """ 27 | _log = _logger.new( 28 | rule=rule, 29 | ) 30 | 31 | _log.debug(events.Expiration.STARTED) 32 | 33 | backend = get_backend_for_rule(ctx, rule) 34 | 35 | snapshots_objects = filter_snapshots_by_rule( 36 | await load_snapshots(ctx, {backend}), rule) 37 | snapshots_with_date = {s: s.created_at for s in snapshots_objects} 38 | 39 | to_keep = expire(snapshots_with_date, rule.deltas) 40 | expired_snapshots: List[str] = [] 41 | kept_snapshots = [] 42 | 43 | for snapshot, snapshot_time_created in snapshots_with_date.items(): 44 | _log_inner = _log.new( 45 | snapshot_name=snapshot.name, 46 | snapshot_time_created=snapshot_time_created, 47 | key_hints=[ 48 | 'snapshot_name', 49 | 'snapshot_time_created', 50 | ] 51 | ) 52 | 53 | if snapshot in to_keep: 54 | _log_inner.debug(events.Expiration.KEPT) 55 | kept_snapshots.append(snapshot.name) 56 | continue 57 | 58 | if snapshot not in to_keep: 59 | _log_inner.info(events.Expiration.DELETE) 60 | 61 | # TODO: Deleting a snapshot is usually an async process too, 62 | # and to be completely accurate, we should wait for it to complete. 63 | backend = get_backend_for_rule(ctx, rule) 64 | await run_in_executor( 65 | lambda: backend.delete_snapshot(ctx, snapshot) 66 | ) 67 | expired_snapshots.append(snapshot.name) 68 | 69 | _log.info( 70 | events.Expiration.COMPLETE, 71 | snapshots={ 72 | 'expired': expired_snapshots, 73 | 'kept': kept_snapshots, 74 | } 75 | ) 76 | 77 | 78 | async def make_backup(ctx, rule): 79 | """Execute a single backup job. 80 | 81 | 1. Create the snapshot 82 | 2. Wait until the snapshot is finished. 83 | 3. Expire old snapshots 84 | """ 85 | 86 | backend = get_backend_for_rule(ctx, rule) 87 | snapshot_name = new_snapshot_name(ctx, rule) 88 | 89 | _log = _logger.new( 90 | snapshot_name=snapshot_name, 91 | rule=rule 92 | ) 93 | 94 | time_start = pendulum.now() 95 | 96 | try: 97 | snapshot_identifier = await create_snapshot( 98 | ctx, 99 | rule, 100 | snapshot_name, 101 | snapshot_description=serialize.dumps(rule), 102 | ) 103 | 104 | _log.debug( 105 | 'snapshot.operation-started', 106 | key_hints=[ 107 | 'snapshot_name' 108 | ], 109 | snapshot_identifier=snapshot_identifier 110 | ) 111 | 112 | await poll_for_status( 113 | lambda: get_snapshot_status(ctx, backend, snapshot_identifier), 114 | retry_for=(SnapshotStatus.PENDING,) 115 | ) 116 | 117 | # TODO: If there is some kind of coding error, we should crash I think. 
118 | except Exception as exc: 119 | _log.exception( 120 | events.Snapshot.ERROR, 121 | key_hints=['snapshot_name', 'rule.name'] 122 | ) 123 | raise errors.SnapshotCreateError( 124 | 'Error creating snapshot' 125 | ) from exc 126 | 127 | await set_snapshot_labels( 128 | ctx, 129 | backend, 130 | snapshot_identifier, 131 | snapshot_labels(ctx), 132 | ) 133 | time_taken = pendulum.now() - time_start 134 | 135 | _log.info( 136 | events.Snapshot.CREATED, 137 | snapshot_identifier=snapshot_identifier, 138 | time_taken=time_taken, 139 | time_taken_seconds=time_taken.total_seconds(), 140 | key_hints=[ 141 | 'snapshot_name', 142 | 'rule.name', 143 | 'time_taken_seconds' 144 | ], 145 | ) 146 | 147 | ping_url = ctx.config.get('ping_url') 148 | if ping_url: 149 | async with aiohttp.ClientSession() as session: 150 | response = await session.request('GET', ping_url) 151 | _log.info( 152 | events.Ping.SENT, 153 | status=response.status, 154 | url=ping_url, 155 | ) 156 | 157 | await expire_snapshots(ctx, rule) 158 | 159 | 160 | async def create_snapshot( 161 | ctx: Context, 162 | rule: Rule, 163 | snapshot_name: str, 164 | snapshot_description: str 165 | ) -> NewSnapshotIdentifier: 166 | _log = _logger.new( 167 | disk=rule.disk, 168 | rule=rule, 169 | snapshot_name=snapshot_name, 170 | snapshot_description=snapshot_description 171 | ) 172 | 173 | _log.info( 174 | events.Snapshot.START, 175 | key_hints=['snapshot_name', 'rule.name'] 176 | ) 177 | 178 | backend = get_backend_for_rule(ctx, rule) 179 | return await run_in_executor( 180 | lambda: backend.create_snapshot( 181 | ctx, 182 | rule.disk, 183 | snapshot_name, 184 | snapshot_description 185 | ) 186 | ) 187 | 188 | 189 | async def poll_for_status( 190 | refresh_func: Callable[..., Union[Dict, Awaitable[Dict]]], 191 | retry_for: Tuple[SnapshotStatus], 192 | sleep_time: int=1, 193 | ): 194 | """ 195 | Call refresh_func until the return value is not one of the values 196 | in ``retry_for``. 197 | 198 | Parameters 199 | ---------- 200 | refresh_func 201 | Callable that returns either 202 | 203 | - The new version of the resource. 204 | - An awaitable for the new version of the resource. 205 | retry_for 206 | A list of statuses to retry for. 207 | sleep_time 208 | The time, in seconds, to sleep for between calls. 
209 | 210 | Returns 211 | ------- 212 | 213 | """ 214 | _log = _logger.new() 215 | refresh_count = 0 216 | time_start = pendulum.now() 217 | 218 | while True: 219 | await asyncio.sleep(sleep_time) # Sleep first 220 | 221 | result = refresh_func() 222 | if inspect.isawaitable(result): 223 | result = await result 224 | 225 | _log.debug( 226 | 'poll-for-status.refreshed', 227 | key_hints=[ 228 | 'result' 229 | ], 230 | refresh_count=refresh_count, 231 | result=result 232 | ) 233 | 234 | if not result in retry_for: 235 | break 236 | 237 | refresh_count += 1 238 | 239 | time_taken = pendulum.now() - time_start 240 | 241 | _log.debug( 242 | 'poll-for-status.done', 243 | key_hints=[ 244 | 'refresh_count', 245 | 'time_taken', 246 | ], 247 | refresh_count=refresh_count, 248 | time_start=time_start, 249 | time_taken=time_taken 250 | ) 251 | 252 | return result 253 | 254 | 255 | def snapshot_author_label(ctx: Context) -> Tuple[str, str]: 256 | return ( 257 | ctx.config['snapshot_author_label_key'], 258 | ctx.config['snapshot_author_label'] 259 | ) 260 | 261 | 262 | def snapshot_labels(ctx: Context) -> Dict: 263 | return dict([snapshot_author_label(ctx)]) 264 | 265 | 266 | async def set_snapshot_labels( 267 | ctx: Context, 268 | backend: Any, 269 | snapshot_identifier: NewSnapshotIdentifier, 270 | labels: Dict 271 | ): 272 | _log = _logger.new( 273 | snapshot_identifier=snapshot_identifier, 274 | labels=labels, 275 | ) 276 | 277 | _log.debug( 278 | 'snapshot.set-labels', 279 | key_hints=['body.labels'] 280 | ) 281 | return await run_in_executor( 282 | lambda: backend.set_snapshot_labels(ctx, snapshot_identifier, labels) 283 | ) 284 | 285 | 286 | def new_snapshot_name(ctx: Context, rule: Rule) -> str: 287 | """ 288 | Get a new snapshot name for rule. 289 | Returns rule name and pendulum.now('utc') formatted according to settings. 290 | """ 291 | 292 | time_str = re.sub( 293 | r'[^-a-z0-9]', '-', 294 | pendulum.now('utc').format(ctx.config['snapshot_datetime_format']), 295 | flags=re.IGNORECASE) 296 | 297 | # Won't be truncated 298 | suffix = f'-{time_str}' 299 | 300 | # Will be truncated 301 | name_truncated = rule.name[:63 - len(suffix)] 302 | 303 | return f'{name_truncated}{suffix}' 304 | 305 | 306 | async def get_snapshot_status( 307 | ctx: Context, 308 | backend: Any, 309 | snapshot_identifier: NewSnapshotIdentifier 310 | ): 311 | return await run_in_executor( 312 | lambda: backend.get_snapshot_status(ctx, snapshot_identifier) 313 | ) 314 | 315 | 316 | async def get_snapshots(ctx: Context, rulesgen, reload_trigger): 317 | """Query the existing snapshots from the cloud provider backend(s). 318 | 319 | "rules" are all the disk rules we know about, and through it, we know 320 | the set of backends that are in play, and that need to verified. 321 | 322 | If the channel "reload_trigger" contains any value, we 323 | refresh the list of snapshots. This will then cause the 324 | next backup to be scheduled. 325 | """ 326 | 327 | combined = combine_latest( 328 | rules=debounce(rulesgen, 4), 329 | reload=reload_trigger 330 | ) 331 | 332 | async for item in combined: 333 | # Figure out a set of backends that are in use with the rules 334 | backends = set() 335 | for rule in item['rules']: 336 | backends.add(get_backend_for_rule(ctx, rule)) 337 | 338 | # Load and yield the snapshots for the set of backends. 
339 | yield await load_snapshots(ctx, backends) 340 | 341 | 342 | async def load_snapshots(ctx: Context, backends: Set[Any]) -> List[Snapshot]: 343 | snapshot_label_filters = dict([snapshot_author_label(ctx)]) 344 | 345 | tasks = map(lambda backend: run_in_executor( 346 | lambda: backend.load_snapshots(ctx, snapshot_label_filters) 347 | ), backends) 348 | 349 | snapshot_results = await asyncio.gather(*tasks) 350 | return [snapshot for result in snapshot_results for snapshot in result] 351 | 352 | 353 | def determine_next_snapshot(snapshots, rules): 354 | """ 355 | Given a list of snapshots, and a list of rules, determine the next snapshot 356 | to be made. 357 | 358 | Returns a 2-tuple (rule, target_datetime) 359 | """ 360 | next_rule = None 361 | next_timestamp = None 362 | next_snapshot_times = None 363 | 364 | for rule in rules: 365 | _log = _logger.new(rule=rule) 366 | snapshot_times = get_snapshot_times_for_rule(snapshots, rule) 367 | 368 | # There are no snapshots for this rule; create the first one. 369 | if not snapshot_times: 370 | next_rule = rule 371 | next_timestamp = pendulum.now('utc') + timedelta(seconds=10) 372 | next_snapshot_times = snapshot_times 373 | break 374 | 375 | target = snapshot_times[0] + rule.deltas[0] 376 | if not next_timestamp or target < next_timestamp: 377 | next_rule = rule 378 | next_timestamp = target 379 | next_snapshot_times = snapshot_times 380 | 381 | if next_rule is not None and next_timestamp is not None: 382 | _logger.info( 383 | events.Snapshot.SCHEDULED, 384 | key_hints=['rule.name', 'target'], 385 | target=next_timestamp, 386 | rule=next_rule, 387 | times=list(map(lambda t: str(t), next_snapshot_times)) 388 | ) 389 | 390 | return next_rule, next_timestamp 391 | 392 | 393 | def get_snapshot_times_for_rule(snapshots: List[Snapshot], rule: Rule): 394 | # Find all the snapshots that match this rule 395 | # This returns a object 396 | snapshots_for_rule = filter_snapshots_by_rule(snapshots, rule) 397 | # Rewrite the list to snapshot 398 | snapshot_times = [item.created_at for item in snapshots_for_rule] 399 | # Sort by timestamp 400 | snapshot_times = sorted(snapshot_times, reverse=True) 401 | return list(snapshot_times) 402 | 403 | 404 | def filter_snapshots_by_rule(snapshots: List[Snapshot], rule: Rule) -> Iterable[Snapshot]: 405 | def match_disk(snapshot: Snapshot): 406 | return snapshot.disk == rule.disk 407 | return filter(match_disk, snapshots) 408 | 409 | 410 | async def is_snapshot_required(ctx: Context, rule: Rule): 411 | backend = get_backend_for_rule(ctx, rule) 412 | all_snapshots = await load_snapshots(ctx, {backend}) 413 | return snapshots_for_rule_are_outdated(rule, all_snapshots) 414 | 415 | 416 | def snapshots_for_rule_are_outdated(rule: Rule, existing_snapshots: List[Snapshot]): 417 | snapshot_times = get_snapshot_times_for_rule(existing_snapshots, rule) 418 | 419 | if not snapshot_times: 420 | return True 421 | 422 | next_snapshot_time = snapshot_times[0] + rule.deltas[0] 423 | return next_snapshot_time < pendulum.now('utc') 424 | -------------------------------------------------------------------------------- /k8s_snapshots/backends/google.py: -------------------------------------------------------------------------------- 1 | import json 2 | import pendulum 3 | import re 4 | import requests 5 | from typing import List, Dict, NamedTuple 6 | from googleapiclient import discovery 7 | from oauth2client.service_account import ServiceAccountCredentials 8 | from oauth2client.client import GoogleCredentials 9 | import pykube.objects 10 
| import structlog 11 | from k8s_snapshots.context import Context 12 | from .abstract import Snapshot, SnapshotStatus, DiskIdentifier, NewSnapshotIdentifier 13 | from ..errors import SnapshotCreateError, UnsupportedVolume 14 | 15 | 16 | _logger = structlog.get_logger(__name__) 17 | 18 | 19 | #: The regex that a snapshot name has to match. 20 | #: Regex provided by the createSnapshot error response. 21 | GOOGLE_SNAPSHOT_NAME_REGEX = r'^(?:[a-z](?:[-a-z0-9]{0,61}[a-z0-9])?)$' 22 | 23 | # Google Label keys and values must conform to the following restrictions: 24 | # - Keys and values cannot be longer than 63 characters each. 25 | # - Keys and values can only contain lowercase letters, numeric characters, 26 | # underscores, and dashes. International characters are allowed. 27 | # - Label keys must start with a lowercase letter and international characters 28 | # are allowed. 29 | # - Label keys cannot be empty. 30 | # See https://cloud.google.com/compute/docs/labeling-resources for more 31 | 32 | #: The regex that a label key and value has to match, additionally it has to be 33 | #: lowercase, this is checked with str().islower() 34 | GOOGLE_LABEL_REGEX = r'^(?:[-\w]{0,63})$' 35 | 36 | 37 | def get_project_id(ctx: Context): 38 | if not ctx.config['gcloud_project']: 39 | response = requests.get( 40 | 'http://metadata.google.internal/computeMetadata/v1/project/project-id', 41 | headers={ 42 | 'Metadata-Flavor': 'Google' 43 | }) 44 | response.raise_for_status() 45 | ctx.config['gcloud_project'] = response.text 46 | 47 | return ctx.config['gcloud_project'] 48 | 49 | 50 | # TODO: This is currently not called. When should we do so? Once the Google 51 | # Cloud backend is loaded for the first time? 52 | def validate_config(config): 53 | """Ensure the config of this backend is correct. 54 | """ 55 | 56 | is_valid = True 57 | 58 | test_datetime = pendulum.now('utc').format( 59 | config['snapshot_datetime_format']) 60 | test_snapshot_name = f'dummy-snapshot-{test_datetime}' 61 | 62 | if not re.match(GOOGLE_SNAPSHOT_NAME_REGEX, test_snapshot_name): 63 | _logger.error( 64 | 'config.error', 65 | key='snapshot_datetime_format', 66 | message='Snapshot datetime format returns invalid string. ' 67 | 'Note that uppercase characters are forbidden.', 68 | test_snapshot_name=test_snapshot_name, 69 | regex=GOOGLE_SNAPSHOT_NAME_REGEX 70 | ) 71 | is_valid = False 72 | 73 | # Configuration keys that are either a Google 74 | glabel_key_keys = {'snapshot_author_label'} 75 | glabel_value_keys = {'snapshot_author_label_key'} 76 | 77 | for key in glabel_key_keys | glabel_value_keys: 78 | value = config[key] # type: str 79 | re_match = re.match(GOOGLE_LABEL_REGEX, value) 80 | is_glabel_key = key in glabel_key_keys 81 | is_glabel_valid = ( 82 | re_match and value.islower() and 83 | value[0].isalpha() or not is_glabel_key 84 | ) 85 | 86 | if not is_glabel_valid: 87 | _logger.error( 88 | 'config.error', 89 | message=f'Configuration value is not a valid ' 90 | f'Google Label {"Key" if is_glabel_key else "Value"}. 
' 91 | f'See ' 92 | f'https://cloud.google.com/compute/docs/labeling-resources ' 93 | f'for more', 94 | key_hints=['value', 'regex'], 95 | key=key, 96 | is_lower=value.islower(), 97 | value=config[key], 98 | regex=GOOGLE_LABEL_REGEX, 99 | ) 100 | is_valid = False 101 | 102 | return is_valid 103 | 104 | 105 | class GoogleDiskIdentifier(NamedTuple): 106 | name: str 107 | regional: bool 108 | zone: str = None 109 | region: str = None 110 | 111 | 112 | def get_disk_identifier(volume: pykube.objects.PersistentVolume) -> GoogleDiskIdentifier: 113 | gce_disk = volume.obj['spec']['gcePersistentDisk']['pdName'] 114 | 115 | # How can we know the zone? In theory, the storage class can 116 | # specify a zone; but if not specified there, K8s can choose a 117 | # random zone within the master region. So we really can't trust 118 | # that value anyway. 119 | # There is a label that gives a failure region, but labels aren't 120 | # really a trustworthy source for this. 121 | # Apparently, this is a thing in the Kubernetes source too, see: 122 | # getDiskByNameUnknownZone in pkg/cloudprovider/providers/gce/gce.go, 123 | # e.g. https://github.com/jsafrane/kubernetes/blob/2e26019629b5974b9a311a9f07b7eac8c1396875/pkg/cloudprovider/providers/gce/gce.go#L2455 124 | gce_disk_zone = volume.labels.get( 125 | 'failure-domain.beta.kubernetes.io/zone' 126 | ) 127 | 128 | if not gce_disk_zone: 129 | raise UnsupportedVolume('cannot find the zone of the disk') 130 | 131 | gce_disk_region = volume.labels.get( 132 | 'failure-domain.beta.kubernetes.io/region' 133 | ) 134 | 135 | if not gce_disk_region: 136 | raise UnsupportedVolume('cannot find the region of the disk') 137 | 138 | if "__" in gce_disk_zone: 139 | # seems like Google likes to put __ in between zones in the label 140 | # failure-domain.beta.kubernetes.io/zone when the pv is regional 141 | return GoogleDiskIdentifier(name=gce_disk, region=gce_disk_region, regional=True) 142 | else: 143 | return GoogleDiskIdentifier(name=gce_disk, zone=gce_disk_zone, regional=False) 144 | 145 | 146 | def supports_volume(volume: pykube.objects.PersistentVolume): 147 | return bool(volume.obj['spec'].get('gcePersistentDisk')) 148 | 149 | 150 | def parse_timestamp(date_str: str) -> pendulum.Pendulum: 151 | return pendulum.parse(date_str).in_timezone('utc') 152 | 153 | 154 | def validate_disk_identifier(disk_id: Dict) -> DiskIdentifier: 155 | """Should take the user-specified dictionary, and convert it to 156 | it's own, local `DiskIdentifier`. If the disk_id is not valid, 157 | it should raise a `ValueError` with a suitable error message. 158 | """ 159 | 160 | try: 161 | return GoogleDiskIdentifier( 162 | zone=disk_id['zone'], 163 | name=disk_id['name'] 164 | ) 165 | except: 166 | raise ValueError(disk_id) 167 | 168 | 169 | def snapshot_list_filter_expr(label_filters: Dict[str, str]) -> str: 170 | key = list(label_filters.keys())[0] 171 | value = label_filters[key] 172 | return f'labels.{key} eq {value}' 173 | 174 | 175 | def load_snapshots(ctx, label_filters: Dict[str, str]) -> List[Snapshot]: 176 | """ 177 | Return the existing snapshots. 178 | """ 179 | snapshots = get_gcloud(ctx).snapshots() 180 | request = snapshots.list( 181 | project=get_project_id(ctx), 182 | filter=snapshot_list_filter_expr(label_filters), 183 | maxResults=500, 184 | ) 185 | 186 | loaded_snapshots = [] 187 | 188 | while request is not None: 189 | resp = request.execute() 190 | for item in resp.get('items', []): 191 | # We got to parse out the disk zone and name from the source disk. 
192 | # It's an url that ends with '/zones/{zone}/disks/{name}'/ 193 | sourceDiskList = item['sourceDisk'].split('/') 194 | 195 | disk = sourceDiskList[-1] 196 | 197 | if "regions" in sourceDiskList: 198 | region = sourceDiskList[8] 199 | loaded_snapshots.append(Snapshot( 200 | name=item['name'], 201 | created_at=parse_timestamp(item['creationTimestamp']), 202 | disk=GoogleDiskIdentifier(name=disk, region=region, regional=True) 203 | )) 204 | else: 205 | zone = sourceDiskList[8] 206 | loaded_snapshots.append(Snapshot( 207 | name=item['name'], 208 | created_at=parse_timestamp(item['creationTimestamp']), 209 | disk=GoogleDiskIdentifier(name=disk, zone=zone, regional=False) 210 | )) 211 | 212 | request = snapshots.list_next(request, resp) 213 | 214 | return loaded_snapshots 215 | 216 | 217 | def create_snapshot( 218 | ctx: Context, 219 | disk: GoogleDiskIdentifier, 220 | snapshot_name: str, 221 | snapshot_description: str 222 | ) -> NewSnapshotIdentifier: 223 | request_body = { 224 | 'name': snapshot_name, 225 | 'description': snapshot_description 226 | } 227 | 228 | gcloud = get_gcloud(ctx) 229 | 230 | # Returns a ZoneOperation: {kind: 'compute#operation', 231 | # operationType: 'createSnapshot', ...}. 232 | # Google's documentation is confusing regarding this, since there's two 233 | # tables of payload parameter descriptions on the page, one of them 234 | # describes the input parameters, but contains output-only parameters, 235 | # the correct table can be found at 236 | # https://cloud.google.com/compute/docs/reference/latest/disks/createSnapshot#response 237 | if disk.regional: 238 | operation = gcloud.regionDisks().createSnapshot( 239 | disk=disk.name, 240 | project=get_project_id(ctx), 241 | region=disk.region, 242 | body=request_body 243 | ).execute() 244 | return { 245 | 'snapshot_name': snapshot_name, 246 | 'region': disk.region, 247 | 'operation_name': operation['name'] 248 | } 249 | 250 | else: 251 | operation = gcloud.disks().createSnapshot( 252 | disk=disk.name, 253 | project=get_project_id(ctx), 254 | zone=disk.zone, 255 | body=request_body 256 | ).execute() 257 | return { 258 | 'snapshot_name': snapshot_name, 259 | 'zone': disk.zone, 260 | 'operation_name': operation['name'] 261 | } 262 | 263 | def get_snapshot_status( 264 | ctx: Context, 265 | snapshot_identifier: NewSnapshotIdentifier 266 | ) -> SnapshotStatus: 267 | """In Google Cloud, the createSnapshot operation returns a ZoneOperation 268 | object which goes from PENDING, to RUNNING, to DONE. 269 | The snapshot object itself can be CREATING, DELETING, FAILED, READY, 270 | or UPLOADING. 271 | 272 | We check both states to make sure the snapshot was created. 
273 | """ 274 | 275 | _log = _logger.new( 276 | snapshot_identifier=snapshot_identifier, 277 | ) 278 | 279 | gcloud = get_gcloud(ctx) 280 | 281 | # First, check the operation state 282 | 283 | if "region" in snapshot_identifier: 284 | operation = gcloud.regionOperations().get( 285 | project=get_project_id(ctx), 286 | region=snapshot_identifier['region'], 287 | operation=snapshot_identifier['operation_name'] 288 | ).execute() 289 | else: 290 | operation = gcloud.zoneOperations().get( 291 | project=get_project_id(ctx), 292 | zone=snapshot_identifier['zone'], 293 | operation=snapshot_identifier['operation_name'] 294 | ).execute() 295 | 296 | if not operation['status'] == 'DONE': 297 | _log.debug('google.status.operation_not_complete', 298 | status=operation['status']) 299 | return SnapshotStatus.PENDING 300 | 301 | # To be sure, check the state of the snapshot itself 302 | snapshot = gcloud.snapshots().get( 303 | snapshot=snapshot_identifier['snapshot_name'], 304 | project=get_project_id(ctx) 305 | ).execute() 306 | 307 | status = snapshot['status'] 308 | if status == 'FAILED': 309 | _log.debug('google.status.failed', 310 | status=status) 311 | raise SnapshotCreateError(status) 312 | elif status != 'READY': 313 | _log.debug('google.status.not_ready', 314 | status=status) 315 | return SnapshotStatus.PENDING 316 | 317 | return SnapshotStatus.COMPLETE 318 | 319 | 320 | def set_snapshot_labels( 321 | ctx: Context, 322 | snapshot_identifier: NewSnapshotIdentifier, 323 | labels: Dict 324 | ): 325 | gcloud = get_gcloud(ctx) 326 | 327 | snapshot = gcloud.snapshots().get( 328 | snapshot=snapshot_identifier['snapshot_name'], 329 | project=get_project_id(ctx) 330 | ).execute() 331 | 332 | body = { 333 | 'labels': labels, 334 | 'labelFingerprint': snapshot['labelFingerprint'], 335 | } 336 | return gcloud.snapshots().setLabels( 337 | resource=snapshot_identifier['snapshot_name'], 338 | project=get_project_id(ctx), 339 | body=body, 340 | ).execute() 341 | 342 | 343 | def delete_snapshot( 344 | ctx: Context, 345 | snapshot: Snapshot 346 | ): 347 | gcloud = get_gcloud(ctx) 348 | return gcloud.snapshots().delete( 349 | snapshot=snapshot.name, 350 | project=get_project_id(ctx) 351 | ).execute() 352 | 353 | 354 | def get_gcloud(ctx, version: str= 'v1'): 355 | """ 356 | Get a configured Google Compute API Client instance. 357 | 358 | Note that the Google API Client is not threadsafe. Cache the instance locally 359 | if you want to avoid OAuth overhead between calls. 
360 | 361 | Parameters 362 | ---------- 363 | version 364 | Compute API version 365 | """ 366 | SCOPES = 'https://www.googleapis.com/auth/compute' 367 | credentials = None 368 | 369 | if ctx.config.get('gcloud_credentials_file'): 370 | credentials = ServiceAccountCredentials.from_json_keyfile_name( 371 | ctx.config.get('gcloud_credentials_file'), 372 | scopes=SCOPES) 373 | 374 | if ctx.config.get('google_application_credentials'): 375 | keyfile = json.loads(ctx.config.get('google_application_credentials')) 376 | credentials = ServiceAccountCredentials.from_json_keyfile_dict( 377 | keyfile, scopes=SCOPES) 378 | 379 | if not credentials: 380 | credentials = GoogleCredentials.get_application_default() 381 | 382 | if not credentials: 383 | raise RuntimeError("Auth for Google Cloud was not configured") 384 | 385 | compute = discovery.build( 386 | 'compute', 387 | version, 388 | credentials=credentials, 389 | # https://github.com/google/google-api-python-client/issues/299#issuecomment-268915510 390 | cache_discovery=False 391 | ) 392 | return compute 393 | -------------------------------------------------------------------------------- /k8s_snapshots/core.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | TODO: prevent a backup loop: A failsafe mechanism to make sure we 4 | don't create more than x snapshots per disk; in case something 5 | is wrong with the code that loads the existing snapshots from GCloud. 6 | """ 7 | import asyncio 8 | from typing import List, AsyncIterable, Optional, Tuple, Dict 9 | 10 | import pendulum 11 | import pykube 12 | import structlog 13 | from aiochannel import Channel, ChannelEmpty 14 | from aiostream import stream 15 | 16 | from k8s_snapshots import events 17 | from k8s_snapshots.backends import get_backend 18 | from k8s_snapshots.asyncutils import combine_latest, StreamReader 19 | from k8s_snapshots.context import Context 20 | from k8s_snapshots.errors import ( 21 | AnnotationNotFound, 22 | AnnotationError, 23 | UnsupportedVolume, 24 | VolumeNotFound, 25 | ConfigurationError, 26 | DeltasParseError, 27 | RuleDependsOn) 28 | from k8s_snapshots.kube import ( 29 | watch_resources, 30 | get_resource_or_none, 31 | SnapshotRule, 32 | _WatchEvent) 33 | from k8s_snapshots.rule import ( 34 | rule_from_pv, Rule, parse_deltas, rule_name_from_k8s_source, get_deltas) 35 | from k8s_snapshots.snapshot import ( 36 | make_backup, 37 | get_snapshots, 38 | determine_next_snapshot, 39 | is_snapshot_required 40 | ) 41 | 42 | _logger = structlog.get_logger() 43 | 44 | 45 | async def volume_from_pvc( 46 | ctx: Context, 47 | resource: pykube.objects.PersistentVolumeClaim 48 | ) -> pykube.objects.PersistentVolume: 49 | """Given a `PersistentVolumeClaim`, return the `PersistentVolume` 50 | it is bound to.
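The lookup relies on the claim's spec.volumeName field, which Kubernetes fills in once the claim is bound; if the field is missing, or no volume by that name exists, VolumeNotFound is raised.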
51 | """ 52 | _log = _logger.new(resource=resource) 53 | 54 | pvc = resource 55 | 56 | try: 57 | volume_name = resource.obj['spec']['volumeName'] 58 | except KeyError as exc: 59 | raise VolumeNotFound( 60 | 'Could not get volume name from volume claim', 61 | volume_claim=pvc.obj 62 | ) from exc 63 | 64 | _log = _log.bind( 65 | volume_name=volume_name 66 | ) 67 | 68 | _log.debug( 69 | 'Looking for volume', 70 | key_hints=['volume_name'] 71 | ) 72 | 73 | volume = await get_resource_or_none( 74 | ctx.kube_client, 75 | pykube.objects.PersistentVolume, 76 | volume_name, 77 | ) 78 | if volume is None: 79 | raise VolumeNotFound( 80 | f'Could not find volume with name {volume_name!r}', 81 | volume_claim=pvc.obj, 82 | ) 83 | return volume 84 | 85 | 86 | async def rule_from_snapshotrule( 87 | ctx: Context, 88 | resource: SnapshotRule 89 | ) -> Optional[Rule]: 90 | """This tries to build a rule within a `SnapshotRule` resource - 91 | the resource that we custom designed for this purpose. 92 | 93 | This is invoked whenever Kubernetes tells us that such a resource 94 | was created, deleted, or updated. 95 | 96 | There are two separate ways a `SnapshotRule` can be used: 97 | 98 | - A `SnapshotRule` resource can refer to a specific Cloud disk 99 | id to be snapshotted, e.g. 'example-disk' on 'gcloud'. This 100 | skips Kubernetes entirely. 101 | 102 | - A `SnapshotRule` resource can refer to a `PersistentVolumeClaim`. 103 | The disk this claim is bound to is the one we will snapshot. 104 | Rather than defining the snapshot interval etc. as annotations 105 | of the claim, they are defined here, in a separate resource. 106 | """ 107 | _log = _logger.new(resource=resource, rule=resource.obj) 108 | 109 | spec = resource.obj.get('spec', {}) 110 | 111 | # Validate the deltas 112 | try: 113 | deltas_str = resource.obj.get('spec', {}).get('deltas') 114 | try: 115 | deltas = parse_deltas(deltas_str) 116 | except DeltasParseError as exc: 117 | raise AnnotationError( 118 | 'Invalid delta string', 119 | deltas_str=deltas_str 120 | ) from exc 121 | 122 | if deltas is None or not deltas: 123 | raise AnnotationError( 124 | 'parse_deltas returned invalid deltas', 125 | deltas_str=deltas_str, 126 | deltas=deltas, 127 | ) 128 | except AnnotationError: 129 | _log.exception( 130 | 'rule.invalid', 131 | key_hints=['rule.metadata.name'], 132 | ) 133 | return 134 | 135 | # Refers to a disk from a cloud provider 136 | if spec.get('disk'): 137 | # Validate the backend 138 | backend_name = spec.get('backend') 139 | try: 140 | backend = get_backend(backend_name) 141 | except ConfigurationError as e: 142 | _log.exception( 143 | 'rule.invalid', 144 | message=e.message, 145 | backend=backend_name 146 | ) 147 | return 148 | 149 | # Validate the disk identifier 150 | disk = resource.obj.get('spec', {}).get('disk') 151 | try: 152 | disk = backend.validate_disk_identifier(disk) 153 | except ValueError: 154 | _log.exception( 155 | 'rule.invalid', 156 | key_hints=['rule.metadata.name'], 157 | ) 158 | return 159 | 160 | rule = Rule( 161 | name=rule_name_from_k8s_source(resource), 162 | deltas=deltas, 163 | backend=backend_name, 164 | disk=disk 165 | ) 166 | return rule 167 | 168 | # Refers to a volume claim 169 | if spec.get('persistentVolumeClaim'): 170 | 171 | # Find the claim 172 | volume_claim = await get_resource_or_none( 173 | ctx.kube_client, 174 | pykube.objects.PersistentVolumeClaim, 175 | spec.get('persistentVolumeClaim'), 176 | namespace=resource.metadata['namespace'] 177 | ) 178 | 179 | if not volume_claim: 180 | _log.warning( 
181 | events.Rule.PENDING, 182 | reason='Volume claim does not exist', 183 | key_hints=['rule.metadata.name'], 184 | ) 185 | raise RuleDependsOn( 186 | 'The volume claim targeted by this SnapshotRule does not exist yet', 187 | kind='PersistentVolumeClaim', 188 | namespace=resource.metadata['namespace'], 189 | name=spec.get('persistentVolumeClaim') 190 | ) 191 | 192 | # Find the volume 193 | try: 194 | volume = await volume_from_pvc(ctx, volume_claim) 195 | except VolumeNotFound: 196 | _log.warning( 197 | events.Rule.PENDING, 198 | reason='Volume claim is not bound', 199 | key_hints=['rule.metadata.name'], 200 | ) 201 | raise RuleDependsOn( 202 | 'The volume claim targeted by this SnapshotRule is not bound yet', 203 | kind='PersistentVolumeClaim', 204 | namespace=resource.metadata['namespace'], 205 | name=spec.get('persistentVolumeClaim') 206 | ) 207 | 208 | return await rule_from_pv(ctx, volume, deltas, source=resource) 209 | 210 | 211 | async def rule_from_persistent_volume( 212 | ctx: Context, 213 | volume: pykube.objects.PersistentVolume 214 | ) -> Optional[Rule]: 215 | _log = _logger.new(resource=volume) 216 | 217 | volume_name = volume.name 218 | _log = _log.bind( 219 | volume_name=volume_name, 220 | volume=volume.obj, 221 | ) 222 | 223 | try: 224 | _log.debug('Checking volume for deltas') 225 | deltas = get_deltas(volume.annotations, 226 | ctx.config.get('deltas_annotation_key')) 227 | except AnnotationNotFound: 228 | _log.info( 229 | events.Annotation.NOT_FOUND, 230 | key_hints=['volume.metadata.name'] 231 | ) 232 | return 233 | except AnnotationError: 234 | _log.exception( 235 | events.Annotation.ERROR, 236 | key_hints=['volume.metadata.name'], 237 | ) 238 | return 239 | 240 | try: 241 | return await rule_from_pv(ctx, volume, deltas, source=volume) 242 | except UnsupportedVolume as exc: 243 | _log.info( 244 | events.Volume.UNSUPPORTED, 245 | key_hints=['volume.metadata.name'], 246 | exc_info=exc, 247 | ) 248 | 249 | 250 | async def rule_from_persistent_volume_claim( 251 | ctx: Context, 252 | volume_claim: pykube.objects.PersistentVolumeClaim 253 | ) -> Optional[Rule]: 254 | """ 255 | If a `PersistentVolumeClaim` is annotated, we create a rule 256 | based on those annotations, for the disk that the claim is bound to. 257 | 258 | If the claim is currently unbound, we return `None`. We do not 259 | have to worry about being notified of any future binding, since 260 | Kubernetes will update the `PersistentVolumeClaim` resource when 261 | that happens, so we will see that update.
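The deltas themselves are read from the claim's annotations, using the annotation key configured via deltas_annotation_key.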
262 | """ 263 | _log = _logger.new(resource=volume_claim, volume_claim=volume_claim.obj) 264 | 265 | try: 266 | _log.debug('Checking volume claim for deltas') 267 | deltas = get_deltas( 268 | volume_claim.annotations, ctx.config.get('deltas_annotation_key')) 269 | except AnnotationNotFound as exc: 270 | _log.exception( 271 | events.Annotation.NOT_FOUND, 272 | key_hints=['volume_claim.metadata.name'], 273 | ) 274 | return 275 | except AnnotationError: 276 | _log.exception( 277 | events.Annotation.ERROR, 278 | key_hints=['volume_claim.metadata.name'], 279 | ) 280 | return 281 | 282 | try: 283 | volume = await volume_from_pvc(ctx, volume_claim) 284 | except VolumeNotFound: 285 | _log.warning( 286 | events.Rule.PENDING, 287 | reason='Volume claim is not bound', 288 | key_hints=['volume_claim.metadata.name'], 289 | ) 290 | return 291 | 292 | return await rule_from_pv( 293 | ctx, 294 | volume, 295 | deltas=deltas, 296 | source=volume_claim 297 | ) 298 | 299 | 300 | async def rules_from_kubernetes(ctx) -> AsyncIterable[List[Rule]]: 301 | """This generator continuously runs, watching Kubernetes for 302 | certain resources, consuming changes, and determining which 303 | snapshot rules have been defined. 304 | 305 | Every value it returns is a list of `Rule` objects, a complete 306 | set of snapshot rules defined at this point in time. Every set 307 | of rule objects replaces the previous one. 308 | """ 309 | 310 | # These are rules that we are ready to "run". 311 | rules = {} 312 | 313 | # These are resources that we know we have to recheck, because 314 | # they will become rules pending a resource creation. For example: 315 | # A `SnapshotRule` resource points to volume claim. However, this 316 | # volume claim is not yet bound. Once Kubernetes creates the volume, 317 | # we will notify us about creating a `PersistentVolume` and updating 318 | # a `PersistentVolumeClaim`. It will not, however, send us an 319 | # update for the `SnapshotRule` - where the rule is actually 320 | # defined. We thus have to link the rule to the volume. 321 | pending_rules: Dict[Tuple, pykube.objects.APIObject] = {} 322 | 323 | _logger.debug('volume-events.watch') 324 | 325 | merged_stream = stream.merge( 326 | watch_resources(ctx, pykube.objects.PersistentVolume, delay=0), 327 | watch_resources(ctx, pykube.objects.PersistentVolumeClaim, delay=2), 328 | watch_resources(ctx, SnapshotRule, delay=3, allow_missing=True) 329 | ) 330 | 331 | iterable: AsyncIterable[_WatchEvent] = merged_stream.stream() 332 | async with iterable as merged_events: 333 | async for event in merged_events: 334 | 335 | _log = _logger.bind( 336 | event_type=event.type, 337 | event_object=event.object.obj, 338 | ) 339 | _log.info( 340 | events.VolumeEvent.RECEIVED, 341 | key_hints=[ 342 | 'event_type', 343 | 'event_object.metadata.name', 344 | ], 345 | ) 346 | 347 | # This is how we uniquely identify the rule. This is important 348 | # such that when an object is deleted, we delete the correct 349 | # rule. 350 | key_by = ( 351 | event.object.kind, 352 | event.object.namespace, 353 | event.object.name 354 | ) 355 | 356 | events_to_process = [ 357 | (event.type, key_by, event.object) 358 | ] 359 | 360 | # Is there some other object that was depending on *this* 361 | # object? 
362 | if key_by in pending_rules: 363 | depending_object_key, depending_object = pending_rules.pop(key_by) 364 | if event.type != 'DELETED': 365 | events_to_process.append(('MODIFIED', depending_object_key, depending_object)) 366 | 367 | for (event_type, rule_key, resource) in events_to_process: 368 | 369 | # TODO: there is probably a bug here, where for rule deletion 370 | # we should not have to first successfully build the rule; the key 371 | # is enough to delete it. Same with a modification that causes 372 | # the rule to break; we should remove it until fixed. 373 | try: 374 | if isinstance(resource, SnapshotRule): 375 | rule = await rule_from_snapshotrule(ctx, resource) 376 | elif isinstance(resource, pykube.objects.PersistentVolumeClaim): 377 | rule = await rule_from_persistent_volume_claim(ctx, resource) 378 | elif isinstance(resource, pykube.objects.PersistentVolume): 379 | rule = await rule_from_persistent_volume(ctx, resource) 380 | else: 381 | raise RuntimeError(f'{resource} is not supported.') 382 | 383 | except RuleDependsOn as exc: 384 | # We have to remember this so that when we get an 385 | # update for the dependency that we lack here, we 386 | # can process this resource once more. 387 | pending_rules[( 388 | exc.data['kind'], 389 | exc.data['namespace'], 390 | exc.data['name'], 391 | )] = (rule_key, resource) 392 | continue 393 | 394 | if not rule: 395 | continue 396 | 397 | _log = _log.bind( 398 | rule=rule 399 | ) 400 | 401 | if event_type == 'ADDED' or event_type == 'MODIFIED': 402 | if rule: 403 | if event_type == 'ADDED' or rule_key not in rules: 404 | _log.info( 405 | events.Rule.ADDED, 406 | key_hints=['rule.name'] 407 | ) 408 | else: 409 | _log.info( 410 | events.Rule.UPDATED, 411 | key_hints=['rule.name'] 412 | ) 413 | rules[rule_key] = rule 414 | else: 415 | if rule_key in rules: 416 | _log.info( 417 | events.Rule.REMOVED, 418 | key_hints=['volume_name'] 419 | ) 420 | rules.pop(rule_key) 421 | 422 | elif event_type == 'DELETED': 423 | if rule_key in rules: 424 | _log.info( 425 | events.Rule.REMOVED, 426 | key_hints=['volume_name'] 427 | ) 428 | rules.pop(rule_key) 429 | else: 430 | _log.warning('Unhandled event') 431 | 432 | # We usually have duplicate disks within `rules`, 433 | # which is indexed by resource kind. One reason is that we are 434 | # watching both PVCs and PVs, and a PVC/PV pair resolves 435 | # to the same disk. It is also possible that custom rules 436 | # the user defined contain duplicates. Let's make sure 437 | # we only have one rule for every disk. Note that which 438 | # one we pick is undefined. 439 | # 440 | # In the (internal) case of PV/PVC pairs it doesn't matter, 441 | # since our code is written such that the rule always references 442 | # the volume, and we always check the volume, then the claim, 443 | # for deltas. The behaviour for this case is well-defined. 444 | unique_rules = {rule.disk: rule for rule in rules.values()}.values() 445 | # TODO: Log in a different place, in a debounced way 446 | #_logger.info('sync-get-rules.yield', rule_count=len(unique_rules)) 447 | yield list(unique_rules) 448 | 449 | _logger.debug('sync-get-rules.done') 450 | 451 | 452 | async def get_rules(ctx): 453 | _log = _logger.new() 454 | 455 | async for rules in rules_from_kubernetes(ctx): 456 | _log.debug('get-rules.rules.updated', rules=rules) 457 | yield rules 458 | 459 | _log.debug('get-rules.done') 460 | 461 | 462 | async def watch_schedule(ctx, trigger, *, loop=None): 463 | """Continually yields the next backup to be created.
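Each yielded value is the (rule, target time) pair chosen by determine_next_snapshot; the target time can be empty when no backup is currently due to be scheduled.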
464 | 465 | It watches two input sources: the rules as defined by 466 | Kubernetes resources, and the existing snapshots, as returned 467 | from Google Cloud. If either of them changes, a new backup 468 | is scheduled. 469 | """ 470 | loop = loop or asyncio.get_event_loop() 471 | _log = _logger.new() 472 | 473 | 474 | rules_reader = StreamReader(get_rules(ctx)) 475 | snapgen = get_snapshots(ctx, rules_reader.iter(), trigger) 476 | 477 | _log.debug('watch_schedule.start') 478 | 479 | rules = None 480 | 481 | heartbeat_interval_seconds = ctx.config.get( 482 | 'schedule_heartbeat_interval_seconds' 483 | ) 484 | 485 | async def heartbeat(): 486 | _logger.info( 487 | events.Rule.HEARTBEAT, 488 | rules=rules, 489 | ) 490 | 491 | loop.call_later( 492 | heartbeat_interval_seconds, 493 | asyncio.ensure_future, 494 | heartbeat() 495 | ) 496 | 497 | if heartbeat_interval_seconds: 498 | asyncio.ensure_future(heartbeat()) 499 | 500 | combined = combine_latest( 501 | rules=rules_reader.iter(), 502 | snapshots=snapgen, 503 | defaults={'snapshots': None, 'rules': None} 504 | ) 505 | 506 | async for item in combined: 507 | rules = item.get('rules') 508 | snapshots = item.get('snapshots') 509 | 510 | # Never schedule before we have data from both rules and snapshots 511 | if rules is None or snapshots is None: 512 | _log.debug( 513 | 'watch_schedule.wait-for-both', 514 | ) 515 | continue 516 | 517 | yield determine_next_snapshot(snapshots, rules) 518 | 519 | 520 | async def scheduler(ctx, scheduling_chan, snapshot_reload_trigger): 521 | """The "when to make a backup" schedule depends on the backup delta 522 | rules as defined in Kubernetes volume resources, and the existing 523 | snapshots. 524 | 525 | This simply observes a stream of 'next planned backup' events and 526 | sends them to the channel given. Note that this scheduler 527 | doesn't plan multiple backups in advance; only a single 528 | next backup is ever scheduled. 529 | """ 530 | _log = _logger.new() 531 | _log.debug('scheduler.start') 532 | 533 | async for schedule in watch_schedule(ctx, snapshot_reload_trigger): 534 | _log.debug('scheduler.schedule', schedule=schedule) 535 | await scheduling_chan.put(schedule) 536 | 537 | 538 | async def backuper(ctx, scheduling_chan, snapshot_reload_trigger): 539 | """Will take tasks from the given queue, then execute the backup.
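Each task is a (rule, target time) pair; once the target time has passed and the snapshot is still required, make_backup is run and a reload of the snapshot list is triggered.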
540 | """ 541 | _log = _logger.new() 542 | _log.debug('backuper.start') 543 | 544 | current_target_time = current_target_rule = None 545 | while True: 546 | await asyncio.sleep(0.1) 547 | 548 | try: 549 | current_target_rule, current_target_time = scheduling_chan.get_nowait() 550 | 551 | # Log a message 552 | if not current_target_time: 553 | _log.debug('backuper.no-target') 554 | else: 555 | _log.debug( 556 | 'backuper.next-backup', 557 | key_hints=[ 558 | 'rule.name', 559 | 'target_time', 560 | ], 561 | rule=current_target_rule, 562 | target_time=current_target_time, 563 | diff=current_target_time.diff(), 564 | ) 565 | except ChannelEmpty: 566 | pass 567 | 568 | if not current_target_time: 569 | continue 570 | 571 | if pendulum.now('utc') > current_target_time: 572 | try: 573 | if await is_snapshot_required(ctx, current_target_rule): 574 | await make_backup(ctx, current_target_rule) 575 | await snapshot_reload_trigger.put(True) 576 | else: 577 | _log.info('backuper.scheduled_backup_no_longer_required', 578 | rule=current_target_rule, 579 | target_time=current_target_time) 580 | finally: 581 | current_target_time = current_target_rule = None 582 | 583 | 584 | async def daemon(config, *, loop=None): 585 | """Main app; it runs two tasks; one schedules backups, the other 586 | one executes the. 587 | """ 588 | loop = loop or asyncio.get_event_loop() 589 | 590 | ctx = Context(config) 591 | 592 | # Using this channel, we can trigger a refresh of the list of 593 | # disk snapshots in the Google Cloud. 594 | snapshot_reload_trigger = Channel() 595 | 596 | # The backup task consumes this channel for the next backup task. 597 | scheduling_chan = Channel() 598 | 599 | schedule_task = asyncio.ensure_future( 600 | scheduler(ctx, scheduling_chan, snapshot_reload_trigger)) 601 | backup_task = asyncio.ensure_future( 602 | backuper(ctx, scheduling_chan, snapshot_reload_trigger)) 603 | 604 | tasks = [schedule_task, backup_task] 605 | 606 | _logger.debug('Gathering tasks', tasks=tasks) 607 | 608 | try: 609 | await asyncio.gather(*tasks) 610 | except asyncio.CancelledError: 611 | _logger.exception( 612 | 'Received CancelledError', 613 | tasks=tasks 614 | ) 615 | 616 | for task in tasks: 617 | task.cancel() 618 | _logger.debug('daemon cancelled task', task=task) 619 | 620 | while True: 621 | finished, pending = await asyncio.wait( 622 | tasks, 623 | return_when=asyncio.FIRST_COMPLETED) 624 | 625 | _logger.debug( 626 | 'task completed', 627 | finished=finished, 628 | pending=pending) 629 | 630 | if not pending: 631 | _logger.debug('all tasks done') 632 | raise 633 | --------------------------------------------------------------------------------