├── tests ├── src │ ├── __init__.py │ ├── common │ │ ├── __init__.py │ │ ├── util.py │ │ └── kubeclient.py │ ├── test_backuploc.py │ └── test_backup.py ├── pytest.ini ├── config │ ├── .gitignore │ └── sample_testenv.json ├── runtests └── conftest.py ├── docs ├── devguide │ ├── .gitignore │ ├── source │ │ ├── _static │ │ │ ├── css │ │ │ │ └── custom.css │ │ │ └── images │ │ │ │ ├── logo-2.5-vert-small.png │ │ │ │ ├── logo-3.0-vert-med.png │ │ │ │ └── logo-2.0-vert-xsmall.png │ │ ├── index.rst │ │ ├── local_builds.rst │ │ ├── intro.rst │ │ ├── ci_builds.rst │ │ ├── monitoring.rst │ │ ├── impl.rst │ │ ├── conf.py │ │ └── automated_tests.rst │ └── Makefile ├── userguide │ ├── .gitignore │ ├── source │ │ ├── _static │ │ │ ├── css │ │ │ │ └── custom.css │ │ │ └── images │ │ │ │ ├── logo-3.0-vert-med.png │ │ │ │ ├── logo-2.0-vert-xsmall.png │ │ │ │ └── logo-2.5-vert-small.png │ │ ├── introduction.rst │ │ ├── index.rst │ │ ├── roadmap.rst │ │ ├── uninstall.rst │ │ ├── pause_resume.rst │ │ ├── overview.rst │ │ ├── conf.py │ │ ├── installation.rst │ │ ├── troubleshooting.rst │ │ ├── configuration.rst │ │ ├── backup.rst │ │ ├── monitoring.rst │ │ └── restore.rst │ ├── Makefile │ └── make.bat └── README.md ├── kubedr ├── config │ ├── prometheus │ │ ├── kustomization.yaml │ │ └── monitor.yaml │ ├── certmanager │ │ ├── kustomization.yaml │ │ ├── kustomizeconfig.yaml │ │ └── certificate.yaml │ ├── webhook │ │ ├── kustomization.yaml │ │ ├── service.yaml │ │ ├── kustomizeconfig.yaml │ │ └── manifests.yaml │ ├── manager │ │ ├── kustomization.yaml │ │ └── manager.yaml │ ├── samples │ │ ├── kubedr_v1alpha1_backuplocation.yaml │ │ ├── kubedr_v1alpha1_metadatarestore.yaml │ │ ├── kubedr_v1alpha1_metadatabackuppolicy.yaml │ │ └── kubedr_v1alpha1_metadatabackuprecord.yaml │ ├── rbac │ │ ├── role_binding.yaml │ │ ├── auth_proxy_role_binding.yaml │ │ ├── leader_election_role_binding.yaml │ │ ├── auth_proxy_service.yaml │ │ ├── auth_proxy_role.yaml │ │ ├── kustomization.yaml │ │ └── leader_election_role.yaml │ ├── crd │ │ ├── patches │ │ │ ├── cainjection_in_backuplocations.yaml │ │ │ ├── cainjection_in_metadatarestores.yaml │ │ │ ├── cainjection_in_metadatabackuprecords.yaml │ │ │ ├── cainjection_in_metadatabackuppolicies.yaml │ │ │ ├── webhook_in_backuplocations.yaml │ │ │ ├── webhook_in_metadatarestores.yaml │ │ │ ├── webhook_in_metadatabackuppolicies.yaml │ │ │ └── webhook_in_metadatabackuprecords.yaml │ │ ├── kustomizeconfig.yaml │ │ ├── kustomization.yaml │ │ └── bases │ │ │ ├── kubedr.catalogicsoftware.com_metadatabackuprecords.yaml │ │ │ ├── kubedr.catalogicsoftware.com_metadatarestores.yaml │ │ │ ├── kubedr.catalogicsoftware.com_backuplocations.yaml │ │ │ └── kubedr.catalogicsoftware.com_metadatabackuppolicies.yaml │ └── default │ │ ├── manager_prometheus_metrics_patch.yaml │ │ ├── manager_webhook_patch.yaml │ │ ├── webhookcainjection_patch.yaml │ │ ├── manager_auth_proxy_patch.yaml │ │ └── kustomization.yaml ├── PROJECT ├── go.mod ├── .gitignore ├── hack │ └── boilerplate.go.txt ├── Dockerfile ├── api │ └── v1alpha1 │ │ ├── groupversion_info.go │ │ ├── metadatabackuprecord_types.go │ │ ├── backuplocation_types.go │ │ ├── metadatarestore_types.go │ │ ├── backuplocation_webhook.go │ │ ├── metadatabackuppolicy_types.go │ │ └── metadatabackuppolicy_webhook.go ├── Makefile ├── controllers │ ├── suite_test.go │ ├── metadatabackuprecord_controller.go │ ├── metadatarestore_controller.go │ └── backuplocation_controller.go ├── main.go └── metrics │ └── metrics.go ├── logos ├── logo-1.5-vert-xxsmall.png ├── 
logo-2.0-vert-xsmall.png └── logo-2.5-horiz-small.png ├── .gitignore ├── .yamllint ├── uninstall.sh ├── Makefile ├── README.md └── LICENSE /tests/src/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/src/common/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/devguide/.gitignore: -------------------------------------------------------------------------------- 1 | build 2 | -------------------------------------------------------------------------------- /docs/userguide/.gitignore: -------------------------------------------------------------------------------- 1 | build 2 | -------------------------------------------------------------------------------- /tests/pytest.ini: -------------------------------------------------------------------------------- 1 | 2 | [pytest] 3 | junit_family=legacy 4 | 5 | -------------------------------------------------------------------------------- /kubedr/config/prometheus/kustomization.yaml: -------------------------------------------------------------------------------- 1 | resources: 2 | - monitor.yaml 3 | -------------------------------------------------------------------------------- /logos/logo-1.5-vert-xxsmall.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/catalogicsoftware/kubedr/HEAD/logos/logo-1.5-vert-xxsmall.png -------------------------------------------------------------------------------- /logos/logo-2.0-vert-xsmall.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/catalogicsoftware/kubedr/HEAD/logos/logo-2.0-vert-xsmall.png -------------------------------------------------------------------------------- /logos/logo-2.5-horiz-small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/catalogicsoftware/kubedr/HEAD/logos/logo-2.5-horiz-small.png -------------------------------------------------------------------------------- /kubedr/config/certmanager/kustomization.yaml: -------------------------------------------------------------------------------- 1 | resources: 2 | - certificate.yaml 3 | 4 | configurations: 5 | - kustomizeconfig.yaml 6 | -------------------------------------------------------------------------------- /kubedr/config/webhook/kustomization.yaml: -------------------------------------------------------------------------------- 1 | resources: 2 | - manifests.yaml 3 | - service.yaml 4 | 5 | configurations: 6 | - kustomizeconfig.yaml 7 | -------------------------------------------------------------------------------- /tests/config/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Contains config information that is specific to this environment # so it should not be part of git.
4 | testenv.json 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | *.pdf 3 | 4 | __pycache__/ 5 | test-results.xml 6 | .vscode/** 7 | 8 | kubedr/kubedr.yaml 9 | kubedr/config/rbac/role.yaml 10 | -------------------------------------------------------------------------------- /docs/devguide/source/_static/css/custom.css: -------------------------------------------------------------------------------- 1 | 2 | /* Excluding table of contents is not working */ 3 | /* li:not('.toctree-l1') */ 4 | li { 5 | margin-bottom: .3em; 6 | } 7 | -------------------------------------------------------------------------------- /docs/devguide/source/_static/images/logo-2.5-vert-small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/catalogicsoftware/kubedr/HEAD/docs/devguide/source/_static/images/logo-2.5-vert-small.png -------------------------------------------------------------------------------- /docs/devguide/source/_static/images/logo-3.0-vert-med.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/catalogicsoftware/kubedr/HEAD/docs/devguide/source/_static/images/logo-3.0-vert-med.png -------------------------------------------------------------------------------- /docs/userguide/source/_static/css/custom.css: -------------------------------------------------------------------------------- 1 | 2 | /* Excluding table of contents is not working */ 3 | /* li:not('.toctree-l1') */ 4 | li { 5 | margin-bottom: .3em; 6 | } 7 | -------------------------------------------------------------------------------- /docs/userguide/source/_static/images/logo-3.0-vert-med.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/catalogicsoftware/kubedr/HEAD/docs/userguide/source/_static/images/logo-3.0-vert-med.png -------------------------------------------------------------------------------- /docs/devguide/source/_static/images/logo-2.0-vert-xsmall.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/catalogicsoftware/kubedr/HEAD/docs/devguide/source/_static/images/logo-2.0-vert-xsmall.png -------------------------------------------------------------------------------- /docs/userguide/source/_static/images/logo-2.0-vert-xsmall.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/catalogicsoftware/kubedr/HEAD/docs/userguide/source/_static/images/logo-2.0-vert-xsmall.png -------------------------------------------------------------------------------- /docs/userguide/source/_static/images/logo-2.5-vert-small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/catalogicsoftware/kubedr/HEAD/docs/userguide/source/_static/images/logo-2.5-vert-small.png -------------------------------------------------------------------------------- /kubedr/config/manager/kustomization.yaml: -------------------------------------------------------------------------------- 1 | resources: 2 | - manager.yaml 3 | apiVersion: kustomize.config.k8s.io/v1beta1 4 | kind: Kustomization 5 | images: 6 | - name: controller 7 | newName: kubedr 8 | newTag: "0.42" 9 | 
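A note on the manager kustomization just above: the `images` entry is what local builds rewrite so the manifest points at a freshly built controller image. A minimal sketch of that flow, mirroring the root Makefile later in this dump (the `kubedr:0.42` tag is illustrative):

```bash
# Rewrite the newName/newTag fields shown above, then render the
# complete operator manifest from the default overlay.
cd kubedr/config/manager
kustomize edit set image controller=kubedr:0.42
cd ../..
kustomize build config/default > kubedr.yaml
```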
-------------------------------------------------------------------------------- /kubedr/config/samples/kubedr_v1alpha1_backuplocation.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kubedr.catalogicsoftware.com/v1alpha1 2 | kind: BackupLocation 3 | metadata: 4 | name: backuplocation-sample 5 | spec: 6 | # Add fields here 7 | foo: bar 8 | -------------------------------------------------------------------------------- /kubedr/config/samples/kubedr_v1alpha1_metadatarestore.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kubedr.catalogicsoftware.com/v1alpha1 2 | kind: MetadataRestore 3 | metadata: 4 | name: metadatarestore-sample 5 | spec: 6 | # Add fields here 7 | foo: bar 8 | -------------------------------------------------------------------------------- /kubedr/config/samples/kubedr_v1alpha1_metadatabackuppolicy.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kubedr.catalogicsoftware.com/v1alpha1 2 | kind: MetadataBackupPolicy 3 | metadata: 4 | name: metadatabackuppolicy-sample 5 | spec: 6 | # Add fields here 7 | foo: bar 8 | -------------------------------------------------------------------------------- /kubedr/config/samples/kubedr_v1alpha1_metadatabackuprecord.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kubedr.catalogicsoftware.com/v1alpha1 2 | kind: MetadataBackupRecord 3 | metadata: 4 | name: metadatabackuprecord-sample 5 | spec: 6 | # Add fields here 7 | foo: bar 8 | -------------------------------------------------------------------------------- /kubedr/config/webhook/service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: webhook-service 5 | namespace: system 6 | spec: 7 | ports: 8 | - port: 443 9 | targetPort: 9443 10 | selector: 11 | control-plane: controller-manager 12 | -------------------------------------------------------------------------------- /.yamllint: -------------------------------------------------------------------------------- 1 | # Based on gopkg.in/yaml.v2 K8S YAML standard 2 | 3 | extends: default 4 | 5 | rules: 6 | document-start: disable 7 | empty-lines: disable 8 | indentation: 9 | indent-sequences: false 10 | line-length: 11 | level: warning 12 | max: 120 13 | -------------------------------------------------------------------------------- /kubedr/config/rbac/role_binding.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRoleBinding 3 | metadata: 4 | name: manager-rolebinding 5 | roleRef: 6 | apiGroup: rbac.authorization.k8s.io 7 | kind: ClusterRole 8 | name: manager-role 9 | subjects: 10 | - kind: ServiceAccount 11 | name: default 12 | namespace: system 13 | -------------------------------------------------------------------------------- /kubedr/config/rbac/auth_proxy_role_binding.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRoleBinding 3 | metadata: 4 | name: proxy-rolebinding 5 | roleRef: 6 | apiGroup: rbac.authorization.k8s.io 7 | kind: ClusterRole 8 | name: proxy-role 9 | subjects: 10 | - kind: ServiceAccount 11 | name: default 12 | namespace: system 13 | 
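A quick way to sanity-check the RBAC bindings above after deployment is to ask the API server directly. A hypothetical check, assuming the default kustomize prefixing places everything in the `kubedr-system` namespace used elsewhere in this repo:

```bash
# Should print "yes" if the proxy-role binding above is in effect
# for the service account the manager runs as.
kubectl auth can-i create tokenreviews \
  --as=system:serviceaccount:kubedr-system:default
```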
-------------------------------------------------------------------------------- /kubedr/config/rbac/leader_election_role_binding.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: RoleBinding 3 | metadata: 4 | name: leader-election-rolebinding 5 | roleRef: 6 | apiGroup: rbac.authorization.k8s.io 7 | kind: Role 8 | name: leader-election-role 9 | subjects: 10 | - kind: ServiceAccount 11 | name: default 12 | namespace: system 13 | -------------------------------------------------------------------------------- /kubedr/config/rbac/auth_proxy_service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | labels: 5 | control-plane: controller-manager 6 | name: controller-manager-metrics-service 7 | namespace: system 8 | spec: 9 | ports: 10 | - name: https 11 | port: 8443 12 | targetPort: https 13 | selector: 14 | control-plane: controller-manager 15 | -------------------------------------------------------------------------------- /kubedr/config/rbac/auth_proxy_role.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRole 3 | metadata: 4 | name: proxy-role 5 | rules: 6 | - apiGroups: ["authentication.k8s.io"] 7 | resources: 8 | - tokenreviews 9 | verbs: ["create"] 10 | - apiGroups: ["authorization.k8s.io"] 11 | resources: 12 | - subjectaccessreviews 13 | verbs: ["create"] 14 | -------------------------------------------------------------------------------- /kubedr/PROJECT: -------------------------------------------------------------------------------- 1 | version: "2" 2 | domain: catalogicsoftware.com 3 | repo: kubedr 4 | resources: 5 | - group: kubedr 6 | version: v1alpha1 7 | kind: BackupLocation 8 | - group: kubedr 9 | version: v1alpha1 10 | kind: MetadataBackupPolicy 11 | - group: kubedr 12 | version: v1alpha1 13 | kind: MetadataBackupRecord 14 | - group: kubedr 15 | version: v1alpha1 16 | kind: MetadataRestore 17 | -------------------------------------------------------------------------------- /kubedr/go.mod: -------------------------------------------------------------------------------- 1 | module kubedr 2 | 3 | go 1.12 4 | 5 | require ( 6 | github.com/go-logr/logr v0.1.0 7 | github.com/onsi/ginkgo v1.12.0 8 | github.com/onsi/gomega v1.9.0 9 | github.com/prometheus/client_golang v1.4.0 10 | github.com/robfig/cron v1.2.0 11 | k8s.io/api v0.17.2 12 | k8s.io/apimachinery v0.17.2 13 | k8s.io/client-go v0.17.2 14 | sigs.k8s.io/controller-runtime v0.4.0 15 | ) 16 | -------------------------------------------------------------------------------- /tests/runtests: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | SCRIPT_PATH=`which $0` 6 | export TESTS_ROOTDIR=`dirname $SCRIPT_PATH` 7 | export SRCDIR=$TESTS_ROOTDIR/src 8 | 9 | export PYTHONPATH=$TESTS_ROOTDIR:$SRCDIR:$PYTHONPATH 10 | export DEFAULT_RESULTS_FILE=$TESTS_ROOTDIR/test-results.xml 11 | 12 | ${PYTEST_BIN:-pytest} --tb=native -v --junit-xml=${PYTESTS_RESULTS_FILE:-$DEFAULT_RESULTS_FILE} "$@" 13 | 14 | 15 | -------------------------------------------------------------------------------- /kubedr/config/prometheus/monitor.yaml: -------------------------------------------------------------------------------- 1 | # Prometheus Monitor Service (Metrics) 2 | apiVersion: monitoring.coreos.com/v1 3 | kind: 
ServiceMonitor 4 | metadata: 5 | labels: 6 | control-plane: controller-manager 7 | name: controller-manager-metrics-monitor 8 | namespace: system 9 | spec: 10 | endpoints: 11 | - path: /metrics 12 | port: https 13 | selector: 14 | control-plane: controller-manager 15 | -------------------------------------------------------------------------------- /kubedr/config/rbac/kustomization.yaml: -------------------------------------------------------------------------------- 1 | resources: 2 | - role.yaml 3 | - role_binding.yaml 4 | - leader_election_role.yaml 5 | - leader_election_role_binding.yaml 6 | # Comment the following 3 lines if you want to disable 7 | # the auth proxy (https://github.com/brancz/kube-rbac-proxy) 8 | # which protects your /metrics endpoint. 9 | - auth_proxy_service.yaml 10 | - auth_proxy_role.yaml 11 | - auth_proxy_role_binding.yaml 12 | -------------------------------------------------------------------------------- /kubedr/config/crd/patches/cainjection_in_backuplocations.yaml: -------------------------------------------------------------------------------- 1 | # The following patch adds a directive for certmanager to inject CA into the CRD 2 | # CRD conversion requires k8s 1.13 or later. 3 | apiVersion: apiextensions.k8s.io/v1beta1 4 | kind: CustomResourceDefinition 5 | metadata: 6 | annotations: 7 | cert-manager.io/inject-ca-from: $(CERTIFICATE_NAMESPACE)/$(CERTIFICATE_NAME) 8 | name: backuplocations.kubedr.catalogicsoftware.com 9 | -------------------------------------------------------------------------------- /kubedr/config/crd/patches/cainjection_in_metadatarestores.yaml: -------------------------------------------------------------------------------- 1 | # The following patch adds a directive for certmanager to inject CA into the CRD 2 | # CRD conversion requires k8s 1.13 or later. 3 | apiVersion: apiextensions.k8s.io/v1beta1 4 | kind: CustomResourceDefinition 5 | metadata: 6 | annotations: 7 | cert-manager.io/inject-ca-from: $(CERTIFICATE_NAMESPACE)/$(CERTIFICATE_NAME) 8 | name: metadatarestores.kubedr.catalogicsoftware.com 9 | -------------------------------------------------------------------------------- /kubedr/config/crd/patches/cainjection_in_metadatabackuprecords.yaml: -------------------------------------------------------------------------------- 1 | # The following patch adds a directive for certmanager to inject CA into the CRD 2 | # CRD conversion requires k8s 1.13 or later. 3 | apiVersion: apiextensions.k8s.io/v1beta1 4 | kind: CustomResourceDefinition 5 | metadata: 6 | annotations: 7 | cert-manager.io/inject-ca-from: $(CERTIFICATE_NAMESPACE)/$(CERTIFICATE_NAME) 8 | name: metadatabackuprecords.kubedr.catalogicsoftware.com 9 | -------------------------------------------------------------------------------- /kubedr/config/crd/patches/cainjection_in_metadatabackuppolicies.yaml: -------------------------------------------------------------------------------- 1 | # The following patch adds a directive for certmanager to inject CA into the CRD 2 | # CRD conversion requires k8s 1.13 or later. 
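# Note: $(CERTIFICATE_NAMESPACE) and $(CERTIFICATE_NAME) below are kustomize
# variables, substituted at build time by the config/default overlay.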
3 | apiVersion: apiextensions.k8s.io/v1beta1 4 | kind: CustomResourceDefinition 5 | metadata: 6 | annotations: 7 | cert-manager.io/inject-ca-from: $(CERTIFICATE_NAMESPACE)/$(CERTIFICATE_NAME) 8 | name: metadatabackuppolicies.kubedr.catalogicsoftware.com 9 | -------------------------------------------------------------------------------- /tests/config/sample_testenv.json: -------------------------------------------------------------------------------- 1 | { 2 | "backuploc": { 3 | "endpoint": "http://10.106.44.180:9000", 4 | "access_key": "minio", 5 | "bucket_name_prefix": "kubedr-testbucket1", 6 | "secret_key": "minio123" 7 | }, 8 | "etcd_data": { 9 | "ca.crt": "/tmp/ca.crt", 10 | "client.crt": "/tmp/client.crt", 11 | "client.key": "/tmp/client.key" 12 | }, 13 | "certs_dir": "/var/lib/minikube/certs" 14 | } 15 | -------------------------------------------------------------------------------- /kubedr/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Binaries for programs and plugins 3 | *.exe 4 | *.exe~ 5 | *.dll 6 | *.so 7 | *.dylib 8 | bin 9 | 10 | # Test binary, build with `go test -c` 11 | *.test 12 | 13 | # Output of the go coverage tool, specifically when used with LiteIDE 14 | *.out 15 | 16 | # Kubernetes Generated files - skip generated files, except for vendored files 17 | 18 | !vendor/**/zz_generated.* 19 | 20 | # editor and IDE paraphernalia 21 | .idea 22 | *.swp 23 | *.swo 24 | *~ 25 | 26 | -------------------------------------------------------------------------------- /kubedr/config/certmanager/kustomizeconfig.yaml: -------------------------------------------------------------------------------- 1 | # This configuration is for teaching kustomize how to update name ref and var substitution 2 | nameReference: 3 | - kind: Issuer 4 | group: cert-manager.io 5 | fieldSpecs: 6 | - kind: Certificate 7 | group: cert-manager.io 8 | path: spec/issuerRef/name 9 | 10 | varReference: 11 | - kind: Certificate 12 | group: cert-manager.io 13 | path: spec/commonName 14 | - kind: Certificate 15 | group: cert-manager.io 16 | path: spec/dnsNames 17 | -------------------------------------------------------------------------------- /docs/userguide/source/introduction.rst: -------------------------------------------------------------------------------- 1 | 2 | ============== 3 | Introduction 4 | ============== 5 | 6 | Kubernetes stores all the cluster data (such as resource specs) in 7 | *etcd*. The *KubeDR* project implements data protection for this 8 | data. Optionally, certificates can be backed up as 9 | well. 10 | 11 | .. warning:: 12 | 13 | The project is currently in the Alpha phase. Features may still change 14 | and enhancements will be made to existing functionality. Some 15 | corner cases may not work as expected.
16 | -------------------------------------------------------------------------------- /uninstall.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | 5 | shopt -s expand_aliases 6 | alias k="kubectl -n kubedr-system" 7 | 8 | # Delete all CRs 9 | k delete metadatabackuppolicy --all 10 | k delete metadatabackuprecord --all 11 | k delete backuplocation --all 12 | 13 | # Delete the namespace 14 | kubectl delete namespace kubedr-system 15 | 16 | # Delete CRDs 17 | kubectl delete crd metadatabackuppolicies.kubedr.catalogicsoftware.com 18 | kubectl delete crd metadatabackuprecords.kubedr.catalogicsoftware.com 19 | kubectl delete crd backuplocations.kubedr.catalogicsoftware.com 20 | -------------------------------------------------------------------------------- /kubedr/config/crd/kustomizeconfig.yaml: -------------------------------------------------------------------------------- 1 | # This file is for teaching kustomize how to substitute name and namespace reference in CRD 2 | nameReference: 3 | - kind: Service 4 | version: v1 5 | fieldSpecs: 6 | - kind: CustomResourceDefinition 7 | group: apiextensions.k8s.io 8 | path: spec/conversion/webhookClientConfig/service/name 9 | 10 | namespace: 11 | - kind: CustomResourceDefinition 12 | group: apiextensions.k8s.io 13 | path: spec/conversion/webhookClientConfig/service/namespace 14 | create: false 15 | 16 | varReference: 17 | - path: metadata/annotations 18 | -------------------------------------------------------------------------------- /kubedr/config/default/manager_prometheus_metrics_patch.yaml: -------------------------------------------------------------------------------- 1 | # This patch enables Prometheus scraping for the manager pod. 2 | apiVersion: apps/v1 3 | kind: Deployment 4 | metadata: 5 | name: controller-manager 6 | namespace: system 7 | spec: 8 | template: 9 | metadata: 10 | annotations: 11 | prometheus.io/scrape: 'true' 12 | spec: 13 | containers: 14 | # Expose the prometheus metrics on default port 15 | - name: manager 16 | ports: 17 | - containerPort: 8080 18 | name: metrics 19 | protocol: TCP 20 | -------------------------------------------------------------------------------- /docs/devguide/source/index.rst: -------------------------------------------------------------------------------- 1 | .. KubeDR documentation master file, created by 2 | sphinx-quickstart on Sun Dec 22 09:04:35 2019. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to the KubeDR Developer Guide! 7 | ====================================== 8 | 9 | .. toctree:: 10 | :maxdepth: 2 11 | :caption: Contents: 12 | 13 | intro 14 | impl 15 | ci_builds 16 | local_builds 17 | automated_tests 18 | monitoring 19 | 20 | Search 21 | ====== 22 | 23 | * :ref:`search` 24 | -------------------------------------------------------------------------------- /kubedr/config/rbac/leader_election_role.yaml: -------------------------------------------------------------------------------- 1 | # permissions to do leader election. 
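# The rules below let the manager use ConfigMaps as the leader-election lock
# and publish Events about lock acquisition (the manager is started with
# --enable-leader-election; see config/manager/manager.yaml).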
2 | apiVersion: rbac.authorization.k8s.io/v1 3 | kind: Role 4 | metadata: 5 | name: leader-election-role 6 | rules: 7 | - apiGroups: 8 | - "" 9 | resources: 10 | - configmaps 11 | verbs: 12 | - get 13 | - list 14 | - watch 15 | - create 16 | - update 17 | - patch 18 | - delete 19 | - apiGroups: 20 | - "" 21 | resources: 22 | - configmaps/status 23 | verbs: 24 | - get 25 | - update 26 | - patch 27 | - apiGroups: 28 | - "" 29 | resources: 30 | - events 31 | verbs: 32 | - create 33 | -------------------------------------------------------------------------------- /kubedr/config/default/manager_webhook_patch.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: controller-manager 5 | namespace: system 6 | spec: 7 | template: 8 | spec: 9 | containers: 10 | - name: manager 11 | ports: 12 | - containerPort: 9443 13 | name: webhook-server 14 | protocol: TCP 15 | volumeMounts: 16 | - mountPath: /tmp/k8s-webhook-server/serving-certs 17 | name: cert 18 | readOnly: true 19 | volumes: 20 | - name: cert 21 | secret: 22 | defaultMode: 420 23 | secretName: webhook-server-cert 24 | -------------------------------------------------------------------------------- /kubedr/hack/boilerplate.go.txt: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2020 Catalogic Software 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | -------------------------------------------------------------------------------- /docs/userguide/source/index.rst: -------------------------------------------------------------------------------- 1 | .. KubeDR documentation master file, created by 2 | sphinx-quickstart on Mon Dec 23 08:34:16 2019. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to the KubeDR User Guide! 7 | ================================= 8 | 9 | .. toctree:: 10 | :numbered: 11 | :maxdepth: 2 12 | :caption: Contents: 13 | 14 | introduction 15 | overview 16 | installation 17 | configuration 18 | backup 19 | pause_resume 20 | restore 21 | monitoring 22 | troubleshooting 23 | uninstall 24 | roadmap 25 | 26 | Search 27 | ====== 28 | 29 | * :ref:`search` 30 | -------------------------------------------------------------------------------- /docs/userguide/source/roadmap.rst: -------------------------------------------------------------------------------- 1 | ========= 2 | Roadmap 3 | ========= 4 | 5 | The *Kubedr* project is currently in the *Alpha* phase, with several 6 | enhancements being worked on. 7 | 8 | The following list includes such enhancements as well as robustness 9 | improvements that are being considered for the next release. 10 | 11 | - Support *Helm* installation. 12 | 13 | - Make it easy to switch the backup tool. Currently, we use 14 | `restic`_, but the design should support easily switching to any 15 | other tool.
16 | 17 | - Support a file system target in addition to S3 (or any 18 | `PersistentVolume`). 19 | 20 | - Support more restore use cases. 21 | 22 | .. _restic: https://restic.net 23 | -------------------------------------------------------------------------------- /kubedr/config/default/webhookcainjection_patch.yaml: -------------------------------------------------------------------------------- 1 | # This patch adds annotations to the admission webhook config, and 2 | # the variables $(CERTIFICATE_NAMESPACE) and $(CERTIFICATE_NAME) will be substituted by kustomize. 3 | apiVersion: admissionregistration.k8s.io/v1beta1 4 | kind: MutatingWebhookConfiguration 5 | metadata: 6 | name: mutating-webhook-configuration 7 | annotations: 8 | cert-manager.io/inject-ca-from: $(CERTIFICATE_NAMESPACE)/$(CERTIFICATE_NAME) 9 | --- 10 | apiVersion: admissionregistration.k8s.io/v1beta1 11 | kind: ValidatingWebhookConfiguration 12 | metadata: 13 | name: validating-webhook-configuration 14 | annotations: 15 | cert-manager.io/inject-ca-from: $(CERTIFICATE_NAMESPACE)/$(CERTIFICATE_NAME) 16 | -------------------------------------------------------------------------------- /docs/devguide/source/local_builds.rst: -------------------------------------------------------------------------------- 1 | ============== 2 | Local Builds 3 | ============== 4 | 5 | To build locally: 6 | 7 | .. code-block:: bash 8 | 9 | $ make build 10 | 11 | This builds two artifacts: 12 | 13 | - ``kubedr/kubedr.yaml`` 14 | - Docker image ``kubedr:latest`` 15 | 16 | The image tag can be changed by using the env variable 17 | ``DOCKER_KUBEDR_IMAGE_TAG``. 18 | 19 | Before applying ``kubedr.yaml``, make sure that the image is accessible 20 | in your test environment. For example, if you are using `minikube`_, 21 | you may need to add the image to its cache, like so: 22 | 23 | .. code-block:: bash 24 | 25 | $ minikube cache add kubedr:latest 26 | 27 | .. _minikube: https://github.com/kubernetes/minikube 28 | -------------------------------------------------------------------------------- /docs/devguide/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/userguide/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help".
12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /kubedr/config/crd/patches/webhook_in_backuplocations.yaml: -------------------------------------------------------------------------------- 1 | # The following patch enables conversion webhook for CRD 2 | # CRD conversion requires k8s 1.13 or later. 3 | apiVersion: apiextensions.k8s.io/v1beta1 4 | kind: CustomResourceDefinition 5 | metadata: 6 | name: backuplocations.kubedr.catalogicsoftware.com 7 | spec: 8 | conversion: 9 | strategy: Webhook 10 | webhookClientConfig: 11 | # this is "\n" used as a placeholder, otherwise it will be rejected by the apiserver for being blank, 12 | # but we're going to set it later using the cert-manager (or potentially a patch if not using cert-manager) 13 | caBundle: Cg== 14 | service: 15 | namespace: system 16 | name: webhook-service 17 | path: /convert 18 | -------------------------------------------------------------------------------- /kubedr/config/crd/patches/webhook_in_metadatarestores.yaml: -------------------------------------------------------------------------------- 1 | # The following patch enables conversion webhook for CRD 2 | # CRD conversion requires k8s 1.13 or later. 3 | apiVersion: apiextensions.k8s.io/v1beta1 4 | kind: CustomResourceDefinition 5 | metadata: 6 | name: metadatarestores.kubedr.catalogicsoftware.com 7 | spec: 8 | conversion: 9 | strategy: Webhook 10 | webhookClientConfig: 11 | # this is "\n" used as a placeholder, otherwise it will be rejected by the apiserver for being blank, 12 | # but we're going to set it later using the cert-manager (or potentially a patch if not using cert-manager) 13 | caBundle: Cg== 14 | service: 15 | namespace: system 16 | name: webhook-service 17 | path: /convert 18 | -------------------------------------------------------------------------------- /kubedr/config/crd/patches/webhook_in_metadatabackuppolicies.yaml: -------------------------------------------------------------------------------- 1 | # The following patch enables conversion webhook for CRD 2 | # CRD conversion requires k8s 1.13 or later. 3 | apiVersion: apiextensions.k8s.io/v1beta1 4 | kind: CustomResourceDefinition 5 | metadata: 6 | name: metadatabackuppolicies.kubedr.catalogicsoftware.com 7 | spec: 8 | conversion: 9 | strategy: Webhook 10 | webhookClientConfig: 11 | # this is "\n" used as a placeholder, otherwise it will be rejected by the apiserver for being blank, 12 | # but we're going to set it later using the cert-manager (or potentially a patch if not using cert-manager) 13 | caBundle: Cg== 14 | service: 15 | namespace: system 16 | name: webhook-service 17 | path: /convert 18 | -------------------------------------------------------------------------------- /kubedr/config/crd/patches/webhook_in_metadatabackuprecords.yaml: -------------------------------------------------------------------------------- 1 | # The following patch enables conversion webhook for CRD 2 | # CRD conversion requires k8s 1.13 or later. 
3 | apiVersion: apiextensions.k8s.io/v1beta1 4 | kind: CustomResourceDefinition 5 | metadata: 6 | name: metadatabackuprecords.kubedr.catalogicsoftware.com 7 | spec: 8 | conversion: 9 | strategy: Webhook 10 | webhookClientConfig: 11 | # this is "\n" used as a placeholder, otherwise it will be rejected by the apiserver for being blank, 12 | # but we're going to set it later using the cert-manager (or potentially a patch if not using cert-manager) 13 | caBundle: Cg== 14 | service: 15 | namespace: system 16 | name: webhook-service 17 | path: /convert 18 | -------------------------------------------------------------------------------- /kubedr/config/default/manager_auth_proxy_patch.yaml: -------------------------------------------------------------------------------- 1 | # This patch injects a sidecar container which is an HTTP proxy for the controller manager; 2 | # it performs RBAC authorization against the Kubernetes API using SubjectAccessReviews. 3 | apiVersion: apps/v1 4 | kind: Deployment 5 | metadata: 6 | name: controller-manager 7 | namespace: system 8 | spec: 9 | template: 10 | spec: 11 | containers: 12 | - name: kube-rbac-proxy 13 | image: gcr.io/kubebuilder/kube-rbac-proxy:v0.4.1 14 | args: 15 | - "--secure-listen-address=0.0.0.0:8443" 16 | - "--upstream=http://127.0.0.1:8080/" 17 | - "--logtostderr=true" 18 | - "--v=10" 19 | ports: 20 | - containerPort: 8443 21 | name: https 22 | - name: manager 23 | args: 24 | - "--metrics-addr=127.0.0.1:8080" 25 | - "--enable-leader-election" 26 | -------------------------------------------------------------------------------- /kubedr/config/webhook/kustomizeconfig.yaml: -------------------------------------------------------------------------------- 1 | # the following config is for teaching kustomize where to look when substituting vars. 2 | # It requires kustomize v2.1.0 or newer to work properly.
3 | nameReference: 4 | - kind: Service 5 | version: v1 6 | fieldSpecs: 7 | - kind: MutatingWebhookConfiguration 8 | group: admissionregistration.k8s.io 9 | path: webhooks/clientConfig/service/name 10 | - kind: ValidatingWebhookConfiguration 11 | group: admissionregistration.k8s.io 12 | path: webhooks/clientConfig/service/name 13 | 14 | namespace: 15 | - kind: MutatingWebhookConfiguration 16 | group: admissionregistration.k8s.io 17 | path: webhooks/clientConfig/service/namespace 18 | create: true 19 | - kind: ValidatingWebhookConfiguration 20 | group: admissionregistration.k8s.io 21 | path: webhooks/clientConfig/service/namespace 22 | create: true 23 | 24 | varReference: 25 | - path: metadata/annotations 26 | -------------------------------------------------------------------------------- /kubedr/Dockerfile: -------------------------------------------------------------------------------- 1 | # Build the manager binary 2 | FROM golang:1.12.5 as builder 3 | 4 | WORKDIR /workspace 5 | # Copy the Go Modules manifests 6 | COPY go.mod go.mod 7 | COPY go.sum go.sum 8 | # cache deps before building and copying source so that we don't need to re-download as much 9 | # and so that source changes don't invalidate our downloaded layer 10 | RUN go mod download 11 | 12 | # Copy the go source 13 | COPY main.go main.go 14 | COPY api/ api/ 15 | COPY controllers/ controllers/ 16 | COPY metrics/ metrics/ 17 | 18 | # Build 19 | RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 GO111MODULE=on go build -a -o manager main.go 20 | 21 | # Use distroless as minimal base image to package the manager binary 22 | # Refer to https://github.com/GoogleContainerTools/distroless for more details 23 | FROM gcr.io/distroless/static:nonroot 24 | WORKDIR / 25 | COPY --from=builder /workspace/manager . 26 | USER nonroot:nonroot 27 | 28 | ENTRYPOINT ["/manager"] 29 | -------------------------------------------------------------------------------- /docs/userguide/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 
23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | DOCKER_PREFIX ?= catalogicsoftware/ 2 | 3 | DOCKER_DIR_BASE ?= kubedr 4 | 5 | DOCKER_KUBEDR_IMAGE_TAG ?= latest 6 | DOCKER_KUBEDR_IMAGE_NAME_SHORT ?= kubedr 7 | DOCKER_KUBEDR_IMAGE_NAME_LONG ?= ${DOCKER_PREFIX}${DOCKER_KUBEDR_IMAGE_NAME_SHORT} 8 | 9 | DOCKER_KUBEDRUTIL_IMAGE_TAG ?= 0.2.11 10 | DOCKER_KUBEDRUTIL_IMAGE_NAME_SHORT ?= kubedrutil 11 | DOCKER_KUBEDRUTIL_IMAGE_NAME_LONG ?= ${DOCKER_PREFIX}${DOCKER_KUBEDRUTIL_IMAGE_NAME_SHORT} 12 | 13 | build: manifests docker_build go_build 14 | 15 | manifests: 16 | cd ${DOCKER_DIR_BASE} && make manifests 17 | 18 | docker_build: 19 | cd ${DOCKER_DIR_BASE} && \ 20 | docker build \ 21 | --tag ${DOCKER_KUBEDR_IMAGE_NAME_LONG}:${DOCKER_KUBEDR_IMAGE_TAG} \ 22 | . 23 | 24 | go_build: 25 | cd kubedr/config/manager && \ 26 | kustomize edit set image controller=${DOCKER_KUBEDR_IMAGE_NAME_LONG}:${DOCKER_KUBEDR_IMAGE_TAG} 27 | cd kubedr && kustomize build config/default > kubedr.yaml 28 | sed -i 's#<KUBEDR_UTIL_IMAGE>#${DOCKER_KUBEDRUTIL_IMAGE_NAME_LONG}:${DOCKER_KUBEDRUTIL_IMAGE_TAG}#' kubedr/kubedr.yaml 29 | -------------------------------------------------------------------------------- /kubedr/config/certmanager/certificate.yaml: -------------------------------------------------------------------------------- 1 | # The following manifests contain a self-signed issuer CR and a certificate CR.
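# (The self-signed Issuer below signs the webhook serving certificate; the
# resulting secret, webhook-server-cert, is the one mounted into the manager
# pod by manager_webhook_patch.yaml.)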
2 | # More documentation can be found at https://docs.cert-manager.io 3 | # WARNING: Targets CertManager 0.11 check https://docs.cert-manager.io/en/latest/tasks/upgrading/index.html for breaking changes 4 | apiVersion: cert-manager.io/v1alpha2 5 | kind: Issuer 6 | metadata: 7 | name: selfsigned-issuer 8 | namespace: system 9 | spec: 10 | selfSigned: {} 11 | --- 12 | apiVersion: cert-manager.io/v1alpha2 13 | kind: Certificate 14 | metadata: 15 | name: serving-cert # this name should match the one that appears in kustomizeconfig.yaml 16 | namespace: system 17 | spec: 18 | # $(SERVICE_NAME) and $(SERVICE_NAMESPACE) will be substituted by kustomize 19 | dnsNames: 20 | - $(SERVICE_NAME).$(SERVICE_NAMESPACE).svc 21 | - $(SERVICE_NAME).$(SERVICE_NAMESPACE).svc.cluster.local 22 | issuerRef: 23 | kind: Issuer 24 | name: selfsigned-issuer 25 | secretName: webhook-server-cert # this secret will not be prefixed, since it's not managed by kustomize 26 | -------------------------------------------------------------------------------- /kubedr/config/manager/manager.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Namespace 3 | metadata: 4 | labels: 5 | control-plane: controller-manager 6 | name: system 7 | --- 8 | apiVersion: apps/v1 9 | kind: Deployment 10 | metadata: 11 | name: controller-manager 12 | namespace: system 13 | labels: 14 | control-plane: controller-manager 15 | spec: 16 | selector: 17 | matchLabels: 18 | control-plane: controller-manager 19 | replicas: 1 20 | template: 21 | metadata: 22 | labels: 23 | control-plane: controller-manager 24 | spec: 25 | containers: 26 | - command: 27 | - /manager 28 | args: 29 | - --enable-leader-election 30 | image: controller:latest 31 | name: manager 32 | env: 33 | - name: KUBEDR_UTIL_IMAGE 34 | value: <KUBEDR_UTIL_IMAGE> 35 | resources: 36 | limits: 37 | cpu: 100m 38 | memory: 30Mi 39 | requests: 40 | cpu: 100m 41 | memory: 20Mi 42 | terminationGracePeriodSeconds: 10 43 | -------------------------------------------------------------------------------- /docs/userguide/source/uninstall.rst: -------------------------------------------------------------------------------- 1 | 2 | =========== 3 | Uninstall 4 | =========== 5 | 6 | To uninstall: 7 | 8 | - Delete all the CRs 9 | - Delete the namespace *kubedr-system* 10 | - Delete CRDs 11 | 12 | It is important to follow this order of deletions; otherwise, 13 | deletion of the namespace may hang. Here are the commands to uninstall: 14 | 15 | 16 | .. code-block:: bash 17 | 18 | $ alias k="kubectl -n kubedr-system" 19 | 20 | # Delete all CRs 21 | $ k delete metadatabackuppolicy --all 22 | $ k delete metadatabackuprecord --all 23 | $ k delete backuplocation --all 24 | 25 | # Delete the namespace 26 | $ kubectl delete namespace kubedr-system 27 | 28 | # Delete CRDs 29 | $ kubectl delete crd metadatabackuppolicies.kubedr.catalogicsoftware.com 30 | $ kubectl delete crd metadatabackuprecords.kubedr.catalogicsoftware.com 31 | $ kubectl delete crd backuplocations.kubedr.catalogicsoftware.com 32 | 33 | If you don't need the backups anymore, go ahead and delete the 34 | bucket on S3 (see the example after the note below). 35 | 36 | .. note:: 37 | 38 | In the future, you will be able to install and uninstall using 39 | *Helm*.
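For example, with the AWS CLI installed and configured, removing the
bucket and every backup object in it might look like this
(``<bucket-name>`` is a placeholder for your actual bucket):

.. code-block:: bash

   # Irreversibly deletes the bucket and all backups stored in it.
   $ aws s3 rb s3://<bucket-name> --force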
40 | 41 | 42 | -------------------------------------------------------------------------------- /docs/devguide/source/intro.rst: -------------------------------------------------------------------------------- 1 | ============== 2 | Introduction 3 | ============== 4 | 5 | The *Kubedr* project implements data protection for Kubernetes 6 | cluster data stored in *etcd*. Optionally, certificates can be backed 7 | up as well. 8 | 9 | Documentation 10 | ============= 11 | 12 | This guide is built using `sphinx`_ and uses the `Read the Docs`_ 13 | theme. 14 | 15 | Installation 16 | ------------ 17 | 18 | .. code-block:: bash 19 | 20 | $ python3 -m venv ~/venv/sphinx 21 | $ export PATH=~/venv/sphinx/bin:$PATH 22 | $ pip install sphinx sphinx_rtd_theme 23 | 24 | # For local builds, this helps in continuous build and refresh. 25 | $ pip install sphinx-autobuild 26 | 27 | Building 28 | -------- 29 | 30 | .. code-block:: bash 31 | 32 | $ cd docs/devguide 33 | $ make html 34 | 35 | This will generate HTML files under ``build/html``. If you are 36 | making changes locally and would like to automatically build and 37 | refresh the generated files, use the following build command: 38 | 39 | .. code-block:: bash 40 | 41 | $ cd docs/devguide 42 | $ sphinx-autobuild source build/html 43 | 44 | 45 | .. _sphinx: http://www.sphinx-doc.org/en/master/index.html 46 | .. _Read the Docs: https://github.com/readthedocs/sphinx_rtd_theme 47 | -------------------------------------------------------------------------------- /kubedr/api/v1alpha1/groupversion_info.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2020 Catalogic Software 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | // Package v1alpha1 contains API Schema definitions for the kubedr v1alpha1 API group 18 | // +kubebuilder:object:generate=true 19 | // +groupName=kubedr.catalogicsoftware.com 20 | package v1alpha1 21 | 22 | import ( 23 | "k8s.io/apimachinery/pkg/runtime/schema" 24 | "sigs.k8s.io/controller-runtime/pkg/scheme" 25 | ) 26 | 27 | var ( 28 | // GroupVersion is the group version used to register these objects 29 | GroupVersion = schema.GroupVersion{Group: "kubedr.catalogicsoftware.com", Version: "v1alpha1"} 30 | 31 | // SchemeBuilder is used to add go types to the GroupVersionKind scheme 32 | SchemeBuilder = &scheme.Builder{GroupVersion: GroupVersion} 33 | 34 | // AddToScheme adds the types in this group-version to the given scheme. 35 | AddToScheme = SchemeBuilder.AddToScheme 36 | ) 37 | -------------------------------------------------------------------------------- /docs/userguide/source/pause_resume.rst: -------------------------------------------------------------------------------- 1 | ================= 2 | Pausing backups 3 | ================= 4 | 5 | *Kubedr* supports pausing and resuming backups.
6 | 7 | To pause a backup, you need to patch the ``MetadataBackupPolicy`` 8 | resource by following the standard Kubernetes way of `making partial 9 | changes`_ to a resource. 10 | 11 | First, create a file called ``suspend.yaml`` (you can choose any name 12 | you want) with the following contents: 13 | 14 | .. code-block:: yaml 15 | 16 | spec: 17 | suspend: true 18 | 19 | Replace ``<policy-name>`` in the following command with the name of the policy 20 | resource and then run it: 21 | 22 | .. code-block:: bash 23 | 24 | $ kubectl -n kubedr-system patch \ 25 | metadatabackuppolicy.kubedr.catalogicsoftware.com/<policy-name> \ 26 | --patch "$(cat suspend.yaml)" --type merge 27 | 28 | You can verify that the backups are indeed suspended by checking the 29 | cronjob resource as follows (the "SUSPEND" column should show "True"): 30 | 31 | .. code-block:: bash 32 | 33 | $ kubectl -n kubedr-system get cronjobs 34 | NAME SCHEDULE SUSPEND ACTIVE LAST SCHEDULE AGE 35 | test-backup-new-backup-cronjob */2 * * * * True 0 5m59s 13m 36 | 37 | To resume backups, follow the same procedure as above, but this time 38 | use the following snippet: 39 | 40 | .. code-block:: yaml 41 | 42 | spec: 43 | suspend: false 44 | 45 | .. _making partial changes: https://kubernetes.io/docs/tasks/run-application/update-api-object-kubectl-patch/ 46 | -------------------------------------------------------------------------------- /kubedr/config/crd/kustomization.yaml: -------------------------------------------------------------------------------- 1 | # This kustomization.yaml is not intended to be run by itself, 2 | # since it depends on service name and namespace that are out of this kustomize package. 3 | # It should be run by config/default 4 | resources: 5 | - bases/kubedr.catalogicsoftware.com_backuplocations.yaml 6 | - bases/kubedr.catalogicsoftware.com_metadatabackuppolicies.yaml 7 | - bases/kubedr.catalogicsoftware.com_metadatabackuprecords.yaml 8 | - bases/kubedr.catalogicsoftware.com_metadatarestores.yaml 9 | # +kubebuilder:scaffold:crdkustomizeresource 10 | 11 | patchesStrategicMerge: 12 | # [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix. 13 | # patches here are for enabling the conversion webhook for each CRD 14 | # - patches/webhook_in_backuplocations.yaml 15 | # - patches/webhook_in_metadatabackuppolicies.yaml 16 | # - patches/webhook_in_metadatabackuprecords.yaml 17 | #- patches/webhook_in_metadatarestores.yaml 18 | # +kubebuilder:scaffold:crdkustomizewebhookpatch 19 | 20 | # [CERTMANAGER] To enable webhook, uncomment all the sections with [CERTMANAGER] prefix. 21 | # patches here are for enabling the CA injection for each CRD 22 | # - patches/cainjection_in_backuplocations.yaml 23 | # - patches/cainjection_in_metadatabackuppolicies.yaml 24 | # - patches/cainjection_in_metadatabackuprecords.yaml 25 | #- patches/cainjection_in_metadatarestores.yaml 26 | # +kubebuilder:scaffold:crdkustomizecainjectionpatch 27 | 28 | # the following config is for teaching kustomize how to do kustomization for CRDs.
29 | configurations: 30 | - kustomizeconfig.yaml 31 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # KubeDR Docs 2 | 3 | The documentation for *KubeDR* is divided into two guides: 4 | 5 | - [User Guide](https://catalogicsoftware.com/clab-docs/kubedr/userguide) 6 | - [Developer Guide](https://catalogicsoftware.com/clab-docs/kubedr/devguide) 7 | 8 | We use [Sphinx](http://www.sphinx-doc.org/en/master/) to format and 9 | build the documentation. The guides use the 10 | [Read the Docs](https://github.com/readthedocs/sphinx_rtd_theme) 11 | theme. 12 | 13 | ## Installation 14 | 15 | Here is one way to install Sphinx. 16 | 17 | ```bash 18 | $ python3 -m venv ~/venv/sphinx 19 | $ export PATH=~/venv/sphinx/bin:$PATH 20 | $ pip install sphinx sphinx_rtd_theme 21 | 22 | # For local builds, this helps in continuous build and refresh. 23 | $ pip install sphinx-autobuild 24 | ``` 25 | 26 | ## Build 27 | 28 | ```bash 29 | $ cd docs/devguide 30 | $ make html 31 | ``` 32 | 33 | This will generate HTML files under ``build/html``. If you are 34 | making changes locally and would like to automatically build and 35 | refresh the generated files, use the following build command: 36 | 37 | ```bash 38 | $ cd docs/devguide 39 | $ sphinx-autobuild source build/html 40 | ``` 41 | 42 | ## Guidelines 43 | 44 | - The format for the documentation is 45 | [reStructuredText](http://www.sphinx-doc.org/en/master/usage/restructuredtext/index.html). 46 | 47 | - The source for docs should be readable in text form so please keep 48 | lines short (80 chars). This will also help in checking diffs. 49 | 50 | - Before checking in or submitting a PR, please build locally and 51 | confirm that there are no errors or warnings from Sphinx. 52 | -------------------------------------------------------------------------------- /docs/userguide/source/overview.rst: -------------------------------------------------------------------------------- 1 | ========== 2 | Overview 3 | ========== 4 | 5 | Kubernetes stores all its objects in *etcd*, so backing up data in 6 | *etcd* is crucial for DR purposes. This project implements a tool that 7 | backs up *etcd* data and certificates to any S3 bucket. It follows the 8 | *operator* pattern that is popular in the Kubernetes world. 9 | 10 | An operator is basically a combination of *custom resources (CRs)* 11 | coupled with *controllers* that manage the CRs. There is one 12 | controller for each CR. In addition to controllers, operators can also 13 | contain *webhooks* that can be used to validate the data in resources 14 | as well as to set defaults when some fields are not set in the 15 | resource specs. Our operator uses webhooks for both these purposes. 16 | 17 | For data transfer to S3, we currently use a tool called `restic`_. In 18 | the future, it will be possible to change the specific backup tool in 19 | a backwards-compatible manner. 20 | 21 | High level features of KubeDR 22 | ============================= 23 | 24 | - Backup of *etcd* data and certificates to S3. 25 | - Backups are encrypted and deduplicated. 26 | - Can pause and resume backups. 27 | - Can configure "retention" that controls how many backups are kept. 28 | 29 | Requirements 30 | ============ 31 | 32 | - Since direct access to etcd is needed, *Kubedr* currently works 33 | only for clusters where *etcd* is accessible and a snapshot can be 34 | taken.
35 | 36 | This includes on-prem clusters as well as those in the cloud that 37 | are explicitly set up on compute instances. 38 | 39 | - Supported Versions: 1.13 - 1.17. 40 | 41 | .. _restic: https://restic.net 42 | -------------------------------------------------------------------------------- /docs/devguide/source/ci_builds.rst: -------------------------------------------------------------------------------- 1 | =========== 2 | CI Builds 3 | =========== 4 | 5 | .. note:: 6 | 7 | All KubeDR `Catalogic Software`_ CI/CD builds are now handled 8 | by `Concourse CI`_. 9 | 10 | All artifacts are created using proper `Semantic Versioning`_ (`semver`) schemes 11 | and use S3 storage backends to store version history. 12 | 13 | Nothing runs at the base shell level on any host; even Docker builds 14 | utilize Docker-in-Docker (DinD) to build and push Docker images from 15 | within a container. 16 | 17 | Pipeline Basics 18 | =============== 19 | 20 | All pipeline configuration is written in YAML in accordance with the 21 | Concourse CI pipeline specification. 22 | 23 | Currently, the following events automatically trigger a pipeline run: 24 | 25 | - Pushing to 'master' 26 | 27 | - Opening a new Pull Request 28 | 29 | - Committing to an open Pull Request 30 | 31 | Artifacts 32 | ========= 33 | 34 | In total, there are four artifacts being produced: 35 | 36 | 1. ``kubedr.yaml`` 37 | 2. ``kubedr`` Docker Image 38 | 3. userguide 39 | 4. devguide 40 | 41 | Of chief importance is the ``kubedr.yaml`` file, which holds the bundled 42 | operator resource definition for *KubeDR*. It is applied against 43 | Kubernetes masters during the `kubedr-apply` job and tested against in the 44 | `smoke-tests` job. 45 | 46 | Releases 47 | ======== 48 | 49 | Provided all smoke-tests are passing, if the release job is started 50 | from Concourse, the pipeline will continue on to package the appropriate 51 | semver-formatted release assets and trigger a GitHub release. 52 | 53 | .. _Semantic Versioning: https://semver.org 54 | .. _Concourse CI: https://concourse-ci.org 55 | .. 
_Catalogic Software: https://catalogicsoftware.com 56 | -------------------------------------------------------------------------------- /tests/src/common/util.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import pprint 4 | import tempfile 5 | import time 6 | import traceback 7 | 8 | from common import kubeclient 9 | 10 | def ignore_errors(func): 11 | try: 12 | func() 13 | except Exception: 14 | logging.error(traceback.format_exc()) 15 | 16 | def ignore_errors_pred(predicate, func): 17 | try: 18 | if predicate: 19 | func() 20 | except Exception: 21 | logging.error(traceback.format_exc()) 22 | 23 | def timestamp(): 24 | return int(time.time()) 25 | 26 | def create_hostpath_pv(): 27 | pv_api = kubeclient.PersistentVolumeAPI() 28 | pv_name = "{}-{}".format("pv", timestamp()) 29 | pv_dir = tempfile.mkdtemp() 30 | 31 | pv_spec = { 32 | "accessModes": ["ReadWriteOnce"], 33 | "capacity": { 34 | "storage": "2Gi" 35 | }, 36 | "hostPath": { 37 | "path": pv_dir 38 | }, 39 | "persistentVolumeReclaimPolicy": "Delete", 40 | "storageClassName": "standard", 41 | "volumeMode": "Filesystem" 42 | } 43 | 44 | return pv_api.create(pv_name, pv_spec) 45 | 46 | def create_pvc_for_pv(pv): 47 | pprint.pprint(pv) 48 | pvc_api = kubeclient.PersistentVolumeClaimAPI(namespace="kubedr-system") 49 | name = "{}-{}".format("pvc", timestamp()) 50 | 51 | spec = { 52 | "accessModes": ["ReadWriteOnce"], 53 | "resources": { 54 | "requests": { 55 | "storage": pv.spec.capacity["storage"] 56 | } 57 | }, 58 | "storageClassName": "standard", 59 | "volumeMode": "Filesystem", 60 | "volumeName": pv.metadata.name 61 | } 62 | 63 | pvc = pvc_api.create(name, spec) 64 | 65 | # Wait till PVC is bound 66 | for i in range(30): 67 | time.sleep(1) 68 | 69 | pvc = pvc_api.get(name) 70 | if pvc.status.phase == "Bound": 71 | return pvc 72 | 73 | raise Exception("PVC {} did not change to 'Bound' status.".format(name)) 74 | 75 | 76 | 77 | -------------------------------------------------------------------------------- /docs/devguide/source/monitoring.rst: -------------------------------------------------------------------------------- 1 | ============ 2 | Monitoring 3 | ============ 4 | 5 | Since *KubeDR* is built with the `kubebuilder`_ framework, it 6 | automatically comes with a way to export `Prometheus`_ metrics. In 7 | fact, `controller runtime`_ exports several metrics dealing with 8 | internal implementation details, but those are relevant only to 9 | developers. 10 | 11 | *KubeDR* adds several metrics that are of interest to 12 | users. For details about these metrics, check the `user guide`_. 13 | 14 | Here are some high level details regarding the implementation and 15 | testing of the metrics sub-system. 16 | 17 | - All metrics are defined in the file ``kubedr/metrics/metrics.go``. 18 | 19 | The metrics endpoint is protected by RBAC, so until we figure out how 20 | to configure Prometheus in this setting, the feature was tested in the 21 | following way: 22 | 23 | - Remove RBAC by commenting out the line:: 24 | 25 | - manager_auth_proxy_patch.yaml 26 | 27 | and uncommenting the line:: 28 | 29 | - manager_prometheus_metrics_patch.yaml 30 | 31 | in the file:: 32 | 33 | kubedr/config/default/kustomization.yaml. 34 | 35 | You need to rebuild *KubeDR* after this change. Once *KubeDR* is 36 | deployed after this change, run the following command to make the metrics 37 | endpoint accessible on the local host: 38 | 39 | .. 
code-block:: bash 40 | 41 | $ kubectl -n kubedr-system port-forward <controller-manager-pod> 8080:8080 42 | 43 | Here is an example: 44 | 45 | .. code-block:: bash 46 | 47 | $ kubectl -n kubedr-system port-forward kubedr-controller-manager-bd9f4467c-ljblq 8080:8080 48 | 49 | Now, the following command will show all the relevant metrics: 50 | 51 | .. code-block:: bash 52 | 53 | $ curl -s http://localhost:8080/metrics | grep kubedr_ 54 | 55 | .. _kubebuilder: https://book.kubebuilder.io/ 56 | .. _Prometheus: https://prometheus.io 57 | .. _controller runtime: https://github.com/kubernetes-sigs/controller-runtime 58 | .. _user guide: https://catalogicsoftware.com/clab-docs/kubedr/userguide 59 | 60 | 61 | 62 | -------------------------------------------------------------------------------- /docs/userguide/source/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 12 | # 13 | # import os 14 | # import sys 15 | # sys.path.insert(0, os.path.abspath('.')) 16 | 17 | 18 | # -- Project information ----------------------------------------------------- 19 | 20 | project = 'KubeDR' 21 | copyright = '2020, Catalogic Software' 22 | author = 'Catalogic Software' 23 | 24 | 25 | # -- General configuration --------------------------------------------------- 26 | 27 | # Add any Sphinx extension module names here, as strings. They can be 28 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 29 | # ones. 30 | extensions = [ 31 | 'sphinx_rtd_theme', 32 | ] 33 | 34 | # Add any paths that contain templates here, relative to this directory. 35 | templates_path = ['_templates'] 36 | 37 | # List of patterns, relative to source directory, that match files and 38 | # directories to ignore when looking for source files. 39 | # This pattern also affects html_static_path and html_extra_path. 40 | exclude_patterns = [] 41 | 42 | 43 | # -- Options for HTML output ------------------------------------------------- 44 | 45 | # The theme to use for HTML and HTML Help pages. See the documentation for 46 | # a list of builtin themes. 47 | # 48 | html_theme = 'sphinx_rtd_theme' 49 | 50 | # Add any paths that contain custom static files (such as style sheets) here, 51 | # relative to this directory. They are copied after the builtin static files, 52 | # so a file named "default.css" will overwrite the builtin "default.css". 53 | html_static_path = ['_static'] 54 | 55 | html_css_files = [ 56 | 'css/custom.css', 57 | ] 58 | 59 | html_logo = "_static/images/logo-2.0-vert-xsmall.png" 60 | -------------------------------------------------------------------------------- /docs/userguide/source/installation.rst: -------------------------------------------------------------------------------- 1 | ============== 2 | Installation 3 | ============== 4 | 5 | - Install `cert-manager`_. 6 | 7 | - Make sure that ``kubectl`` is set up to access your cluster. 8 | 9 | - Download `kubedr.yaml` from the 10 | `Releases page <https://github.com/catalogicsoftware/kubedr/releases>`_. 
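For example, with a hypothetical release tag ``v0.1.0`` (the exact
asset URL depends on the release you pick), the manifest could be
fetched like this:

.. code-block:: bash

   $ curl -LO https://github.com/catalogicsoftware/kubedr/releases/download/v0.1.0/kubedr.yaml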
11 | 12 | .. note:: 13 | 14 | We are also working on supporting *Helm* installs in the future. 15 | 16 | - Apply the downloaded ``kubedr.yaml``, like so: 17 | 18 | .. code-block:: bash 19 | 20 | $ kubectl apply -f kubedr.yaml 21 | 22 | Note that the following two images are required for *Kubedr* to 23 | work. 24 | 25 | * catalogicsoftware/kubedrutil:0.1.0 26 | * catalogicsoftware/kubedr:0.1.0 27 | 28 | - Applying ``kubedr.yaml`` will create a new namespace called 29 | *kubedr-system* and start all the necessary pods, services, 30 | webhooks, and deployments in that namespace. It also installs the 31 | following *Custom Resource Definitions (CRDs)*: 32 | 33 | * BackupLocation 34 | * MetadataBackupPolicy 35 | * MetadataBackupRecord 36 | 37 | - To verify that the installation is successful, run the following command 38 | and ensure that all the resources are in the running state. 39 | 40 | .. code-block:: bash 41 | 42 | $ kubectl -n kubedr-system get all 43 | 44 | NAME READY STATUS RESTARTS AGE 45 | pod/kubedr-controller-manager-7bc7dc96f6-h8v28 2/2 Running 0 4s 46 | 47 | NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE 48 | service/kubedr-controller-manager-metrics-service ClusterIP 10.104.87.59 <none> 8443/TCP 4s 49 | service/kubedr-webhook-service ClusterIP 10.109.153.83 <none> 443/TCP 4s 50 | 51 | NAME READY UP-TO-DATE AVAILABLE AGE 52 | deployment.apps/kubedr-controller-manager 1/1 1 1 4s 53 | 54 | NAME DESIRED CURRENT READY AGE 55 | replicaset.apps/kubedr-controller-manager-7bc7dc96f6 1 1 1 4s 56 | 57 | .. _cert-manager: https://cert-manager.io/ 58 | -------------------------------------------------------------------------------- /docs/devguide/source/impl.rst: -------------------------------------------------------------------------------- 1 | ================ 2 | Implementation 3 | ================ 4 | 5 | - The project uses the `kubebuilder`_ tool to scaffold new controllers and 6 | types. 7 | 8 | - Before deciding on `kubebuilder`_, `opsdk`_ was considered, but 9 | `kubebuilder`_ is more active and has better documentation. Moreover, 10 | it has better support for webhooks. Also, there is an effort 11 | underway to integrate these two projects as far as Go operators are 12 | concerned. 13 | 14 | More details about the implementation will be added soon. 15 | 16 | .. _kubebuilder: https://book.kubebuilder.io/ 17 | .. _opsdk: https://github.com/operator-framework/operator-sdk 18 | 19 | Status updates 20 | ============== 21 | 22 | By default, any update to the status of a resource results in a 23 | reconcile. This is a problem if the status is being updated in the 24 | controller, as it might result in an infinite loop of update followed by 25 | reconcile. In many cases, you just want to update the status and don't 26 | want to have to process that update again. 27 | 28 | One way to achieve that is by checking to see if the "generation" 29 | number of a resource changed. This number is bumped up only if the 30 | "spec" of a resource changes. So if we skip the reconcile when the 31 | "generation" number hasn't changed, we avoid reconciles 32 | triggered by status updates. But keep in mind that you may miss other 33 | changes to the metadata (such as changes to annotations) as well. 34 | 35 | To implement this technique, do the following (using ``backupLoc`` as 36 | the resource in the examples below): 37 | 38 | - Add the following field to the status struct: 39 | 40 | .. 
code-block:: go 41 | 42 | // +kubebuilder:validation:Optional 43 | ObservedGeneration int64 `json:"observedGeneration"` 44 | 45 | - Set the generation number in status (do this whether or not the 46 | current operation succeeds). 47 | 48 | .. code-block:: go 49 | 50 | backupLoc.Status.ObservedGeneration = backupLoc.ObjectMeta.Generation 51 | 52 | - Do the following check in the relevant controller: 53 | 54 | .. code-block:: go 55 | 56 | if backupLoc.Status.ObservedGeneration == backupLoc.ObjectMeta.Generation { 57 | return ctrl.Result{}, nil 58 | } 59 | 60 | -------------------------------------------------------------------------------- /docs/devguide/source/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 12 | # 13 | # import os 14 | # import sys 15 | # sys.path.insert(0, os.path.abspath('.')) 16 | 17 | 18 | # -- Project information ----------------------------------------------------- 19 | 20 | project = 'KubeDR' 21 | copyright = '2020, Catalogic Software' 22 | author = 'Catalogic Software' 23 | 24 | 25 | # -- General configuration --------------------------------------------------- 26 | 27 | # Add any Sphinx extension module names here, as strings. They can be 28 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 29 | # ones. 30 | extensions = [ 31 | 'sphinx_rtd_theme', 32 | ] 33 | 34 | # Add any paths that contain templates here, relative to this directory. 35 | templates_path = ['_templates'] 36 | 37 | # List of patterns, relative to source directory, that match files and 38 | # directories to ignore when looking for source files. 39 | # This pattern also affects html_static_path and html_extra_path. 40 | exclude_patterns = [] 41 | 42 | 43 | # -- Options for HTML output ------------------------------------------------- 44 | 45 | # The theme to use for HTML and HTML Help pages. See the documentation for 46 | # a list of builtin themes. 47 | # 48 | html_theme = 'sphinx_rtd_theme' 49 | # html_theme = 'alabaster' 50 | 51 | # Add any paths that contain custom static files (such as style sheets) here, 52 | # relative to this directory. They are copied after the builtin static files, 53 | # so a file named "default.css" will overwrite the builtin "default.css". 54 | html_static_path = ['_static'] 55 | 56 | html_css_files = [ 57 | 'css/custom.css', 58 | ] 59 | 60 | html_logo = "_static/images/logo-2.0-vert-xsmall.png" 61 | -------------------------------------------------------------------------------- /docs/userguide/source/troubleshooting.rst: -------------------------------------------------------------------------------- 1 | ================= 2 | Troubleshooting 3 | ================= 4 | 5 | Collecting Info 6 | =============== 7 | 8 | In case of any problems, please collect the output of the following commands. 9 | 10 | .. 
code-block:: bash 11 | 12 | $ alias k="kubectl -n kubedr-system" 13 | 14 | $ k get all 15 | 16 | $ k describe all 17 | 18 | # For pods that show errors 19 | $ k logs <pod-name> --all-containers 20 | 21 | Artifacts 22 | ========= 23 | 24 | This section is aimed at cluster admins who want to or need to know 25 | all the artifacts that make up *Kubedr*. 26 | 27 | Custom Resources 28 | ---------------- 29 | 30 | BackupLocation 31 | Represents an S3 backup target. 32 | 33 | MetadataBackupPolicy 34 | Describes the backup policy. 35 | 36 | MetadataBackupRecord 37 | Created after every successful backup. 38 | 39 | MetadataRestore 40 | Creation of this resource triggers a restore. 41 | 42 | Kubernetes Resources 43 | -------------------- 44 | 45 | Controller Manager Pod 46 | This has controllers for all the custom resources. In addition, it 47 | also serves metrics and implements webhook endpoints (used for 48 | validation and initialization of unset fields). 49 | 50 | Corresponding to this pod, there is a Replica Set, Deployment, and 51 | two services. 52 | 53 | Cronjobs 54 | There will be one `cronjob`_ for each backup policy. 55 | 56 | Job 57 | One job created for each backup instance (managed by "Cronjob"). 58 | 59 | Repo initialization pod 60 | When a new ``BackupLocation`` is added, a pod is created that 61 | initializes the repo. It is named "<backuplocation-name>-init-pod", where 62 | "<backuplocation-name>" is the name of the ``BackupLocation`` resource. 63 | 64 | This pod is not deleted currently but in the future, it will be 65 | cleaned up. 66 | 67 | Snapshot deletion pods 68 | In order to support the retention setting and clean up expired 69 | snapshots, a pod is created that deletes the backup snapshot. Such 70 | pods are named "mbr-<name>-del", where "<name>" comes from the 71 | corresponding ``MetadataBackupRecord``. 72 | 73 | At most three such deletion pods are kept and others are cleaned 74 | up. 75 | 76 | .. _cronjob: https://kubernetes.io/docs/tasks/job/automated-tasks-with-cron-jobs 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | -------------------------------------------------------------------------------- /kubedr/Makefile: -------------------------------------------------------------------------------- 1 | 2 | # Image URL to use all building/pushing image targets 3 | IMG ?= controller:latest 4 | # Produce CRDs that work back to Kubernetes 1.11 (no version conversion) 5 | CRD_OPTIONS ?= "crd:trivialVersions=true" 6 | 7 | # Get the currently used golang install path (in GOPATH/bin, unless GOBIN is set) 8 | ifeq (,$(shell go env GOBIN)) 9 | GOBIN=$(shell go env GOPATH)/bin 10 | else 11 | GOBIN=$(shell go env GOBIN) 12 | endif 13 | 14 | all: manager 15 | 16 | # Run tests 17 | test: generate fmt vet manifests 18 | go test ./... -coverprofile cover.out 19 | 20 | # Build manager binary 21 | manager: generate fmt vet 22 | go build -o bin/manager main.go 23 | 24 | # Run against the configured Kubernetes cluster in ~/.kube/config 25 | run: generate fmt vet manifests 26 | go run ./main.go --metrics-addr 0 27 | 28 | # Install CRDs into a cluster 29 | install: manifests 30 | kustomize build config/crd | kubectl apply -f - 31 | 32 | # Deploy controller in the configured Kubernetes cluster in ~/.kube/config 33 | deploy: manifests 34 | cd config/manager && kustomize edit set image controller=${IMG} 35 | kustomize build config/default | kubectl apply -f - 36 | 37 | # Generate manifests e.g. CRD, RBAC etc. 38 | manifests: controller-gen 39 | $(CONTROLLER_GEN) $(CRD_OPTIONS) rbac:roleName=manager-role webhook paths="./..." 
output:crd:artifacts:config=config/crd/bases 40 | 41 | # Run go fmt against code 42 | fmt: 43 | go fmt ./... 44 | 45 | # Run go vet against code 46 | vet: 47 | go vet ./... 48 | 49 | # Generate code 50 | generate: controller-gen 51 | $(CONTROLLER_GEN) object:headerFile=./hack/boilerplate.go.txt paths="./..." 52 | 53 | # Build the docker image 54 | docker-build: test 55 | docker build . -t ${IMG} 56 | 57 | # Push the docker image 58 | docker-push: 59 | docker push ${IMG} 60 | 61 | # find or download controller-gen 62 | # download controller-gen if necessary 63 | controller-gen: 64 | ifeq (, $(shell which controller-gen)) 65 | @{ \ 66 | set -e ;\ 67 | CONTROLLER_GEN_TMP_DIR=$$(mktemp -d) ;\ 68 | cd $$CONTROLLER_GEN_TMP_DIR ;\ 69 | go mod init tmp ;\ 70 | go get sigs.k8s.io/controller-tools/cmd/controller-gen@v0.2.2 ;\ 71 | rm -rf $$CONTROLLER_GEN_TMP_DIR ;\ 72 | } 73 | CONTROLLER_GEN=$(GOBIN)/controller-gen 74 | else 75 | CONTROLLER_GEN=$(shell which controller-gen) 76 | endif 77 | -------------------------------------------------------------------------------- /kubedr/config/webhook/manifests.yaml: -------------------------------------------------------------------------------- 1 | 2 | --- 3 | apiVersion: admissionregistration.k8s.io/v1beta1 4 | kind: MutatingWebhookConfiguration 5 | metadata: 6 | creationTimestamp: null 7 | name: mutating-webhook-configuration 8 | webhooks: 9 | - clientConfig: 10 | caBundle: Cg== 11 | service: 12 | name: webhook-service 13 | namespace: system 14 | path: /mutate-kubedr-catalogicsoftware-com-v1alpha1-backuplocation 15 | failurePolicy: Fail 16 | name: mbackuplocation.kb.io 17 | rules: 18 | - apiGroups: 19 | - kubedr.catalogicsoftware.com 20 | apiVersions: 21 | - v1alpha1 22 | operations: 23 | - CREATE 24 | - UPDATE 25 | resources: 26 | - backuplocations 27 | - clientConfig: 28 | caBundle: Cg== 29 | service: 30 | name: webhook-service 31 | namespace: system 32 | path: /mutate-kubedr-catalogicsoftware-com-v1alpha1-metadatabackuppolicy 33 | failurePolicy: Fail 34 | name: mutatemetadatabackuppolicy.kb.io 35 | rules: 36 | - apiGroups: 37 | - kubedr.catalogicsoftware.com 38 | apiVersions: 39 | - v1alpha1 40 | operations: 41 | - CREATE 42 | - UPDATE 43 | resources: 44 | - metadatabackuppolicies 45 | 46 | --- 47 | apiVersion: admissionregistration.k8s.io/v1beta1 48 | kind: ValidatingWebhookConfiguration 49 | metadata: 50 | creationTimestamp: null 51 | name: validating-webhook-configuration 52 | webhooks: 53 | - clientConfig: 54 | caBundle: Cg== 55 | service: 56 | name: webhook-service 57 | namespace: system 58 | path: /validate-kubedr-catalogicsoftware-com-v1alpha1-backuplocation 59 | failurePolicy: Fail 60 | name: vbackuplocation.kb.io 61 | rules: 62 | - apiGroups: 63 | - kubedr.catalogicsoftware.com 64 | apiVersions: 65 | - v1alpha1 66 | operations: 67 | - CREATE 68 | - UPDATE 69 | resources: 70 | - backuplocations 71 | - clientConfig: 72 | caBundle: Cg== 73 | service: 74 | name: webhook-service 75 | namespace: system 76 | path: /validate-kubedr-catalogicsoftware-com-v1alpha1-metadatabackuppolicy 77 | failurePolicy: Fail 78 | name: vmetadatabackuppolicy.kb.io 79 | rules: 80 | - apiGroups: 81 | - kubedr.catalogicsoftware.com 82 | apiVersions: 83 | - v1alpha1 84 | operations: 85 | - CREATE 86 | - UPDATE 87 | resources: 88 | - metadatabackuppolicies 89 | -------------------------------------------------------------------------------- /kubedr/api/v1alpha1/metadatabackuprecord_types.go: -------------------------------------------------------------------------------- 1 | 
/* 2 | Copyright 2020 Catalogic Software 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package v1alpha1 18 | 19 | import ( 20 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 21 | ) 22 | 23 | // EDIT THIS FILE! THIS IS SCAFFOLDING FOR YOU TO OWN! 24 | // NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized. 25 | 26 | // MetadataBackupRecordSpec defines the desired state of MetadataBackupRecord 27 | type MetadataBackupRecordSpec struct { 28 | // INSERT ADDITIONAL SPEC FIELDS - desired state of cluster 29 | // Important: Run "make" to regenerate code after modifying this file 30 | 31 | // +kubebuilder:validation:MinLength:=1 32 | SnapshotId string `json:"snapshotId"` 33 | 34 | // +kubebuilder:validation:MinLength:=1 35 | Policy string `json:"policy"` 36 | 37 | // +kubebuilder:validation:MinLength:=1 38 | Backuploc string `json:"backuploc"` 39 | } 40 | 41 | // MetadataBackupRecordStatus defines the observed state of MetadataBackupRecord 42 | type MetadataBackupRecordStatus struct { 43 | // INSERT ADDITIONAL STATUS FIELD - define observed state of cluster 44 | // Important: Run "make" to regenerate code after modifying this file 45 | } 46 | 47 | // +kubebuilder:object:root=true 48 | 49 | // MetadataBackupRecord is the Schema for the metadatabackuprecords API 50 | type MetadataBackupRecord struct { 51 | metav1.TypeMeta `json:",inline"` 52 | metav1.ObjectMeta `json:"metadata,omitempty"` 53 | 54 | Spec MetadataBackupRecordSpec `json:"spec,omitempty"` 55 | Status MetadataBackupRecordStatus `json:"status,omitempty"` 56 | } 57 | 58 | // +kubebuilder:object:root=true 59 | 60 | // MetadataBackupRecordList contains a list of MetadataBackupRecord 61 | type MetadataBackupRecordList struct { 62 | metav1.TypeMeta `json:",inline"` 63 | metav1.ListMeta `json:"metadata,omitempty"` 64 | Items []MetadataBackupRecord `json:"items"` 65 | } 66 | 67 | func init() { 68 | SchemeBuilder.Register(&MetadataBackupRecord{}, &MetadataBackupRecordList{}) 69 | } 70 | -------------------------------------------------------------------------------- /kubedr/config/crd/bases/kubedr.catalogicsoftware.com_metadatabackuprecords.yaml: -------------------------------------------------------------------------------- 1 | 2 | --- 3 | apiVersion: apiextensions.k8s.io/v1beta1 4 | kind: CustomResourceDefinition 5 | metadata: 6 | annotations: 7 | controller-gen.kubebuilder.io/version: v0.2.2 8 | creationTimestamp: null 9 | name: metadatabackuprecords.kubedr.catalogicsoftware.com 10 | spec: 11 | group: kubedr.catalogicsoftware.com 12 | names: 13 | kind: MetadataBackupRecord 14 | listKind: MetadataBackupRecordList 15 | plural: metadatabackuprecords 16 | singular: metadatabackuprecord 17 | scope: "" 18 | validation: 19 | openAPIV3Schema: 20 | description: MetadataBackupRecord is the Schema for the metadatabackuprecords 21 | API 22 | properties: 23 | apiVersion: 24 | description: 'APIVersion defines the versioned schema of this representation 25 | of an object. 
Servers should convert recognized schemas to the latest 26 | internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' 27 | type: string 28 | kind: 29 | description: 'Kind is a string value representing the REST resource this 30 | object represents. Servers may infer this from the endpoint the client 31 | submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' 32 | type: string 33 | metadata: 34 | type: object 35 | spec: 36 | description: MetadataBackupRecordSpec defines the desired state of MetadataBackupRecord 37 | properties: 38 | backuploc: 39 | description: kubebuilder:validation:MinLength:=1 40 | type: string 41 | policy: 42 | description: kubebuilder:validation:MinLength:=1 43 | type: string 44 | snapshotId: 45 | description: kubebuilder:validation:MinLength:=1 46 | type: string 47 | required: 48 | - backuploc 49 | - policy 50 | - snapshotId 51 | type: object 52 | status: 53 | description: MetadataBackupRecordStatus defines the observed state of MetadataBackupRecord 54 | type: object 55 | type: object 56 | version: v1alpha1 57 | versions: 58 | - name: v1alpha1 59 | served: true 60 | storage: true 61 | status: 62 | acceptedNames: 63 | kind: "" 64 | plural: "" 65 | conditions: [] 66 | storedVersions: [] 67 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | 2 | import configparser 3 | import json 4 | import os 5 | import pprint 6 | 7 | import pytest 8 | 9 | from src.common import kubeclient 10 | 11 | env_config_data = [ 12 | ("wait_for_res_to_appear_num_attempts", 15, int), 13 | ("wait_for_res_to_appear_interval_secs", 1, int), 14 | ("wait_for_pod_to_be_done_num_attempts", 100, int), 15 | ("wait_for_pod_to_be_done_interval_secs", 3, int) 16 | ] 17 | 18 | # This class encapsulates all the parameters that can be controlled 19 | # using env variables. 20 | class EnvConfig: 21 | def __init__(self): 22 | for name, default_val, factory in env_config_data: 23 | self._set_env_config(name, default_val, factory) 24 | 25 | def _set_env_config(self, name, default_val, factory): 26 | setattr(self, name, factory(os.environ.get(name.upper(), default_val))) 27 | 28 | class GlobalConfig: 29 | def __init__(self, envconfig): 30 | self.envconfig = envconfig 31 | self.restic_password = "testpass" 32 | 33 | self.rootdir = os.environ['TESTS_ROOTDIR'] 34 | 35 | iniconfig = configparser.ConfigParser() 36 | # pytest has a way of finding the path of "pytest.ini" using "config" 37 | # object but it is not very well documented. So for now, directly 38 | # construct the path. 
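The rootdir itself comes from the TESTS_ROOTDIR environment variable, which the caller (presumably the runtests wrapper script) is expected to export.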
39 | iniconfig.read(os.path.join(self.rootdir, "pytest.ini")) 40 | self.iniconfig = iniconfig 41 | 42 | self.configdir = os.path.join(self.rootdir, "config") 43 | 44 | self.testenv = None 45 | testenv_f = os.path.join(self.configdir, "testenv.json") 46 | if os.path.exists(testenv_f): 47 | self.testenv = json.load(open(testenv_f)) 48 | 49 | self._init_apis() 50 | 51 | def _init_apis(self): 52 | self.namespace = "kubedr-system" 53 | self.pod_api = kubeclient.PodAPI(self.namespace) 54 | self.backuploc_api = kubeclient.BackupLocationAPI(self.namespace) 55 | self.mbp_api = kubeclient.MetadataBackupPolicyAPI(self.namespace) 56 | self.mr_api = kubeclient.MetadataRestoreAPI(self.namespace) 57 | self.secret_api = kubeclient.SecretAPI(self.namespace) 58 | self.pvc_api = kubeclient.PersistentVolumeClaimAPI(self.namespace) 59 | self.pv_api = kubeclient.PersistentVolumeAPI() 60 | 61 | # This is being set as a global variable so that library code 62 | # such as "kubeclient" can easily access the configuration set 63 | # through env variables. 64 | envconfig = EnvConfig() 65 | 66 | @pytest.fixture(scope = "session") 67 | def globalconfig(): 68 | kubeclient.init() 69 | pprint.pprint(envconfig.__dict__) 70 | return GlobalConfig(envconfig) 71 | 72 | -------------------------------------------------------------------------------- /kubedr/controllers/suite_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | package controllers 17 | 18 | import ( 19 | "path/filepath" 20 | "testing" 21 | 22 | . "github.com/onsi/ginkgo" 23 | . "github.com/onsi/gomega" 24 | 25 | kubedrv1alpha1 "kubedr/api/v1alpha1" 26 | 27 | "k8s.io/client-go/kubernetes/scheme" 28 | "k8s.io/client-go/rest" 29 | "sigs.k8s.io/controller-runtime/pkg/client" 30 | "sigs.k8s.io/controller-runtime/pkg/envtest" 31 | logf "sigs.k8s.io/controller-runtime/pkg/log" 32 | "sigs.k8s.io/controller-runtime/pkg/log/zap" 33 | // +kubebuilder:scaffold:imports 34 | ) 35 | 36 | // These tests use Ginkgo (BDD-style Go testing framework). Refer to 37 | // http://onsi.github.io/ginkgo/ to learn more about Ginkgo. 
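The package-level variables below hold the envtest control plane config, a controller-runtime client, and the test environment shared by the whole suite.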
38 | 39 | var cfg *rest.Config 40 | var k8sClient client.Client 41 | var testEnv *envtest.Environment 42 | 43 | func TestAPIs(t *testing.T) { 44 | RegisterFailHandler(Fail) 45 | 46 | RunSpecsWithDefaultAndCustomReporters(t, 47 | "Controller Suite", 48 | []Reporter{envtest.NewlineReporter{}}) 49 | } 50 | 51 | var _ = BeforeSuite(func(done Done) { 52 | logf.SetLogger(zap.LoggerTo(GinkgoWriter, true)) 53 | 54 | By("bootstrapping test environment") 55 | testEnv = &envtest.Environment{ 56 | CRDDirectoryPaths: []string{filepath.Join("..", "config", "crd", "bases")}, 57 | } 58 | 59 | var err error 60 | cfg, err = testEnv.Start() 61 | Expect(err).ToNot(HaveOccurred()) 62 | Expect(cfg).ToNot(BeNil()) 63 | 64 | err = kubedrv1alpha1.AddToScheme(scheme.Scheme) 65 | Expect(err).NotTo(HaveOccurred()) 66 | 67 | // +kubebuilder:scaffold:scheme 68 | 69 | k8sClient, err = client.New(cfg, client.Options{Scheme: scheme.Scheme}) 70 | Expect(err).ToNot(HaveOccurred()) 71 | Expect(k8sClient).ToNot(BeNil()) 72 | 73 | close(done) 74 | }, 60) 75 | 76 | var _ = AfterSuite(func() { 77 | By("tearing down the test environment") 78 | err := testEnv.Stop() 79 | Expect(err).ToNot(HaveOccurred()) 80 | }) 81 | -------------------------------------------------------------------------------- /kubedr/api/v1alpha1/backuplocation_types.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2020 Catalogic Software 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package v1alpha1 18 | 19 | import ( 20 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 21 | ) 22 | 23 | // EDIT THIS FILE! THIS IS SCAFFOLDING FOR YOU TO OWN! 24 | // NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized. 
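A BackupLocation ties together an S3 endpoint URL, a bucket name, and the secret holding the access credentials; when one is created, a pod is launched to initialize a restic repository in that bucket.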
25 | 26 | // BackupLocationSpec defines the desired state of BackupLocation 27 | type BackupLocationSpec struct { 28 | // Important: Run "make" to regenerate code after modifying this file 29 | 30 | // +kubebuilder:validation:MinLength:=1 31 | Url string `json:"url"` 32 | // +kubebuilder:validation:MinLength:=1 33 | BucketName string `json:"bucketName"` 34 | 35 | // name of the secret 36 | // +kubebuilder:validation:MinLength:=1 37 | Credentials string `json:"credentials"` 38 | } 39 | 40 | // BackupLocationStatus defines the observed state of BackupLocation 41 | type BackupLocationStatus struct { 42 | // INSERT ADDITIONAL STATUS FIELD - define observed state of cluster 43 | // Important: Run "make" to regenerate code after modifying this file 44 | 45 | // +kubebuilder:validation:Optional 46 | ObservedGeneration int64 `json:"observedGeneration"` 47 | 48 | InitStatus string `json:"initStatus"` 49 | 50 | // +kubebuilder:validation:Optional 51 | InitErrorMessage string `json:"initErrorMessage"` 52 | 53 | InitTime string `json:"initTime"` 54 | } 55 | 56 | // +kubebuilder:object:root=true 57 | // +kubebuilder:subresource:status 58 | 59 | // BackupLocation is the Schema for the backuplocations API 60 | type BackupLocation struct { 61 | metav1.TypeMeta `json:",inline"` 62 | metav1.ObjectMeta `json:"metadata,omitempty"` 63 | 64 | Spec BackupLocationSpec `json:"spec,omitempty"` 65 | Status BackupLocationStatus `json:"status,omitempty"` 66 | } 67 | 68 | // +kubebuilder:object:root=true 69 | 70 | // BackupLocationList contains a list of BackupLocation 71 | type BackupLocationList struct { 72 | metav1.TypeMeta `json:",inline"` 73 | metav1.ListMeta `json:"metadata,omitempty"` 74 | Items []BackupLocation `json:"items"` 75 | } 76 | 77 | func init() { 78 | SchemeBuilder.Register(&BackupLocation{}, &BackupLocationList{}) 79 | } 80 | -------------------------------------------------------------------------------- /kubedr/config/crd/bases/kubedr.catalogicsoftware.com_metadatarestores.yaml: -------------------------------------------------------------------------------- 1 | 2 | --- 3 | apiVersion: apiextensions.k8s.io/v1beta1 4 | kind: CustomResourceDefinition 5 | metadata: 6 | annotations: 7 | controller-gen.kubebuilder.io/version: v0.2.2 8 | creationTimestamp: null 9 | name: metadatarestores.kubedr.catalogicsoftware.com 10 | spec: 11 | group: kubedr.catalogicsoftware.com 12 | names: 13 | kind: MetadataRestore 14 | listKind: MetadataRestoreList 15 | plural: metadatarestores 16 | singular: metadatarestore 17 | scope: "" 18 | subresources: 19 | status: {} 20 | validation: 21 | openAPIV3Schema: 22 | description: MetadataRestore is the Schema for the metadatarestores API 23 | properties: 24 | apiVersion: 25 | description: 'APIVersion defines the versioned schema of this representation 26 | of an object. Servers should convert recognized schemas to the latest 27 | internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' 28 | type: string 29 | kind: 30 | description: 'Kind is a string value representing the REST resource this 31 | object represents. Servers may infer this from the endpoint the client 32 | submits requests to. Cannot be updated. In CamelCase. 
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' 33 | type: string 34 | metadata: 35 | type: object 36 | spec: 37 | description: MetadataRestoreSpec defines the desired state of MetadataRestore 38 | properties: 39 | mbrName: 40 | description: kubebuilder:validation:MinLength:=1 41 | type: string 42 | pvcName: 43 | description: kubebuilder:validation:MinLength:=1 44 | type: string 45 | required: 46 | - mbrName 47 | - pvcName 48 | type: object 49 | status: 50 | description: MetadataRestoreStatus defines the observed state of MetadataRestore 51 | properties: 52 | observedGeneration: 53 | format: int64 54 | type: integer 55 | restoreErrorMessage: 56 | type: string 57 | restoreStatus: 58 | type: string 59 | restoreTime: 60 | type: string 61 | required: 62 | - restoreStatus 63 | - restoreTime 64 | type: object 65 | type: object 66 | version: v1alpha1 67 | versions: 68 | - name: v1alpha1 69 | served: true 70 | storage: true 71 | status: 72 | acceptedNames: 73 | kind: "" 74 | plural: "" 75 | conditions: [] 76 | storedVersions: [] 77 | -------------------------------------------------------------------------------- /tests/src/test_backuploc.py: -------------------------------------------------------------------------------- 1 | 2 | import pprint 3 | import subprocess 4 | import time 5 | 6 | from kubernetes import client 7 | import pytest 8 | 9 | from common import kubeclient, util 10 | 11 | def timestamp(): 12 | return int(time.time()) 13 | 14 | # We don't want to test with incorrect IP as repo init will take more than 15 | # a minute to time out, thus adding to test time. 16 | def test_creating_backuplocation_with_invalid_credentials(globalconfig): 17 | if not globalconfig.testenv: 18 | pytest.skip("Test environment data is not given, skipping...") 19 | 20 | backuploc_creds_created = False 21 | backuploc_created = False 22 | init_annotation = "initialized.annotations.kubedr.catalogicsoftware.com" 23 | backuploc_creds = "{}-{}".format("s3creds", timestamp()) 24 | backuploc_name = "{}-{}".format("tests3", timestamp()) 25 | backup_loc = None 26 | 27 | backuploc = globalconfig.testenv["backuploc"] 28 | endpoint = globalconfig.testenv["backuploc"]["endpoint"] 29 | 30 | bucket_name = "{}-{}".format( 31 | globalconfig.testenv["backuploc"]["bucket_name_prefix"], 32 | timestamp()) 33 | 34 | backuploc_spec = { 35 | "url": endpoint, 36 | "bucketName": bucket_name, 37 | "credentials": backuploc_creds 38 | } 39 | 40 | try: 41 | kubeclient.create_backuploc_creds(backuploc_creds, backuploc["access_key"], backuploc["secret_key"]+"s", 42 | globalconfig.restic_password) 43 | backuploc_creds_created = True 44 | 45 | globalconfig.backuploc_api.create(backuploc_name, backuploc_spec) 46 | backuploc_created = True 47 | 48 | label_selector='kubedr.type=backuploc-init,kubedr.backuploc={}'.format(backuploc_name) 49 | pods = kubeclient.wait_for_pod_to_appear(label_selector) 50 | 51 | assert len(pods.items) == 1, "Found pods: ({})".format(", ".join([x.metadata.name for x in pods.items])) 52 | pod_name = pods.items[0].metadata.name 53 | 54 | pod = kubeclient.wait_for_pod_to_be_done(pod_name) 55 | 56 | backup_loc = globalconfig.backuploc_api.get(backuploc_name) 57 | assert backup_loc 58 | 59 | # We expect backup location initialization to fail so init annotation 60 | # should not be set. 
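Three cases satisfy that: no annotations at all, annotations present without the init key, or the key explicitly set to "false".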
61 | assert ("annotations" not in backup_loc["metadata"] or 62 | init_annotation not in backup_loc["metadata"]["annotations"] or 63 | backup_loc["metadata"]["annotations"][init_annotation] == "false") 64 | finally: 65 | if backup_loc: 66 | pprint.pprint(backup_loc) 67 | util.ignore_errors_pred(backuploc_created, lambda: globalconfig.backuploc_api.delete(backuploc_name)) 68 | util.ignore_errors_pred(backuploc_creds_created, lambda: globalconfig.secret_api.delete(backuploc_creds)) 69 | 70 | -------------------------------------------------------------------------------- /docs/userguide/source/configuration.rst: -------------------------------------------------------------------------------- 1 | =============== 2 | Configuration 3 | =============== 4 | 5 | Master nodes 6 | ============ 7 | 8 | Before backups are run, make sure that one or more master nodes has 9 | a label identifying them (if not already present). Some clusters are 10 | set up with *node-role.kubernetes.io/master* label on the master 11 | nodes. If this is the case, nothing more needs to be done. If this 12 | label is not present, create it. 13 | 14 | If it is not possible to use the above name for label, choose any name 15 | and pass that name in policy options (as described below). 16 | 17 | S3 end point 18 | ============ 19 | 20 | Before defining and running backups, you need to configure a S3 end 21 | point that acts as target for the backups. 22 | 23 | To define the S3 target, you need to create a custom resource called 24 | ``BackupLocation``. 25 | 26 | A sample resource: 27 | 28 | .. code-block:: yaml 29 | 30 | apiVersion: kubedr.catalogicsoftware.com/v1alpha1 31 | kind: BackupLocation 32 | metadata: 33 | name: remote-minio 34 | spec: 35 | url: http://10.96.57.3:9000 36 | bucketName: testbucket 37 | credentials: minio-creds 38 | 39 | name 40 | Logical name of the resource. 41 | 42 | url 43 | S3 end point 44 | 45 | bucketName 46 | Name of the S3 bucket. It will be created if it doesn't exist. 47 | 48 | credentials 49 | Name of the Kubernetes "secret" resource containing S3 50 | credentials. 51 | 52 | The secret should contain three pieces of information. Here is the 53 | description of each item in the secret and the key with which they 54 | should be created. 55 | 56 | * S3 access key ("access_key") 57 | * S3 secret key ("secret_key") 58 | * Password to be used to encrypt backups ("restic_repo_password"). 59 | 60 | Here is one way to create such a secret: 61 | 62 | .. code-block:: bash 63 | 64 | $ echo -n 'sample_access_key' > access_key 65 | $ echo -n 'sample_secret_key' > secret_key 66 | $ echo -n 'sample_restic_repo_password' > restic_repo_password 67 | 68 | $ kubectl -n kubedr-system create secret generic minio-creds \ 69 | --from-file=access_key --from-file=secret_key \ 70 | --from-file restic_repo_password 71 | 72 | Note that the secret must be created in the namespace 73 | *kubedr-system*. 74 | 75 | Assuming you defined the ``BackupLocation`` resource in a file called 76 | ``backuplocation.yaml``, create the resource by running the command: 77 | 78 | .. code-block:: bash 79 | 80 | $ kubectl -n kubedr-system apply -f backuplocation.yaml 81 | 82 | At this time, *Kubedr* will initialize a backup repository at the 83 | configured bucket (creating the bucket if necessary). To verify that 84 | initialization is successful, run the following command and ensure 85 | that status is "Completed". 86 | 87 | .. 
code-block:: bash 88 | 89 | $ kubectl -n kubedr-system get pod/<backuplocation-name>-init-pod 90 | 91 | -------------------------------------------------------------------------------- /kubedr/config/crd/bases/kubedr.catalogicsoftware.com_backuplocations.yaml: -------------------------------------------------------------------------------- 1 | 2 | --- 3 | apiVersion: apiextensions.k8s.io/v1beta1 4 | kind: CustomResourceDefinition 5 | metadata: 6 | annotations: 7 | controller-gen.kubebuilder.io/version: v0.2.2 8 | creationTimestamp: null 9 | name: backuplocations.kubedr.catalogicsoftware.com 10 | spec: 11 | group: kubedr.catalogicsoftware.com 12 | names: 13 | kind: BackupLocation 14 | listKind: BackupLocationList 15 | plural: backuplocations 16 | singular: backuplocation 17 | scope: "" 18 | subresources: 19 | status: {} 20 | validation: 21 | openAPIV3Schema: 22 | description: BackupLocation is the Schema for the backuplocations API 23 | properties: 24 | apiVersion: 25 | description: 'APIVersion defines the versioned schema of this representation 26 | of an object. Servers should convert recognized schemas to the latest 27 | internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' 28 | type: string 29 | kind: 30 | description: 'Kind is a string value representing the REST resource this 31 | object represents. Servers may infer this from the endpoint the client 32 | submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' 33 | type: string 34 | metadata: 35 | type: object 36 | spec: 37 | description: BackupLocationSpec defines the desired state of BackupLocation 38 | properties: 39 | bucketName: 40 | description: kubebuilder:validation:MinLength:=1 41 | type: string 42 | credentials: 43 | description: name of the secret kubebuilder:validation:MinLength:=1 44 | type: string 45 | url: 46 | description: kubebuilder:validation:MinLength:=1 47 | type: string 48 | required: 49 | - bucketName 50 | - credentials 51 | - url 52 | type: object 53 | status: 54 | description: BackupLocationStatus defines the observed state of BackupLocation 55 | properties: 56 | initErrorMessage: 57 | type: string 58 | initStatus: 59 | type: string 60 | initTime: 61 | type: string 62 | observedGeneration: 63 | format: int64 64 | type: integer 65 | required: 66 | - initStatus 67 | - initTime 68 | type: object 69 | type: object 70 | version: v1alpha1 71 | versions: 72 | - name: v1alpha1 73 | served: true 74 | storage: true 75 | status: 76 | acceptedNames: 77 | kind: "" 78 | plural: "" 79 | conditions: [] 80 | storedVersions: [] 81 | -------------------------------------------------------------------------------- /kubedr/config/default/kustomization.yaml: -------------------------------------------------------------------------------- 1 | # Adds namespace to all resources. 2 | namespace: kubedr-system 3 | 4 | # Value of this field is prepended to the 5 | # names of all resources, e.g. a deployment named 6 | # "wordpress" becomes "alices-wordpress". 7 | # Note that it should also match with the prefix (text before '-') of the namespace 8 | # field above. 9 | namePrefix: kubedr- 10 | 11 | # Labels to add to all resources and selectors. 
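Uncommenting commonLabels below stamps every generated resource (and its selectors) with the given key/value pair.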
12 | # commonLabels: 13 | # someName: someValue 14 | 15 | bases: 16 | - ../crd 17 | - ../rbac 18 | - ../manager 19 | # [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix including the one in crd/kustomization.yaml 20 | - ../webhook 21 | # [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER'. 'WEBHOOK' components are required. 22 | - ../certmanager 23 | # [PROMETHEUS] To enable prometheus monitor, uncomment all sections with 'PROMETHEUS'. 24 | # - ../prometheus 25 | 26 | patchesStrategicMerge: 27 | # Protect the /metrics endpoint by putting it behind auth. 28 | # Only one of manager_auth_proxy_patch.yaml and 29 | # manager_prometheus_metrics_patch.yaml should be enabled. 30 | - manager_auth_proxy_patch.yaml 31 | # If you want your controller-manager to expose the /metrics 32 | # endpoint w/o any authn/z, uncomment the following line and 33 | # comment manager_auth_proxy_patch.yaml. 34 | # Only one of manager_auth_proxy_patch.yaml and 35 | # manager_prometheus_metrics_patch.yaml should be enabled. 36 | # - manager_prometheus_metrics_patch.yaml 37 | 38 | # [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix including the one in crd/kustomization.yaml 39 | - manager_webhook_patch.yaml 40 | 41 | # [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER'. 42 | # Uncomment 'CERTMANAGER' sections in crd/kustomization.yaml to enable the CA injection in the admission webhooks. 43 | # 'CERTMANAGER' needs to be enabled to use ca injection 44 | - webhookcainjection_patch.yaml 45 | 46 | # the following config is for teaching kustomize how to do var substitution 47 | vars: 48 | # [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER' prefix. 49 | - name: CERTIFICATE_NAMESPACE # namespace of the certificate CR 50 | objref: 51 | kind: Certificate 52 | group: cert-manager.io 53 | version: v1alpha2 54 | name: serving-cert # this name should match the one in certificate.yaml 55 | fieldref: 56 | fieldpath: metadata.namespace 57 | - name: CERTIFICATE_NAME 58 | objref: 59 | kind: Certificate 60 | group: cert-manager.io 61 | version: v1alpha2 62 | name: serving-cert # this name should match the one in certificate.yaml 63 | - name: SERVICE_NAMESPACE # namespace of the service 64 | objref: 65 | kind: Service 66 | version: v1 67 | name: webhook-service 68 | fieldref: 69 | fieldpath: metadata.namespace 70 | - name: SERVICE_NAME 71 | objref: 72 | kind: Service 73 | version: v1 74 | name: webhook-service 75 | -------------------------------------------------------------------------------- /kubedr/api/v1alpha1/metadatarestore_types.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2020 Catalogic Software 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package v1alpha1 18 | 19 | import ( 20 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 21 | ) 22 | 23 | // EDIT THIS FILE! THIS IS SCAFFOLDING FOR YOU TO OWN! 
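The spec below captures what a restore needs: the MetadataBackupRecord to restore from (mbrName) and the PVC that receives the restored data (pvcName).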
24 | // NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized. 25 | 26 | // MetadataRestoreSpec defines the desired state of MetadataRestore 27 | type MetadataRestoreSpec struct { 28 | // INSERT ADDITIONAL SPEC FIELDS - desired state of cluster 29 | // Important: Run "make" to regenerate code after modifying this file 30 | 31 | // +kubebuilder:validation:MinLength:=1 32 | MBRName string `json:"mbrName"` 33 | 34 | // +kubebuilder:validation:MinLength:=1 35 | PVCName string `json:"pvcName"` 36 | } 37 | 38 | // MetadataRestoreStatus defines the observed state of MetadataRestore 39 | type MetadataRestoreStatus struct { 40 | // INSERT ADDITIONAL STATUS FIELD - define observed state of cluster 41 | // Important: Run "make" to regenerate code after modifying this file 42 | 43 | // +kubebuilder:validation:Optional 44 | ObservedGeneration int64 `json:"observedGeneration"` 45 | 46 | RestoreStatus string `json:"restoreStatus"` 47 | 48 | // +kubebuilder:validation:Optional 49 | RestoreErrorMessage string `json:"restoreErrorMessage"` 50 | 51 | RestoreTime string `json:"restoreTime"` 52 | } 53 | 54 | // The creation of this resource triggers a full restore of the data 55 | // (the etcd snapshot and certificates, if they were part of the backup). 56 | // It would have been ideal to use a custom subresource (such as 57 | // "/restore") but custom subresources are not yet supported for 58 | // custom resources. 59 | 60 | // +kubebuilder:object:root=true 61 | // +kubebuilder:subresource:status 62 | 63 | // MetadataRestore is the Schema for the metadatarestores API 64 | type MetadataRestore struct { 65 | metav1.TypeMeta `json:",inline"` 66 | metav1.ObjectMeta `json:"metadata,omitempty"` 67 | 68 | Spec MetadataRestoreSpec `json:"spec,omitempty"` 69 | Status MetadataRestoreStatus `json:"status,omitempty"` 70 | } 71 | 72 | // +kubebuilder:object:root=true 73 | 74 | // MetadataRestoreList contains a list of MetadataRestore 75 | type MetadataRestoreList struct { 76 | metav1.TypeMeta `json:",inline"` 77 | metav1.ListMeta `json:"metadata,omitempty"` 78 | Items []MetadataRestore `json:"items"` 79 | } 80 | 81 | func init() { 82 | SchemeBuilder.Register(&MetadataRestore{}, &MetadataRestoreList{}) 83 | } 84 | -------------------------------------------------------------------------------- /kubedr/api/v1alpha1/backuplocation_webhook.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2020 Catalogic Software 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package v1alpha1 18 | 19 | import ( 20 | "k8s.io/apimachinery/pkg/runtime" 21 | ctrl "sigs.k8s.io/controller-runtime" 22 | logf "sigs.k8s.io/controller-runtime/pkg/log" 23 | "sigs.k8s.io/controller-runtime/pkg/webhook" 24 | ) 25 | 26 | // log is for logging in this package. 27 | var backuplocationlog = logf.Log.WithName("backuplocation-resource") 28 | 29 | // SetupWebhookWithManager configures the webhook with the manager. 
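It is expected to be called from main.go during manager setup; the +kubebuilder:webhook markers below are what controller-gen uses to produce the webhook configurations in config/webhook/manifests.yaml.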
30 | func (r *BackupLocation) SetupWebhookWithManager(mgr ctrl.Manager) error { 31 | return ctrl.NewWebhookManagedBy(mgr). 32 | For(r). 33 | Complete() 34 | } 35 | 36 | // EDIT THIS FILE! THIS IS SCAFFOLDING FOR YOU TO OWN! 37 | 38 | // +kubebuilder:webhook:path=/mutate-kubedr-catalogicsoftware-com-v1alpha1-backuplocation,mutating=true,failurePolicy=fail,groups=kubedr.catalogicsoftware.com,resources=backuplocations,verbs=create;update,versions=v1alpha1,name=mbackuplocation.kb.io 39 | 40 | var _ webhook.Defaulter = &BackupLocation{} 41 | 42 | // Default implements webhook.Defaulter so a webhook will be registered for the type 43 | func (r *BackupLocation) Default() { 44 | backuplocationlog.Info("default", "name", r.Name) 45 | 46 | // TODO(user): fill in your defaulting logic. 47 | } 48 | 49 | // TODO(user): change verbs to "verbs=create;update;delete" if you want to enable deletion validation. 50 | // +kubebuilder:webhook:verbs=create;update,path=/validate-kubedr-catalogicsoftware-com-v1alpha1-backuplocation,mutating=false,failurePolicy=fail,groups=kubedr.catalogicsoftware.com,resources=backuplocations,versions=v1alpha1,name=vbackuplocation.kb.io 51 | 52 | var _ webhook.Validator = &BackupLocation{} 53 | 54 | // ValidateCreate implements webhook.Validator so a webhook will be registered for the type 55 | func (r *BackupLocation) ValidateCreate() error { 56 | backuplocationlog.Info("validate create", "name", r.Name) 57 | 58 | // We need to validate that Credentials are correct. 59 | // 60 | // The quickest way for now is to actually try and initialize the repo. 61 | // The command will fail if credentials are wrong. 62 | 63 | return nil 64 | } 65 | 66 | // ValidateUpdate implements webhook.Validator so a webhook will be registered for the type 67 | func (r *BackupLocation) ValidateUpdate(old runtime.Object) error { 68 | backuplocationlog.Info("validate update", "name", r.Name) 69 | 70 | // TODO(user): fill in your validation logic upon object update. 71 | return nil 72 | } 73 | 74 | // ValidateDelete implements webhook.Validator so a webhook will be registered for the type 75 | func (r *BackupLocation) ValidateDelete() error { 76 | backuplocationlog.Info("validate delete", "name", r.Name) 77 | 78 | // TODO(user): fill in your validation logic upon object deletion. 79 | return nil 80 | } 81 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Go Report Card](https://goreportcard.com/badge/github.com/catalogicsoftware/kubedr)](https://goreportcard.com/report/github.com/catalogicsoftware/kubedr) 2 | [![Discuss at kubedr-discuss@googlegroups.com](https://img.shields.io/badge/discuss-kubedr--discuss%40googlegroups.com-blue)](https://groups.google.com/d/forum/kubedr-discuss) 3 | ![Docker Pulls](https://img.shields.io/docker/pulls/catalogicsoftware/kubedr) 4 | [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) 5 | [![Open Source Love svg2](https://badges.frapsoft.com/os/v2/open-source.svg?v=103)](https://github.com/ellerbrock/open-source-badges/) 6 | 7 | # Kubernetes Cluster Backup 8 | 9 | Welcome to KubeDR! 10 | 11 | ![catalogic Logo](logos/logo-2.5-horiz-small.png) 12 | 13 | ## Overview 14 | 15 | Kubernetes stores all the cluster data (such as resource specs) in 16 | [etcd](https://etcd.io/). The *KubeDR* project implements data 17 | protection for this data. 
In addition, certificates can be backed up 18 | as well, though that is optional. 19 | 20 | **The project is currently in Alpha state and hence is not meant for 21 | production use.** 22 | 23 | ## Rationale 24 | 25 | There are projects and products that back up application data (stored 26 | in *Persistent Volumes*), but there is no project that provides the same 27 | first-class backup support for the critical Kubernetes cluster 28 | data stored in *etcd*. 29 | 30 | To be sure, there are recipes for taking an *etcd* snapshot, but the 31 | *KubeDR* project from 32 | [Catalogic Software](https://www.catalogicsoftware.com/) 33 | aims to provide complete end-to-end data protection for Kubernetes 34 | data stored in *etcd*. In addition, this project will back up 35 | certificates as well, so if a master needs to be rebuilt, all the data 36 | is available. 37 | 38 | ## Features 39 | 40 | Here is a list of high level features provided by *KubeDR*. For more 41 | details, please see the 42 | [User Guide](https://www.catalogicsoftware.com/clab-docs/kubedr/userguide). 43 | 44 | - Backup cluster data in *etcd* to any S3-compatible storage. 45 | - Backup certificates 46 | - Pause and resume backups 47 | - Clean up older snapshots based on a retention setting. 48 | - Restore *etcd* snapshot 49 | - Restore certificates 50 | 51 | ## Roadmap 52 | 53 | The following list shows many items that are planned for 54 | *KubeDR*. Some of them are improvements while others are new 55 | features. 56 | 57 | - Improve monitoring/reporting. 58 | - Support *Helm* installs. 59 | - Implement referential integrity semantics. 60 | - Improve restore capabilities. 61 | - Support file system as a target for backups. 62 | 63 | ## Documentation 64 | 65 | We use [Sphinx](http://www.sphinx-doc.org/en/master/) for docs. Source 66 | for the documentation is in the "docs" directory. For built documentation, 67 | see below: 68 | 69 | - [User Guide](https://catalogicsoftware.com/clab-docs/kubedr/userguide) 70 | - [Developer Guide](https://catalogicsoftware.com/clab-docs/kubedr/devguide) 71 | 72 | ## Feedback 73 | 74 | We would love to hear feedback from our users. Please feel free to open 75 | issues for bugs as well as for any feature requests. 76 | 77 | For any questions and discussions, please join us over at our Google Group: 78 | [kubedr-discuss](https://groups.google.com/d/forum/kubedr-discuss). 79 | 80 | Please note that the project is in Alpha, so there may be many 81 | corner cases where things may not work as expected. We are actively 82 | working on fixing any bugs and on adding new features. -------------------------------------------------------------------------------- /docs/devguide/source/automated_tests.rst: -------------------------------------------------------------------------------- 1 | ================= 2 | Automated Tests 3 | ================= 4 | 5 | It is very important that developers write automated tests at all 6 | levels, including: 7 | 8 | - Controller-level tests. 9 | 10 | - Integration tests that verify functionality from the user's point of 11 | view. 12 | 13 | At present, the project has integration tests that are implemented 14 | using the `pytest`_ framework. 15 | 16 | Integration Tests 17 | ================= 18 | 19 | Setup 20 | ----- 21 | 22 | - Create a virtual environment and install pytest and other required 23 | modules. 24 | 25 | .. 
26 | 
27 |    $ python3 -m venv ~/venv/kubedr
28 |    $ export PATH=~/venv/kubedr/bin:$PATH
29 |    $ pip install pytest pytest-dependency kubernetes
30 | 
31 | - Set up a cluster that is accessible by the ``kubectl`` command.
32 | 
33 | - Install *KubeDR* and all its dependencies. More instructions will be
34 |   provided later. For now, check the "User Guide".
35 | 
36 | - The tests use the `Python Kubernetes Client`_ to interact with the
37 |   cluster. Currently, the tests load the ``kubeconfig`` set up
38 |   locally, so they work fine if ``kubectl`` works.
39 | 
40 | Running tests
41 | -------------
42 | 
43 | Follow the instructions above and make sure ``pytest`` is in ``PATH``.
44 | 
45 | Since the tests work with an existing cluster, they need to be provided
46 | some config data (such as *S3* and *etcd* details). Such data is
47 | passed in a file called ``tests/config/testenv.json``. For an example,
48 | take a look at ``tests/config/sample_testenv.json``.
49 | 
50 | Here is a sample:
51 | 
52 | .. code-block:: json
53 | 
54 |    {
55 |      "backuploc": {
56 |        "endpoint": "http://10.106.44.180:9000",
57 |        "access_key": "minio",
58 |        "secret_key": "minio123"
59 |      },
60 |      "etcd_data": {
61 |        "ca.crt": "/tmp/ca.crt",
62 |        "client.crt": "/tmp/client.crt",
63 |        "client.key": "/tmp/client.key"
64 |      }
65 |    }
66 | 
67 | If the config data is not provided, the tests will be skipped. If only
68 | "backuploc" is provided, the tests will add the location and verify
69 | that the bucket is properly initialized, but skip the backup test.
70 | 
71 | Once the config data is ready, run the tests as follows:
72 | 
73 | .. code-block:: bash
74 | 
75 |    $ cd tests
76 |    $ ./runtests
77 | 
78 | Please note that the tests take a noticeable amount of time. This is because
79 | the backup test needs to wait for at least one minute before it can
80 | verify that the backup pod is created and the backup is done.
81 | 
82 | Configuring tests
83 | -----------------
84 | 
85 | The behavior of the tests can be controlled by various environment
86 | variables. Currently, this facility is used to configure how the tests
87 | check the k8s resources they create.
88 | 
89 | Waiting for resources to appear
90 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
91 | 
92 | The tests need to wait for some resources, such as *pods* and
93 | *cronjobs*, to appear. This is done by periodically polling to see if
94 | the resource shows up. Two env variables control how
95 | many times the polling is done and the interval between each such
96 | attempt.
97 | 
98 | Here are the relevant env variables.
99 | 
100 | WAIT_FOR_RES_TO_APPEAR_NUM_ATTEMPTS
101 |    Number of times the resource is checked. Default: 15.
102 | 
103 | WAIT_FOR_RES_TO_APPEAR_INTERVAL_SECS
104 |    Interval between each poll attempt. Default: 1 second.
105 | 
106 | Waiting for Pod to be done
107 | ~~~~~~~~~~~~~~~~~~~~~~~~~~
108 | 
109 | In many cases, the tests need to wait for a Pod to be done (say,
110 | backup). The following two env variables control this waiting.
111 | 
112 | WAIT_FOR_POD_TO_BE_DONE_NUM_ATTEMPTS
113 |    Number of times the Pod status is checked. Default: 100.
114 | 
115 | WAIT_FOR_POD_TO_BE_DONE_INTERVAL_SECS
116 |    Interval between each poll attempt. Default: 3 seconds.
117 | 
118 | 
119 | .. _pytest: https://docs.pytest.org/en/latest/
120 | .. 
_Python Kubernetes Client: https://github.com/kubernetes-client/python 121 | 122 | -------------------------------------------------------------------------------- /kubedr/config/crd/bases/kubedr.catalogicsoftware.com_metadatabackuppolicies.yaml: -------------------------------------------------------------------------------- 1 | 2 | --- 3 | apiVersion: apiextensions.k8s.io/v1beta1 4 | kind: CustomResourceDefinition 5 | metadata: 6 | annotations: 7 | controller-gen.kubebuilder.io/version: v0.2.2 8 | creationTimestamp: null 9 | name: metadatabackuppolicies.kubedr.catalogicsoftware.com 10 | spec: 11 | group: kubedr.catalogicsoftware.com 12 | names: 13 | kind: MetadataBackupPolicy 14 | listKind: MetadataBackupPolicyList 15 | plural: metadatabackuppolicies 16 | singular: metadatabackuppolicy 17 | scope: "" 18 | subresources: 19 | status: {} 20 | validation: 21 | openAPIV3Schema: 22 | description: MetadataBackupPolicy is the Schema for the metadatabackuppolicies 23 | API 24 | properties: 25 | apiVersion: 26 | description: 'APIVersion defines the versioned schema of this representation 27 | of an object. Servers should convert recognized schemas to the latest 28 | internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' 29 | type: string 30 | kind: 31 | description: 'Kind is a string value representing the REST resource this 32 | object represents. Servers may infer this from the endpoint the client 33 | submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' 34 | type: string 35 | metadata: 36 | type: object 37 | spec: 38 | description: MetadataBackupPolicySpec defines the desired state of MetadataBackupPolicy 39 | properties: 40 | certsDir: 41 | description: Optional. If not provided, certificates will not be backed 42 | up. 43 | type: string 44 | destination: 45 | description: Name of the S3 BackupLocation resource kubebuilder:validation:MinLength:=1 46 | type: string 47 | etcdCreds: 48 | description: Name of the "secret" containing etcd certificates. If not 49 | provided, "etcd-creds" is used as the name of the secret comprising 50 | of credentials. 51 | type: string 52 | etcdEndpoint: 53 | description: If not provided, "https://127.0.0.1:2379" will be used. 54 | type: string 55 | options: 56 | additionalProperties: 57 | type: string 58 | description: Refers to name of a configmap containing list of key=value 59 | pairs. Options string `json:"options"` 60 | type: object 61 | retainNumBackups: 62 | description: Should we even have default? 63 | format: int64 64 | type: integer 65 | schedule: 66 | description: The value of this field should be same as "schedule" in 67 | "cronjob". 68 | type: string 69 | suspend: 70 | type: boolean 71 | required: 72 | - destination 73 | - schedule 74 | type: object 75 | status: 76 | description: MetadataBackupPolicyStatus defines the observed state of MetadataBackupPolicy 77 | properties: 78 | backupErrorMessage: 79 | type: string 80 | backupPod: 81 | description: Name of the pod that performed the backup. 
82 | type: string 83 | backupStatus: 84 | type: string 85 | backupTime: 86 | type: string 87 | dataAdded: 88 | format: int64 89 | type: integer 90 | filesChanged: 91 | type: integer 92 | filesNew: 93 | type: integer 94 | mbrName: 95 | type: string 96 | snapshotId: 97 | type: string 98 | totalBytesProcessed: 99 | format: int64 100 | type: integer 101 | totalDurationSecs: 102 | type: string 103 | required: 104 | - backupStatus 105 | - backupTime 106 | type: object 107 | type: object 108 | version: v1alpha1 109 | versions: 110 | - name: v1alpha1 111 | served: true 112 | storage: true 113 | status: 114 | acceptedNames: 115 | kind: "" 116 | plural: "" 117 | conditions: [] 118 | storedVersions: [] 119 | -------------------------------------------------------------------------------- /kubedr/api/v1alpha1/metadatabackuppolicy_types.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2020 Catalogic Software 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package v1alpha1 18 | 19 | import ( 20 | "k8s.io/apimachinery/pkg/api/resource" 21 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 22 | ) 23 | 24 | // EDIT THIS FILE! THIS IS SCAFFOLDING FOR YOU TO OWN! 25 | // NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized. 26 | 27 | // MetadataBackupPolicySpec defines the desired state of MetadataBackupPolicy 28 | type MetadataBackupPolicySpec struct { 29 | // INSERT ADDITIONAL SPEC FIELDS - desired state of cluster 30 | // Important: Run "make" to regenerate code after modifying this file 31 | 32 | // Name of the S3 BackupLocation resource 33 | // kubebuilder:validation:MinLength:=1 34 | Destination string `json:"destination"` 35 | 36 | // Optional. If not provided, certificates will not be backed up. 37 | // +kubebuilder:validation:Optional 38 | CertsDir string `json:"certsDir,omitempty"` 39 | 40 | // +kubebuilder:validation:Optional 41 | // If not provided, "https://127.0.0.1:2379" will be used. 42 | EtcdEndpoint string `json:"etcdEndpoint,omitempty"` 43 | 44 | // Name of the "secret" containing etcd certificates. 45 | // +kubebuilder:validation:Optional 46 | // If not provided, "etcd-creds" is used as the name of the secret comprising of 47 | // credentials. 48 | EtcdCreds string `json:"etcdCreds,omitempty"` 49 | 50 | // The value of this field should be same as "schedule" in "cronjob". 51 | Schedule string `json:"schedule"` 52 | 53 | // Refers to name of a configmap containing list of key=value pairs. 54 | // Options string `json:"options"` 55 | // +kubebuilder:validation:Optional 56 | Options map[string]string `json:"options,omitempty"` 57 | 58 | // Props map[string]string `json:"props"` 59 | 60 | // Should we even have default? 
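    // Note: the defaulting webhook (see metadatabackuppolicy_webhook.go) sets
    // this field to 120 when it is unset or zero.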
61 | // +kubebuilder:validation:Optional 62 | RetainNumBackups *int64 `json:"retainNumBackups,omitempty"` 63 | 64 | // +kubebuilder:validation:Optional 65 | Suspend *bool `json:"suspend,omitempty"` 66 | } 67 | 68 | // MetadataBackupPolicyStatus defines the observed state of MetadataBackupPolicy 69 | type MetadataBackupPolicyStatus struct { 70 | BackupTime string `json:"backupTime"` 71 | BackupStatus string `json:"backupStatus"` 72 | 73 | // +kubebuilder:validation:Optional 74 | BackupErrorMessage string `json:"backupErrorMessage"` 75 | 76 | // +kubebuilder:validation:Optional 77 | FilesNew uint `json:"filesNew"` 78 | 79 | // +kubebuilder:validation:Optional 80 | FilesChanged uint `json:"filesChanged"` 81 | 82 | // +kubebuilder:validation:Optional 83 | DataAdded uint64 `json:"dataAdded"` 84 | 85 | // +kubebuilder:validation:Optional 86 | TotalBytesProcessed uint64 `json:"totalBytesProcessed"` 87 | 88 | // +kubebuilder:validation:Optional 89 | TotalDurationSecs resource.Quantity `json:"totalDurationSecs"` 90 | 91 | // +kubebuilder:validation:Optional 92 | SnapshotID string `json:"snapshotId"` 93 | 94 | // Name of the pod that performed the backup. 95 | // +kubebuilder:validation:Optional 96 | BackupPod string `json:"backupPod"` 97 | 98 | // +kubebuilder:validation:Optional 99 | MBRName string `json:"mbrName"` 100 | } 101 | 102 | // +kubebuilder:object:root=true 103 | // +kubebuilder:subresource:status 104 | 105 | // MetadataBackupPolicy is the Schema for the metadatabackuppolicies API 106 | type MetadataBackupPolicy struct { 107 | metav1.TypeMeta `json:",inline"` 108 | metav1.ObjectMeta `json:"metadata,omitempty"` 109 | 110 | Spec MetadataBackupPolicySpec `json:"spec,omitempty"` 111 | Status MetadataBackupPolicyStatus `json:"status,omitempty"` 112 | } 113 | 114 | // +kubebuilder:object:root=true 115 | 116 | // MetadataBackupPolicyList contains a list of MetadataBackupPolicy 117 | type MetadataBackupPolicyList struct { 118 | metav1.TypeMeta `json:",inline"` 119 | metav1.ListMeta `json:"metadata,omitempty"` 120 | Items []MetadataBackupPolicy `json:"items"` 121 | } 122 | 123 | func init() { 124 | SchemeBuilder.Register(&MetadataBackupPolicy{}, &MetadataBackupPolicyList{}) 125 | } 126 | -------------------------------------------------------------------------------- /kubedr/main.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2020 Catalogic Software 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | package main 18 | 19 | import ( 20 | "flag" 21 | "os" 22 | 23 | kubedrv1alpha1 "kubedr/api/v1alpha1" 24 | 25 | "kubedr/controllers" 26 | "kubedr/metrics" 27 | 28 | "k8s.io/apimachinery/pkg/runtime" 29 | clientgoscheme "k8s.io/client-go/kubernetes/scheme" 30 | _ "k8s.io/client-go/plugin/pkg/client/auth/gcp" 31 | ctrl "sigs.k8s.io/controller-runtime" 32 | "sigs.k8s.io/controller-runtime/pkg/log/zap" 33 | // +kubebuilder:scaffold:imports 34 | ) 35 | 36 | var ( 37 | scheme = runtime.NewScheme() 38 | setupLog = ctrl.Log.WithName("setup") 39 | metricsInfo *metrics.MetricsInfo 40 | ) 41 | 42 | func init() { 43 | _ = clientgoscheme.AddToScheme(scheme) 44 | 45 | _ = kubedrv1alpha1.AddToScheme(scheme) 46 | // +kubebuilder:scaffold:scheme 47 | 48 | metricsInfo = metrics.NewMetricsInfo() 49 | metricsInfo.RegisterAllMetrics() 50 | } 51 | 52 | func main() { 53 | var metricsAddr string 54 | var enableLeaderElection bool 55 | flag.StringVar(&metricsAddr, "metrics-addr", ":8080", "The address the metric endpoint binds to.") 56 | flag.BoolVar(&enableLeaderElection, "enable-leader-election", false, 57 | "Enable leader election for controller manager. Enabling this will ensure there is only one active controller manager.") 58 | flag.Parse() 59 | 60 | ctrl.SetLogger(zap.New(func(o *zap.Options) { 61 | o.Development = true 62 | })) 63 | 64 | mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{ 65 | Scheme: scheme, 66 | MetricsBindAddress: metricsAddr, 67 | LeaderElection: enableLeaderElection, 68 | Port: 9443, 69 | }) 70 | if err != nil { 71 | setupLog.Error(err, "unable to start manager") 72 | os.Exit(1) 73 | } 74 | 75 | if err = (&controllers.BackupLocationReconciler{ 76 | Client: mgr.GetClient(), 77 | Log: ctrl.Log.WithName("controllers").WithName("BackupLocation"), 78 | Scheme: mgr.GetScheme(), 79 | }).SetupWithManager(mgr); err != nil { 80 | setupLog.Error(err, "unable to create controller", "controller", "BackupLocation") 81 | os.Exit(1) 82 | } 83 | 84 | if os.Getenv("ENABLE_WEBHOOKS") != "false" { 85 | if err = (&kubedrv1alpha1.BackupLocation{}).SetupWebhookWithManager(mgr); err != nil { 86 | setupLog.Error(err, "unable to create webhook", "webhook", "BackupLocation") 87 | os.Exit(1) 88 | } 89 | } 90 | 91 | if err = (&controllers.MetadataBackupPolicyReconciler{ 92 | Client: mgr.GetClient(), 93 | Log: ctrl.Log.WithName("controllers").WithName("MetadataBackupPolicy"), 94 | Scheme: mgr.GetScheme(), 95 | MetricsInfo: metricsInfo, 96 | }).SetupWithManager(mgr); err != nil { 97 | setupLog.Error(err, "unable to create controller", "controller", "MetadataBackupPolicy") 98 | os.Exit(1) 99 | } 100 | 101 | if os.Getenv("ENABLE_WEBHOOKS") != "false" { 102 | if err = (&kubedrv1alpha1.MetadataBackupPolicy{}).SetupWebhookWithManager(mgr); err != nil { 103 | setupLog.Error(err, "unable to create webhook", "webhook", "MetadataBackupPolicy") 104 | os.Exit(1) 105 | } 106 | } 107 | if err = (&controllers.MetadataBackupRecordReconciler{ 108 | Client: mgr.GetClient(), 109 | Log: ctrl.Log.WithName("controllers").WithName("MetadataBackupRecord"), 110 | Scheme: mgr.GetScheme(), 111 | }).SetupWithManager(mgr); err != nil { 112 | setupLog.Error(err, "unable to create controller", "controller", "MetadataBackupRecord") 113 | os.Exit(1) 114 | } 115 | /* 116 | if err = (&kubedrv1alpha1.MetadataBackupRecord{}).SetupWebhookWithManager(mgr); err != nil { 117 | setupLog.Error(err, "unable to create webhook", "webhook", "MetadataBackupRecord") 118 | os.Exit(1) 119 | } 120 | */ 121 | if err = 
(&controllers.MetadataRestoreReconciler{ 122 | Client: mgr.GetClient(), 123 | Log: ctrl.Log.WithName("controllers").WithName("MetadataRestore"), 124 | Scheme: mgr.GetScheme(), 125 | }).SetupWithManager(mgr); err != nil { 126 | setupLog.Error(err, "unable to create controller", "controller", "MetadataRestore") 127 | os.Exit(1) 128 | } 129 | // +kubebuilder:scaffold:builder 130 | 131 | setupLog.Info("starting manager") 132 | if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil { 133 | setupLog.Error(err, "problem running manager") 134 | os.Exit(1) 135 | } 136 | } 137 | -------------------------------------------------------------------------------- /kubedr/api/v1alpha1/metadatabackuppolicy_webhook.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2020 Catalogic Software 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package v1alpha1 18 | 19 | import ( 20 | "github.com/robfig/cron" 21 | 22 | apierrors "k8s.io/apimachinery/pkg/api/errors" 23 | "k8s.io/apimachinery/pkg/runtime" 24 | "k8s.io/apimachinery/pkg/runtime/schema" 25 | "k8s.io/apimachinery/pkg/util/validation/field" 26 | ctrl "sigs.k8s.io/controller-runtime" 27 | logf "sigs.k8s.io/controller-runtime/pkg/log" 28 | "sigs.k8s.io/controller-runtime/pkg/webhook" 29 | ) 30 | 31 | // log is for logging in this package. 32 | var log = logf.Log.WithName("metadatabackuppolicy-resource") 33 | 34 | // SetupWebhookWithManager configures the web hook with the manager. 35 | func (r *MetadataBackupPolicy) SetupWebhookWithManager(mgr ctrl.Manager) error { 36 | return ctrl.NewWebhookManagedBy(mgr). 37 | For(r). 38 | Complete() 39 | } 40 | 41 | // EDIT THIS FILE! THIS IS SCAFFOLDING FOR YOU TO OWN! 42 | 43 | // +kubebuilder:webhook:verbs=create;update,path=/mutate-kubedr-catalogicsoftware-com-v1alpha1-metadatabackuppolicy,mutating=true,failurePolicy=fail,groups=kubedr.catalogicsoftware.com,resources=metadatabackuppolicies,versions=v1alpha1,name=mutatemetadatabackuppolicy.kb.io 44 | 45 | var _ webhook.Defaulter = &MetadataBackupPolicy{} 46 | 47 | // Default implements webhook.Defaulter so a webhook will be registered for the type 48 | func (r *MetadataBackupPolicy) Default() { 49 | log.Info("default", "name", r.Name) 50 | 51 | if r.Spec.EtcdEndpoint == "" { 52 | log.Info("Initializing EtcdEndpoint") 53 | r.Spec.EtcdEndpoint = "https://127.0.0.1:2379" 54 | } 55 | 56 | if r.Spec.EtcdCreds == "" { 57 | log.Info("Initializing EtcdCreds") 58 | r.Spec.EtcdCreds = "etcd-creds" 59 | } 60 | 61 | if r.Spec.RetainNumBackups == nil || *r.Spec.RetainNumBackups == 0 { 62 | log.Info("Initializing RetainNumBackups") 63 | r.Spec.RetainNumBackups = new(int64) 64 | *r.Spec.RetainNumBackups = 120 65 | } 66 | 67 | if r.Spec.Suspend == nil { 68 | log.Info("Initializing 'Suspend'") 69 | // Initialized to false. 70 | r.Spec.Suspend = new(bool) 71 | } 72 | } 73 | 74 | // TODO(user): change verbs to "verbs=create;update;delete" if you want to enable deletion validation. 
75 | // +kubebuilder:webhook:verbs=create;update,path=/validate-kubedr-catalogicsoftware-com-v1alpha1-metadatabackuppolicy,mutating=false,failurePolicy=fail,groups=kubedr.catalogicsoftware.com,resources=metadatabackuppolicies,versions=v1alpha1,name=vmetadatabackuppolicy.kb.io
76 | 
77 | var _ webhook.Validator = &MetadataBackupPolicy{}
78 | 
79 | func validateScheduleFormat(schedule string, fldPath *field.Path) *field.Error {
80 |     if _, err := cron.ParseStandard(schedule); err != nil {
81 |         return field.Invalid(fldPath, schedule, err.Error())
82 |     }
83 |     return nil
84 | }
85 | 
86 | func (r *MetadataBackupPolicy) validateCronJobSpec() *field.Error {
87 |     return validateScheduleFormat(
88 |         r.Spec.Schedule,
89 |         field.NewPath("spec").Child("schedule"))
90 | }
91 | 
92 | func (r *MetadataBackupPolicy) validatePolicy() error {
93 |     var allErrs field.ErrorList
94 | 
95 |     if err := r.validateCronJobSpec(); err != nil {
96 |         allErrs = append(allErrs, err)
97 |     }
98 | 
99 |     // TODO: Validate destination -
100 |     // verify that the resource exists.
101 | 
102 |     // TODO: Validate etcd endpoint and creds -
103 |     // connect and issue a dummy command.
104 | 
105 |     // TODO: How to validate certs dir?
106 | 
107 |     if len(allErrs) == 0 {
108 |         return nil
109 |     }
110 | 
111 |     return apierrors.NewInvalid(
112 |         schema.GroupKind{Group: "kubedr.catalogicsoftware.com/v1alpha1", Kind: "MetadataBackupPolicy"},
113 |         r.Name, allErrs)
114 | }
115 | 
116 | // ValidateCreate implements webhook.Validator so a webhook will be registered for the type
117 | func (r *MetadataBackupPolicy) ValidateCreate() error {
118 |     log.Info("validate create", "name", r.Name)
119 | 
120 |     return r.validatePolicy()
121 | }
122 | 
123 | // ValidateUpdate implements webhook.Validator so a webhook will be registered for the type
124 | func (r *MetadataBackupPolicy) ValidateUpdate(old runtime.Object) error {
125 |     log.Info("validate update", "name", r.Name)
126 |     return r.validatePolicy()
127 | }
128 | 
129 | // ValidateDelete implements webhook.Validator so a webhook will be registered for the type
130 | func (r *MetadataBackupPolicy) ValidateDelete() error {
131 |     log.Info("validate delete", "name", r.Name)
132 | 
133 |     // TODO(user): fill in your validation logic upon object deletion.
134 |     return nil
135 | }
136 | 
-------------------------------------------------------------------------------- /kubedr/metrics/metrics.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2020 Catalogic Software
3 | 
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 | 
8 |     http://www.apache.org/licenses/LICENSE-2.0
9 | 
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | */
16 | 
17 | package metrics
18 | 
19 | import (
20 |     "time"
21 | 
22 |     "github.com/prometheus/client_golang/prometheus"
23 |     crmetrics "sigs.k8s.io/controller-runtime/pkg/metrics"
24 | )
25 | 
26 | // MetricsInfo contains Prometheus metrics for KubeDR.
27 | type MetricsInfo struct { 28 | metrics map[string]prometheus.Collector 29 | } 30 | 31 | const ( 32 | backupSizeBytesKey = "kubedr_backup_size_bytes" 33 | numBackupsKey = "kubedr_num_backups" 34 | numSuccessfulBackupsKey = "kubedr_num_successful_backups" 35 | numFailedBackupsKey = "kubedr_num_failed_backups" 36 | backupDurationSecondsKey = "kubedr_backup_duration_seconds" 37 | 38 | policyLabel = "policyName" 39 | ) 40 | 41 | // NewMetricsInfo creates a new metrics structure to be used by controllers. 42 | func NewMetricsInfo() *MetricsInfo { 43 | return &MetricsInfo{ 44 | metrics: map[string]prometheus.Collector{ 45 | backupSizeBytesKey: prometheus.NewGaugeVec( 46 | prometheus.GaugeOpts{ 47 | Name: backupSizeBytesKey, 48 | Help: "Size of a backup in bytes", 49 | }, 50 | []string{policyLabel}, 51 | ), 52 | 53 | numBackupsKey: prometheus.NewCounterVec( 54 | prometheus.CounterOpts{ 55 | Name: numBackupsKey, 56 | Help: "Total number of backups", 57 | }, 58 | []string{policyLabel}, 59 | ), 60 | 61 | numSuccessfulBackupsKey: prometheus.NewCounterVec( 62 | prometheus.CounterOpts{ 63 | Name: numSuccessfulBackupsKey, 64 | Help: "Total number of successful backups", 65 | }, 66 | []string{policyLabel}, 67 | ), 68 | 69 | numFailedBackupsKey: prometheus.NewCounterVec( 70 | prometheus.CounterOpts{ 71 | Name: numFailedBackupsKey, 72 | Help: "Total number of failed backups", 73 | }, 74 | []string{policyLabel}, 75 | ), 76 | 77 | backupDurationSecondsKey: prometheus.NewHistogramVec( 78 | prometheus.HistogramOpts{ 79 | Name: backupDurationSecondsKey, 80 | Help: "Time taken to complete backup, in seconds", 81 | Buckets: []float64{ 82 | 15.0, 83 | 30.0, 84 | toSeconds(1 * time.Minute), 85 | toSeconds(5 * time.Minute), 86 | toSeconds(10 * time.Minute), 87 | toSeconds(15 * time.Minute), 88 | toSeconds(30 * time.Minute), 89 | toSeconds(1 * time.Hour), 90 | toSeconds(2 * time.Hour), 91 | toSeconds(3 * time.Hour), 92 | toSeconds(4 * time.Hour), 93 | toSeconds(5 * time.Hour), 94 | toSeconds(6 * time.Hour), 95 | toSeconds(7 * time.Hour), 96 | toSeconds(8 * time.Hour), 97 | toSeconds(9 * time.Hour), 98 | toSeconds(10 * time.Hour), 99 | }, 100 | }, 101 | []string{policyLabel}, 102 | ), 103 | }, 104 | } 105 | } 106 | 107 | // RegisterAllMetrics registers all prometheus metrics. 108 | func (m *MetricsInfo) RegisterAllMetrics() { 109 | for _, pm := range m.metrics { 110 | crmetrics.Registry.MustRegister(pm) 111 | } 112 | } 113 | 114 | // SetBackupSizeBytes records the size of a backup. 115 | func (m *MetricsInfo) SetBackupSizeBytes(policy string, size uint64) { 116 | if pm, ok := m.metrics[backupSizeBytesKey].(*prometheus.GaugeVec); ok { 117 | pm.WithLabelValues(policy).Set(float64(size)) 118 | } 119 | } 120 | 121 | // RecordBackup updates the total number of backups. 122 | func (m *MetricsInfo) RecordBackup(policy string) { 123 | if pm, ok := m.metrics[numBackupsKey].(*prometheus.CounterVec); ok { 124 | pm.WithLabelValues(policy).Inc() 125 | } 126 | } 127 | 128 | // RecordSuccessfulBackup updates the total number of successful backups. 129 | func (m *MetricsInfo) RecordSuccessfulBackup(policy string) { 130 | if pm, ok := m.metrics[numSuccessfulBackupsKey].(*prometheus.CounterVec); ok { 131 | pm.WithLabelValues(policy).Inc() 132 | } 133 | } 134 | 135 | // RecordFailedBackup updates the total number of failed backups. 
136 | func (m *MetricsInfo) RecordFailedBackup(policy string) {
137 |     if pm, ok := m.metrics[numFailedBackupsKey].(*prometheus.CounterVec); ok {
138 |         pm.WithLabelValues(policy).Inc()
139 |     }
140 | }
141 | 
142 | // RecordBackupDuration records the number of seconds taken by a backup.
143 | func (m *MetricsInfo) RecordBackupDuration(policy string, seconds float64) {
144 |     if c, ok := m.metrics[backupDurationSecondsKey].(*prometheus.HistogramVec); ok {
145 |         c.WithLabelValues(policy).Observe(seconds)
146 |     }
147 | }
148 | 
149 | func toSeconds(d time.Duration) float64 {
150 |     return float64(d / time.Second)
151 | }
152 | 
-------------------------------------------------------------------------------- /docs/userguide/source/backup.rst:
--------------------------------------------------------------------------------
1 | ========
2 | Backup
3 | ========
4 | 
5 | - Create a ``BackupLocation`` resource if not already done.
6 | 
7 | - Create a "secret" containing the three pieces of information that are
8 |   required to connect to *etcd*:
9 | 
10 |   Since *kube-apiserver* also connects to *etcd*, one can find this
11 |   information by looking at the command line arguments passed to the
12 |   *kube-apiserver* process. The options to look for are "etcd-cafile",
13 |   "etcd-certfile", and "etcd-keyfile".
14 | 
15 |   Once these three files are found, copy them to the following
16 |   names:
17 | 
18 |   - etcd-cafile => ca.crt
19 |   - etcd-certfile => client.crt
20 |   - etcd-keyfile => client.key
21 | 
22 |   and create the secret as follows:
23 | 
24 |   .. code-block:: bash
25 | 
26 |      $ kubectl -n kubedr-system create secret generic etcd-creds \
27 |          --from-file=ca.crt --from-file=client.crt --from-file=client.key
28 | 
29 |   Note that once the secret is created, the copies of the files can be
30 |   deleted. They are no longer used.
31 | 
32 | At this point, you are ready to create a ``MetadataBackupPolicy``
33 | resource that defines a backup policy. Here is a sample policy
34 | resource and a description of each field:
35 | 
36 | .. code-block:: yaml
37 | 
38 |    apiVersion: kubedr.catalogicsoftware.com/v1alpha1
39 |    kind: MetadataBackupPolicy
40 |    metadata:
41 |      name: test-backup
42 |    spec:
43 |      destination: remote-minio
44 | 
45 |      certsDir: /etc/kubernetes/pki
46 | 
47 |      etcdEndpoint: https://127.0.0.1:2379
48 |      etcdCreds: etcd-creds # secret
49 | 
50 |      schedule: "*/10 * * * *"
51 | 
52 |      retainNumBackups: 1
53 | 
54 | name
55 |    Name of the policy.
56 | 
57 | destination
58 |    Name of the *BackupLocation* resource where backups should be stored.
59 | 
60 | certsDir
61 |    Directory containing Kubernetes certificates. Optional. If given,
62 |    the contents of the entire directory will be backed up.
63 | 
64 | etcdEndpoint
65 |    Describes the endpoint where the etcd server is
66 |    available. Optional. In most cases, the default value of
67 |    "https://127.0.0.1:2379" will work. You can check the endpoint
68 |    by looking at the "kube-apiserver" command line option "etcd-servers".
69 | 
70 | etcdCreds
71 |    Optional. Name of the Kubernetes "secret" resource containing etcd
72 |    credentials. If the name "etcd-creds" is used for the secret,
73 |    there is no need to include this field.
74 | 
75 | schedule
76 |    A string in the format of the Kubernetes `cronjob`_ resource's
77 |    "schedule" field.
78 | 
79 |    For example, "\*/10 \* \* \* \*" results in backups every 10
80 |    minutes.
81 | 
82 | retainNumBackups
83 |    Optional. An integer specifying how many successful backups should
84 |    be retained on the target. The default value is 120.
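
For example, you can read back the defaulted value on an existing policy
(a quick check, assuming the sample policy name ``test-backup`` used
above):

.. code-block:: bash

   $ kubectl -n kubedr-system get metadatabackuppolicy test-backup \
       -o jsonpath='{.spec.retainNumBackups}'
   120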
85 | 
86 | In addition to the above fields, the ``MetadataBackupPolicy`` resource also
87 | supports a field called *options*, which is a map of string keys and
88 | string values. Currently, only one option is supported.
89 | 
90 | master-node-label-name
91 |    Describes the label that is used to designate master nodes.
92 | 
93 |    Note that if the label "node-role.kubernetes.io/master" is
94 |    present, there is no need to specify it in the options here. If
95 |    some other name (say "ismasternode") is used, it can be set as
96 |    follows:
97 | 
98 |    .. code-block:: yaml
99 | 
100 |       options:
101 |         "master-node-label-name": ismasternode
102 | 
103 | Assuming you defined the ``MetadataBackupPolicy`` resource in a file
104 | called ``policy.yaml``, create the resource by running the command:
105 | 
106 | .. code-block:: bash
107 | 
108 |    $ kubectl -n kubedr-system apply -f policy.yaml
109 | 
110 | At this point, *KubeDR* will create a `cronjob`_ resource.
111 | 
112 | After every successful backup, *KubeDR* creates a resource of the type
113 | ``MetadataBackupRecord``, which contains the snapshot ID of the
114 | backup. This resource acts as a "catalog" for the backups. Here is one
115 | such sample resource::
116 | 
117 |     apiVersion: kubedr.catalogicsoftware.com/v1alpha1
118 |     kind: MetadataBackupRecord
119 |     metadata:
120 |       creationTimestamp: "2020-02-21T18:35:10Z"
121 |       finalizers:
122 |       - mbr.finalizers.kubedr.catalogicsoftware.com
123 |       generation: 2
124 |       name: mbr-00f2bb92
125 |       namespace: kubedr-system
126 |       resourceVersion: "1739627"
127 |       selfLink: /apis/kubedr.catalogicsoftware.com/v1alpha1/namespaces/kubedr-system/metadatabackuprecords/mbr-00f2bb92
128 |       uid: 50cf3088-7763-4d8a-bb8b-0c308b1fbdac
129 |     spec:
130 |       backuploc: tests3-1582310048
131 |       policy: backup-1582310055
132 |       snapshotId: 00f2bb92
133 | 
134 | As can be seen, the spec of ``MetadataBackupRecord`` has three pieces
135 | of information.
136 | 
137 | backuploc
138 |    Points to the ``BackupLocation`` resource used for the backup.
139 | 
140 | policy
141 |    Name of the ``MetadataBackupPolicy`` resource.
142 | 
143 | snapshotId
144 |    Snapshot ID of the backup. This value is used in restores.
145 | 
146 | In addition to creating the above resource, *KubeDR* also generates an
147 | event, both in case of success and in case of any
148 | failures. Please check :ref:`Backup Events` for more
149 | details.
150 | 
151 | 
152 | .. _cronjob: https://kubernetes.io/docs/tasks/job/automated-tasks-with-cron-jobs
153 | 
154 | 
-------------------------------------------------------------------------------- /docs/userguide/source/monitoring.rst:
--------------------------------------------------------------------------------
1 | ============
2 | Monitoring
3 | ============
4 | 
5 | There are multiple ways in which KubeDR's operations can be
6 | monitored. They include:
7 | 
8 | - Prometheus metrics
9 | 
10 | - "Status" section of individual resources
11 | 
12 | - Kubernetes events
13 | 
14 | The following sections will elaborate on each of these mechanisms.
15 | 
16 | Prometheus Metrics
17 | ==================
18 | 
19 | *KubeDR* exposes several metrics that can be scraped with
20 | `Prometheus`_ and visualized using `Grafana`_. Most of the metrics
21 | deal with the internal implementation, but the following ones provide
22 | very useful information to the user. They are widely known as
23 | `RED`_ metrics.
24 | 
25 | kubedr_backup_size_bytes (Gauge)
26 |    Size of the backup in bytes.
27 | 
28 | kubedr_num_backups (Counter)
29 |    Total number of backups.
30 | 
31 | kubedr_num_successful_backups (Counter)
32 |    Total number of successful backups.
33 | 
34 | kubedr_num_failed_backups (Counter)
35 |    Total number of failed backups.
36 | 
37 | kubedr_backup_duration_seconds (Histogram)
38 |    Time (seconds) taken for the backup.
39 | 
40 |    This metric is a histogram with the following buckets::
41 | 
42 |       15s, 30s, 1m, 5m, 10m, 15m, 30m, 1h, ..., 10h
43 | 
44 | All the metrics will have a label called ``policyName`` set to the
45 | name of the ``MetadataBackupPolicy`` resource.
46 | 
47 | .. note::
48 | 
49 |    More details on how exactly Prometheus can be configured to scrape
50 |    KubeDR's metrics will be provided soon. If you are interested,
51 |    please check out `issue 26`_.
52 | 
53 | Status of Resources
54 | ===================
55 | 
56 | All Kubernetes resources have two sections - *spec* and *status*.
57 | 
58 | *spec* describes the intent of the user, and the cluster constantly
59 | drives towards matching it. On the other hand, *status* is for the
60 | cluster components to set, and it typically contains useful information
61 | about the current state of the resource.
62 | 
63 | *KubeDR* makes use of the *status* field to set the results of backup
64 | and other operations. The following sections describe the *status*
65 | details for each resource.
66 | 
67 | BackupLocation
68 | --------------
69 | 
70 | This resource defines a backup target (which is an S3 bucket), and
71 | when it is created, *KubeDR* initializes a backup repo at the given
72 | bucket. The *status* field of the resource indicates success or
73 | failure of that operation.
74 | 
75 | Here is an example of an error condition::
76 | 
77 |     status:
78 |       initErrorMessage: |+
79 |         Fatal: create key in repository at s3:http://10.106.189.174:9000/testbucket50 failed: repository master key and config already initialized
80 | 
81 |       initStatus: Failed
82 |       initTime: Thu Jan 30 16:02:53 2020
83 | 
84 | When initialization succeeds::
85 | 
86 |     status:
87 |       initErrorMessage: ""
88 |       initStatus: Completed
89 |       initTime: Thu Jan 30 16:05:56 2020
90 | 
91 | MetadataBackupPolicy
92 | --------------------
93 | 
94 | This resource defines the backup policy, and its *status* field
95 | indicates details about the most recent backup.
96 | 
97 | An example::
98 | 
99 |     status:
100 |       backupErrorMessage: ""
101 |       backupStatus: Completed
102 |       backupTime: Thu Jan 30 16:04:05 2020
103 |       dataAdded: 1573023
104 |       filesChanged: 1
105 |       filesNew: 0
106 |       mbrName: mbr-4c1223d6
107 |       snapshotId: b0f347ef
108 |       totalBytesProcessed: 15736864
109 |       totalDurationSecs: "0.318463127"
110 | 
111 | Apart from the stats regarding the backup, the status also contains
112 | the name of the ``MetadataBackupRecord`` resource that is required to
113 | perform restores.
114 | 
115 | MetadataRestore
116 | ---------------
117 | 
118 | This resource defines a restore, and its *status* field indicates
119 | success or failure of the operation.
120 | 
121 | Success::
122 | 
123 |     restoreErrorMessage: ""
124 |     restoreStatus: Completed
125 | 
126 | Error::
127 | 
128 |     restoreErrorMessage: Error in creating restore pod
129 |     restoreStatus: Failed
130 | 
131 | Events
132 | ======
133 | 
134 | *KubeDR* generates events after some operations; these events can be monitored
135 | by admins. The following sections provide more details about each such
136 | event. Note that events are generated in the namespace
137 | *kubedr-system*.
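
For example, all of the events described below can be observed live with a
single command (assuming ``kubectl`` is configured for the cluster):

.. code-block:: bash

   $ kubectl -n kubedr-system get events --watch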
138 | 
139 | Backup repo initialization
140 | --------------------------
141 | 
142 | When a ``BackupLocation`` resource is created for the first time, a backup
143 | repo is initialized at the given S3 bucket. An event is generated at
144 | the end of the init process.
145 | 
146 | Here is an example of the event generated after successful
147 | initialization::
148 | 
149 |     $ kubectl -n kubedr-system get event
150 | 
151 |     ...
152 |     25s Normal InitSucceeded backuplocation/local-minio Repo at s3:http://10.106.189.174:9000/testbucket62 is successfully initialized
153 | 
154 | In case of error::
155 | 
156 |     $ kubectl -n kubedr-system get event
157 | 
158 |     ...
159 |     5s Error InitFailed backuplocation/local-minio Fatal: create key in repository at s3:http://10.106.189.174:9000/testbucket62 failed: repository master key and config already initialized
160 | 
161 | .. _Backup events:
162 | 
163 | 
164 | Backup
165 | ------
166 | 
167 | After every backup, an event is generated containing details about
168 | success or failure; in the case of the latter, the event will
169 | contain the relevant error message. Here are a couple of sample events.
170 | 
171 | Success::
172 | 
173 |     Normal BackupSucceeded metadatabackuppolicy/test-backup Backup completed, snapshot ID: 34abbf1b
174 | 
175 | Error::
176 | 
177 |     Error BackupFailed metadatabackuppolicy/test-backup subprocess.CalledProcessError:
178 |     Command '['restic', '--json', '-r', 's3:http://10.106.189.174:9000/testbucket63',
179 |     '--verbose', 'backup', '/data']' returned non-zero exit status 1.
180 |     (Fatal: unable to open config file: Stat: The access key ID you provided does not exist
181 |     in our records. Is there a repository at the following location?
182 |     s3:http://10.106.189.174:9000/testbucket63
183 | 
184 | Restore
185 | -------
186 | 
187 | After every restore, an event is generated containing details about
188 | success or failure; in the case of the latter, the event will
189 | contain the relevant error message. Here are a couple of sample events.
190 | 
191 | Success::
192 | 
193 |     Normal RestoreSucceeded metadatarestore/mrtest Restore from snapshot 5bbc8b1a completed
194 | 
195 | Error::
196 | 
197 |     Error RestoreFailed metadatarestore/mrtest subprocess.CalledProcessError:
198 |     Command '['restic', '-r', 's3:http://10.106.189.175:9000/testbucket110',
199 |     '--verbose', 'restore', '--target', '/restore', '5bbc8b1a']' returned non-zero exit
200 |     status 1. (Fatal: unable to open config file: Stat:
201 |     Get http://10.106.189.175:9000/testbucket110/?location=:
202 |     dial tcp 10.106.189.175:9000: i/o timeout
203 |     Is there a repository at the following location?
204 |     s3:http://10.106.189.175:9000/testbucket110)
205 | 
206 | .. _Prometheus: https://prometheus.io
207 | .. _Grafana: https://grafana.com
208 | .. _RED: https://www.scalyr.com/blog/red-and-monitoring-three-key-metrics-and-why-they-matter/
209 | .. _issue 26: https://github.com/catalogicsoftware/kubedr/issues/26
210 | 
211 | 
-------------------------------------------------------------------------------- /docs/userguide/source/restore.rst:
--------------------------------------------------------------------------------
1 | =========
2 | Restore
3 | =========
4 | 
5 | There are two types of restores supported by *KubeDR*.
6 | 
7 | DR restore
8 |    Used when master nodes are lost and you are setting up a new
9 |    cluster.
10 | 
11 | Regular restore
12 |    Used when the cluster is up and running but you need access to the
13 |    certificates or the etcd snapshot.
14 | 
15 | DR Restore
16 | ==========
17 | 
18 | In this case, first browse the backups on the target and then pick a
19 | snapshot to restore from.
20 | 
21 | To help simplify restore operations, a Python library called
22 | `kubedrctl`_ can be used. It can be installed by running:
23 | 
24 | .. code-block:: bash
25 | 
26 |    $ pip install kubedrctl
27 | 
28 | Please note that you need to use Python 3 for ``kubedrctl``. At this
29 | point, this library is a thin wrapper over the corresponding docker
30 | commands, but its functionality will be enhanced in the future.
31 | 
32 | In the rest of this document, we will provide sample commands using
33 | ``kubedrctl`` as well as using ``docker`` directly.
34 | 
35 | Browsing Backups
36 | ----------------
37 | 
38 | To browse backups, please run the following command (replace
39 | ``<access-key>``, ``<secret-key>``, ``<repo-password>``, ``<endpoint>``, and ``<bucket>``
40 | with the values you used while creating the ``BackupLocation`` resource):
41 | 
42 | .. code-block:: bash
43 | 
44 |    $ kubedrctl list backups --accesskey <access-key> \
45 |        --secretkey <secret-key> --repopwd <repo-password> \
46 |        --endpoint <endpoint> --bucket <bucket>
47 | 
48 | Alternatively,
49 | 
50 | .. code-block:: bash
51 | 
52 |    $ docker run --rm -it -e AWS_ACCESS_KEY_ID=<access-key> \
53 |        -e AWS_SECRET_ACCESS_KEY=<secret-key> \
54 |        -e RESTIC_PASSWORD=<repo-password> \
55 |        restic/restic \
56 |        -r s3:<endpoint>/<bucket> snapshots
57 | 
58 | These commands will print the list of backup snapshots available at the
59 | given backup location. Here is a sample output::
60 | 
61 |     ID        Time                 Host   Tags   Paths
62 |     ------------------------------------------------------------
63 |     abe28f0f  2020-01-15 01:28:10  beast         /data
64 |     a0f7dbf7  2020-01-15 01:29:10  beast         /data
65 |     734af8c7  2020-01-15 01:30:10  beast         /data
66 | 
67 | You need the snapshot ID printed in the first column for the restore.
68 | 
69 | 
70 | Restoring a Backup
71 | ------------------
72 | 
73 | To restore data from a snapshot into the directory ``/tmp/restore``:
74 | 
75 | .. code-block:: bash
76 | 
77 |    $ kubedrctl restore --accesskey <access-key> \
78 |        --secretkey <secret-key> --repopwd <repo-password> \
79 |        --endpoint <endpoint> --bucket <bucket> \
80 |        --targetdir /tmp/restore
81 | 
82 | Alternatively,
83 | 
84 | .. code-block:: bash
85 | 
86 |    $ docker run --rm -it -e AWS_ACCESS_KEY_ID=<access-key> \
87 |        -e AWS_SECRET_ACCESS_KEY=<secret-key> \
88 |        -e RESTIC_PASSWORD=<repo-password> \
89 |        -v /tmp/restore:/tmp/restore \
90 |        restic/restic \
91 |        -r s3:<endpoint>/<bucket> \
92 |        restore --target /tmp/restore \
93 |        <snapshot-id>
94 | 
95 | Once the restore is done, the etcd snapshot file and (optionally) certificates
96 | will be available in ``/tmp/restore/data``. One can then configure the etcd
97 | server to recover data from the snapshot. For more details, see
98 | `Restoring etcd cluster`_ and the docs for your cluster distro.
99 | 
100 | Regular Restore
101 | ===============
102 | 
103 | This type allows you to restore certificates and the etcd snapshot by
104 | simply creating a custom resource. The assumption is that the cluster
105 | is up and running but you need access to this data for one reason or
106 | another.
107 | 
108 | Browsing Backups
109 | ----------------
110 | 
111 | As we have already seen, *KubeDR* creates a resource of the type
112 | ``MetadataBackupRecord`` after every successful backup. To list all
113 | the backups in chronological order, run the following command:
114 | 
115 | .. code-block:: bash
116 | 
117 |    $ kubectl -n kubedr-system get metadatabackuprecords \
118 |        --sort-by=.metadata.creationTimestamp \
119 |        -o custom-columns=NAME:.metadata.name,CTIME:.metadata.creationTimestamp
120 | 
121 |    NAME           CTIME
122 |    mbr-00f2bb92   2020-02-21T18:35:10Z
123 |    mbr-30efb3f4   2020-02-21T18:36:11Z
124 |    mbr-a27e5153   2020-02-21T18:36:11Z
125 |    mbr-9353053f   2020-02-21T18:45:11Z
126 | 
127 | Based on the timestamp, select the backup you want to restore from and
128 | note the name.
129 | 
130 | Restoring a Backup
131 | ------------------
132 | 
133 | In the previous step, you selected the source for the restore; now
134 | you need to tell *KubeDR* where the files need to be restored. This is
135 | done by creating a `PersistentVolumeClaim`_.
136 | 
137 | `PersistentVolume`_ (PV) and `PersistentVolumeClaim`_ (PVC) resources
138 | are the primary mechanism by which storage is provided to pods and
139 | containers. In this case, the user needs to create a
140 | PV of the type "FileSystem" and then create a PVC that binds to
141 | it. There are many types of PVs supported by Kubernetes. One such type
142 | is "HostPath", which allows a local directory on a node to be used.
143 | 
144 | Here is a sample "HostPath" PV that points to the local directory
145 | ``/tmp/restoredir``.
146 | 
147 | .. code-block:: bash
148 | 
149 |    $ cat pv.yaml
150 | 
151 |    apiVersion: v1
       kind: PersistentVolume
152 |    metadata:
153 |      name: mrtest
154 |    spec:
155 |      accessModes:
156 |      - ReadWriteOnce
157 |      capacity:
158 |        storage: 8Gi
159 |      hostPath:
160 |        path: /tmp/restoredir
161 |      persistentVolumeReclaimPolicy: Delete
162 |      storageClassName: standard
163 | 
164 |    $ kubectl apply -f pv.yaml
165 | 
166 | The following PVC will bind to the above PV.
167 | 
168 | .. code-block:: bash
169 | 
170 |    $ cat pvc.yaml
171 | 
172 |    apiVersion: v1
173 |    kind: PersistentVolumeClaim
174 |    metadata:
175 |      name: mrtest-claim
176 |    spec:
177 |      accessModes:
178 |      - ReadWriteOnce
179 |      resources:
180 |        requests:
181 |          storage: 8Gi
182 |      volumeMode: Filesystem
183 |      volumeName: mrtest
184 | 
185 |    # Note that the PVC needs to be created in the KubeDR namespace.
186 |    $ kubectl -n kubedr-system apply -f pvc.yaml
187 | 
188 | At this point, the PVC ``mrtest-claim`` should be bound to the PV
189 | ``mrtest`` and should be ready to be used. You can verify it like so:
190 | 
191 | .. code-block:: bash
192 | 
193 |    $ kubectl -n kubedr-system get pvc mrtest-claim
194 | 
195 |    NAME           STATUS   VOLUME   CAPACITY   ACCESS MODES   STORAGECLASS   AGE
196 |    mrtest-claim   Bound    mrtest   8Gi        RWO            standard       1d
197 | 
198 | Now, we are ready to create the resource that triggers the restore.
199 | 
200 | .. code-block:: bash
201 | 
202 |    $ cat restore.yaml
203 | 
204 |    apiVersion: kubedr.catalogicsoftware.com/v1alpha1
205 |    kind: MetadataRestore
206 |    metadata:
207 |      name: mrtest
208 |    spec:
209 |      mbrName: mbr-e5014782
210 |      pvcName: mrtest-claim
211 | 
212 |    $ kubectl -n kubedr-system apply -f restore.yaml
213 | 
214 | When the restore is complete, the status of the ``MetadataRestore``
215 | resource created above will be updated. Example of a successful
216 | restore::
217 | 
218 |     apiVersion: kubedr.catalogicsoftware.com/v1alpha1
219 |     kind: MetadataRestore
220 |     metadata:
221 |       ...
222 | name: mrtest 223 | namespace: kubedr-system 224 | spec: 225 | mbrName: mbr-c41edb29 226 | pvcName: mrtest-claim 227 | status: 228 | observedGeneration: 1 229 | restoreErrorMessage: "" 230 | restoreStatus: Completed 231 | restoreTime: "2020-02-21T21:14:05Z" 232 | 233 | Once restore is complete, the restored files (``etcd-snapshot.db`` and 234 | certificate files) can be found in the directory pointed to by the 235 | persistent volume. At this point, you can safely delete the 236 | ``MetadataRestore`` resource. 237 | 238 | At the end of restore, *KubeDR* generates an event. Please check 239 | "Monitoring" section for more details. 240 | 241 | .. _Restoring etcd cluster: https://github.com/etcd-io/etcd/blob/master/Documentation/op-guide/recovery.md#restoring-a-cluster 242 | .. _kubedrctl: https://pypi.org/project/kubedrctl/ 243 | .. _PersistentVolumeClaim: https://kubernetes.io/docs/concepts/storage/persistent-volumes/ 244 | .. _PersistentVolume: https://kubernetes.io/docs/concepts/storage/persistent-volumes/ 245 | -------------------------------------------------------------------------------- /tests/src/common/kubeclient.py: -------------------------------------------------------------------------------- 1 | 2 | import base64 3 | import logging 4 | import time 5 | import urllib3 6 | 7 | from kubernetes import client, watch 8 | from kubernetes import config as k8sconfig 9 | 10 | import conftest 11 | 12 | class KubeResourceAPI: 13 | def __init__(self, namespace="default"): 14 | self.namespace = namespace 15 | self.v1api = client.CoreV1Api() 16 | self.batch_v1beta1_api = client.BatchV1beta1Api() 17 | self.cr_api = client.CustomObjectsApi() 18 | 19 | def create_metadata(self, name): 20 | metadata = client.V1ObjectMeta() 21 | metadata.name = name 22 | metadata.namespace = self.namespace 23 | 24 | return metadata 25 | 26 | class KubedrV1AlphaResource(KubeResourceAPI): 27 | def __init__(self, namespace="default"): 28 | super().__init__(namespace) 29 | self.group = "kubedr.catalogicsoftware.com" 30 | self.version = "v1alpha1" 31 | self.apiVersion = "{}/{}".format(self.group, self.version) 32 | self.res = { 33 | "apiVersion": self.apiVersion 34 | } 35 | 36 | # These must be set by subclasses. 
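        # For example, BackupLocationAPI sets kind = "BackupLocation" and
        # plural = "backuplocations"; see the subclasses below.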
37 | self.kind = "" 38 | self.plural = "" 39 | 40 | def create(self, name, spec): 41 | self.res["kind"] = self.kind 42 | self.res["metadata"] = {"name": name} 43 | self.res["spec"] = spec 44 | 45 | return self.cr_api.create_namespaced_custom_object( 46 | group=self.group, version=self.version, namespace=self.namespace, plural=self.plural, 47 | body=self.res) 48 | 49 | def delete(self, name): 50 | self.cr_api.delete_namespaced_custom_object( 51 | group=self.group, version=self.version, namespace=self.namespace, plural=self.plural, 52 | name=name, body=client.V1DeleteOptions()) 53 | 54 | def get(self, name): 55 | return self.cr_api.get_namespaced_custom_object( 56 | group=self.group, version=self.version, namespace=self.namespace, plural=self.plural, 57 | name=name) 58 | 59 | class SecretAPI(KubeResourceAPI): 60 | def __init__(self, namespace="default"): 61 | super().__init__(namespace) 62 | 63 | def create(self, name, data): 64 | body = client.V1Secret() 65 | body.data = data 66 | 67 | body.metadata = self.create_metadata(name) 68 | 69 | return self.v1api.create_namespaced_secret(body.metadata.namespace, body) 70 | 71 | def delete(self, name): 72 | self.v1api.delete_namespaced_secret(name, self.namespace, 73 | body=client.V1DeleteOptions()) 74 | 75 | class PersistentVolumeAPI(KubeResourceAPI): 76 | def __init__(self): 77 | super().__init__() 78 | 79 | def create(self, name, spec): 80 | body = client.V1PersistentVolume() 81 | body.spec = spec 82 | 83 | body.metadata = self.create_metadata(name) 84 | 85 | return self.v1api.create_persistent_volume(body) 86 | 87 | def delete(self, name): 88 | self.v1api.delete_persistent_volume(name, body=client.V1DeleteOptions()) 89 | 90 | class PersistentVolumeClaimAPI(KubeResourceAPI): 91 | def __init__(self, namespace="default"): 92 | super().__init__(namespace) 93 | 94 | def create(self, name, spec): 95 | body = client.V1PersistentVolumeClaim() 96 | body.spec = spec 97 | 98 | body.metadata = self.create_metadata(name) 99 | 100 | return self.v1api.create_namespaced_persistent_volume_claim(self.namespace, body) 101 | 102 | def get(self, name): 103 | return self.v1api.read_namespaced_persistent_volume_claim(name, self.namespace) 104 | 105 | def delete(self, name): 106 | self.v1api.delete_namespaced_persistent_volume_claim(name, self.namespace, body=client.V1DeleteOptions()) 107 | 108 | class PodAPI(KubeResourceAPI): 109 | def __init__(self, namespace="default"): 110 | super().__init__(namespace) 111 | 112 | def list(self, label_selector="", timeout_seconds=30): 113 | return self.v1api.list_namespaced_pod(self.namespace, label_selector=label_selector, 114 | timeout_seconds=timeout_seconds) 115 | 116 | def get_by_watch(self, label_selector="", timeout_seconds=30): 117 | w = watch.Watch() 118 | for event in w.stream(self.v1api.list_namespaced_pod, self.namespace, 119 | label_selector=label_selector, timeout_seconds=timeout_seconds): 120 | return event['object'] 121 | 122 | def read(self, name): 123 | return self.v1api.read_namespaced_pod(name, self.namespace) 124 | 125 | def delete(self, name): 126 | self.v1api.delete_namespaced_pod(name, self.namespace, 127 | body=client.V1DeleteOptions()) 128 | 129 | class CronJobAPI(KubeResourceAPI): 130 | def __init__(self, namespace="default"): 131 | super().__init__(namespace) 132 | 133 | def list(self, label_selector="", timeout_seconds=30): 134 | return self.batch_v1beta1_api.list_namespaced_cron_job(self.namespace, label_selector=label_selector, 135 | timeout_seconds=timeout_seconds) 136 | 137 | class 
BackupLocationAPI(KubedrV1AlphaResource): 138 | def __init__(self, namespace="default"): 139 | super().__init__(namespace) 140 | self.kind = "BackupLocation" 141 | self.plural = "backuplocations" 142 | 143 | class MetadataBackupPolicyAPI(KubedrV1AlphaResource): 144 | def __init__(self, namespace="default"): 145 | super().__init__(namespace) 146 | self.kind = "MetadataBackupPolicy" 147 | self.plural = "metadatabackuppolicies" 148 | 149 | class MetadataRestoreAPI(KubedrV1AlphaResource): 150 | def __init__(self, namespace="default"): 151 | super().__init__(namespace) 152 | self.kind = "MetadataRestore" 153 | self.plural = "metadatarestores" 154 | 155 | def create_backuploc_creds(name, access_key, secret_key, restic_password): 156 | creds_data = { 157 | "access_key": base64.b64encode(access_key.encode("utf-8")).decode("utf-8"), 158 | "secret_key": base64.b64encode(secret_key.encode("utf-8")).decode("utf-8"), 159 | "restic_repo_password": base64.b64encode(restic_password.encode("utf-8")).decode("utf-8") 160 | } 161 | secret_api = SecretAPI(namespace="kubedr-system") 162 | return secret_api.create(name, creds_data) 163 | 164 | def create_etcd_creds(name, ca_crt, client_crt, client_key): 165 | creds_data = { 166 | "ca.crt": base64.b64encode(open(ca_crt, "rb").read()).decode("utf-8"), 167 | "client.crt": base64.b64encode(open(client_crt, "rb").read()).decode("utf-8"), 168 | "client.key": base64.b64encode(open(client_key, "rb").read()).decode("utf-8") 169 | } 170 | secret_api = SecretAPI(namespace="kubedr-system") 171 | return secret_api.create(name, creds_data) 172 | 173 | def wait_for_pod_to_appear(label_selector): 174 | num_attempts = conftest.envconfig.wait_for_res_to_appear_num_attempts 175 | interval_secs = conftest.envconfig.wait_for_res_to_appear_interval_secs 176 | 177 | pod_api = PodAPI(namespace="kubedr-system") 178 | 179 | for i in range(num_attempts): 180 | time.sleep(interval_secs) 181 | 182 | pods = pod_api.list(label_selector=label_selector) 183 | if len(pods.items) > 0: 184 | return pods 185 | 186 | raise Exception("Timed out waiting for pod with label: {}.".format(label_selector)) 187 | 188 | def wait_for_cronjob_to_appear(label_selector): 189 | num_attempts = conftest.envconfig.wait_for_res_to_appear_num_attempts 190 | interval_secs = conftest.envconfig.wait_for_res_to_appear_interval_secs 191 | 192 | cronjob_api = CronJobAPI(namespace="kubedr-system") 193 | 194 | for i in range(num_attempts): 195 | time.sleep(interval_secs) 196 | 197 | cronjobs = cronjob_api.list(label_selector=label_selector) 198 | if len(cronjobs.items) > 0: 199 | return cronjobs 200 | 201 | raise Exception("Timed out waiting for cronjob with label: {}.".format(label_selector)) 202 | 203 | def wait_for_pod_to_be_done(pod_name): 204 | num_attempts = conftest.envconfig.wait_for_pod_to_be_done_num_attempts 205 | interval_secs = conftest.envconfig.wait_for_pod_to_be_done_interval_secs 206 | 207 | pod_api = PodAPI(namespace="kubedr-system") 208 | 209 | for i in range(num_attempts): 210 | time.sleep(interval_secs) 211 | 212 | pod = pod_api.read(pod_name) 213 | if pod.status.phase in ["Succeeded", "Failed"]: 214 | return pod 215 | 216 | raise Exception("pod {} did not finish in time.".format(pod_name)) 217 | 218 | def init(): 219 | k8sconfig.debug = True 220 | logging.getLogger("urllib3").setLevel(logging.DEBUG) 221 | k8sconfig.load_kube_config() 222 | 223 | 224 | -------------------------------------------------------------------------------- /kubedr/controllers/metadatabackuprecord_controller.go: 
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2020 Catalogic Software
3 | 
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 | 
8 |     http://www.apache.org/licenses/LICENSE-2.0
9 | 
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | */
16 | 
17 | package controllers
18 | 
19 | import (
20 |     "context"
21 |     "fmt"
22 |     "k8s.io/apimachinery/pkg/types"
23 |     "sort"
24 | 
25 |     // batchv1 "k8s.io/api/batch/v1"
26 |     "github.com/go-logr/logr"
27 |     corev1 "k8s.io/api/core/v1"
28 |     metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
29 |     "k8s.io/apimachinery/pkg/runtime"
30 |     ctrl "sigs.k8s.io/controller-runtime"
31 |     "sigs.k8s.io/controller-runtime/pkg/client"
32 | 
33 |     kubedrv1alpha1 "kubedr/api/v1alpha1"
34 | )
35 | 
36 | // MetadataBackupRecordReconciler reconciles a MetadataBackupRecord object
37 | type MetadataBackupRecordReconciler struct {
38 |     client.Client
39 |     Log    logr.Logger
40 |     Scheme *runtime.Scheme
41 | }
42 | 
43 | // +kubebuilder:rbac:groups=kubedr.catalogicsoftware.com,resources=metadatabackuprecords,verbs=get;list;watch;create;update;patch;delete
44 | // +kubebuilder:rbac:groups=kubedr.catalogicsoftware.com,resources=metadatabackuprecords/status,verbs=get;update;patch
45 | // +kubebuilder:rbac:groups=core,resources=pods,verbs=create;get;list;update;patch;delete;watch
46 | 
47 | // Reconcile is the main entry point called by the framework.
48 | func (r *MetadataBackupRecordReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) {
49 |     ctx := context.Background()
50 |     log := r.Log.WithValues("metadatabackuprecord", req.NamespacedName)
51 | 
52 |     // Every time an MBR is created, we need to check and delete older snapshots
53 |     // as per the retention setting.
54 | 
55 |     var record kubedrv1alpha1.MetadataBackupRecord
56 |     if err := r.Get(ctx, req.NamespacedName, &record); err != nil {
57 |         log.Error(err, "unable to fetch MetadataBackupRecord")
58 |         // we'll ignore not-found errors, since they can't be fixed by an immediate
59 |         // requeue (we'll need to wait for a new notification).
60 |         return ctrl.Result{}, ignoreNotFound(err)
61 |     }
62 | 
63 |     finalizer := "mbr.finalizers.kubedr.catalogicsoftware.com"
64 | 
65 |     if record.ObjectMeta.DeletionTimestamp.IsZero() {
66 |         // The object is not being deleted, so if it does not have our finalizer,
67 |         // then let's add the finalizer and update the object. This is equivalent
68 |         // to registering our finalizer.
69 |         if !containsString(record.ObjectMeta.Finalizers, finalizer) {
70 |             record.ObjectMeta.Finalizers = append(record.ObjectMeta.Finalizers, finalizer)
71 |             if err := r.Update(context.Background(), &record); err != nil {
72 |                 return ctrl.Result{}, err
73 |             }
74 |         }
75 |     } else {
76 |         // The object is being deleted
77 |         if containsString(record.ObjectMeta.Finalizers, finalizer) {
78 |             // Our finalizer is present, handle any pre-deletion logic here.
79 | 
80 |             // remove our finalizer from the list and update it.
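            // Once the finalizer is removed and the update goes through,
            // Kubernetes is free to finish deleting the object.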
81 | record.ObjectMeta.Finalizers = removeString(record.ObjectMeta.Finalizers, finalizer) 82 | 83 | if err := r.Update(context.Background(), &record); err != nil { 84 | return ctrl.Result{}, err 85 | } 86 | } 87 | 88 | // Nothing more to do for DELETE. 89 | return ctrl.Result{}, nil 90 | } 91 | 92 | var policy kubedrv1alpha1.MetadataBackupPolicy 93 | log.Info("Getting policy...") 94 | if err := r.Get(ctx, 95 | types.NamespacedName{Namespace: req.Namespace, Name: record.Spec.Policy}, 96 | &policy); err != nil { 97 | 98 | log.Error(err, "unable to fetch MetadataBackupPolicy, no retention processing") 99 | // we'll ignore not-found errors, since they can't be fixed by an immediate 100 | // requeue (we'll need to wait for a new notification). 101 | return ctrl.Result{}, ignoreNotFound(err) 102 | } 103 | 104 | // Now, make sure the spec matches the state of the world. 105 | log.Info("Getting MBR list...") 106 | var mbrList kubedrv1alpha1.MetadataBackupRecordList 107 | if err := r.List(ctx, &mbrList, client.InNamespace(req.Namespace), 108 | client.MatchingFields{"policy": record.Spec.Policy}); err != nil { 109 | 110 | log.Error(err, "unable to list MBRs for the policy") 111 | return ctrl.Result{}, err 112 | } 113 | 114 | log.Info(fmt.Sprintf("Number of MBR entries: %d", len(mbrList.Items))) 115 | 116 | sort.Slice(mbrList.Items, func(i, j int) bool { 117 | return mbrList.Items[i].ObjectMeta.CreationTimestamp.Before(&mbrList.Items[j].ObjectMeta.CreationTimestamp) 118 | }) 119 | 120 | log.Info(fmt.Sprintf("retention: %d", *policy.Spec.RetainNumBackups)) 121 | 122 | if int64(len(mbrList.Items)) <= *policy.Spec.RetainNumBackups { 123 | log.Info("Number of backups is within the retention limit...") 124 | return ctrl.Result{}, nil 125 | } 126 | 127 | backupLoc := &kubedrv1alpha1.BackupLocation{} 128 | backupLocKey := types.NamespacedName{Namespace: req.Namespace, Name: policy.Spec.Destination} 129 | err := r.Get(context.TODO(), backupLocKey, backupLoc) 130 | if err != nil { 131 | // If the error is "not found", there is no point in retrying. 132 | return ctrl.Result{}, ignoreNotFound(err) 133 | } 134 | 135 | // There are some snapshots that need to be deleted. 136 | for i := 0; int64(i) < (int64(len(mbrList.Items)) - *policy.Spec.RetainNumBackups); i++ { 137 | log.Info("Need to delete: " + mbrList.Items[i].Spec.SnapshotId) 138 | 139 | // Delete the record first. 140 | if err := r.Delete(ctx, &mbrList.Items[i]); ignoreNotFound(err) != nil { 141 | log.Error(err, "unable to delete mbr", "mbr", mbrList.Items[i]) 142 | } else { 143 | log.V(0).Info("deleted mbr", "mbr", mbrList.Items[i]) 144 | } 145 | 146 | pod, err := createResticSnapDeletePod(backupLoc, log, mbrList.Items[i].Spec.SnapshotId, 147 | mbrList.Items[i].Name, mbrList.Items[i].Namespace) 148 | 149 | if err != nil { 150 | log.Error(err, "Error in creating snapshot deletion pod") 151 | return ctrl.Result{}, err 152 | } 153 | 154 | // Delete the restic snapshot. 155 | log.Info("Starting a new Pod", "Pod.Namespace", pod.Namespace, "Pod.Name", pod.Name) 156 | err = r.Create(ctx, pod) 157 | if err != nil { 158 | log.Error(err, "Error in starting snap delete pod") 159 | return ctrl.Result{}, err 160 | } 161 | } 162 | 163 | // Keep the last 3 snap deletion pods and clean up the rest. 164 | // TODO: Make this number configurable. We need global options. This is not related 165 | // to individual policies.
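// For example, if seven deletion pods have accumulated by this point,
// cleanupOldSnapDeletionPods removes the four oldest and keeps the
// three newest.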
166 | r.cleanupOldSnapDeletionPods(req.Namespace, log) 167 | 168 | return ctrl.Result{}, nil 169 | } 170 | 171 | func (r *MetadataBackupRecordReconciler) cleanupOldSnapDeletionPods(namespace string, log logr.Logger) { 172 | ctx := context.Background() 173 | 174 | var podList corev1.PodList 175 | if err := r.List(ctx, &podList, client.InNamespace(namespace), 176 | client.MatchingLabels{"kubedr.catalogicsoftware.com/snap-deletion-pod": "true"}); err != nil { 177 | log.Error(err, "unable to list snap deletion pods") 178 | return 179 | } 180 | 181 | log.Info(fmt.Sprintf("Number of snap deletion pods: %d", len(podList.Items))) 182 | 183 | sort.Slice(podList.Items, func(i, j int) bool { 184 | return podList.Items[i].ObjectMeta.CreationTimestamp.Before(&podList.Items[j].ObjectMeta.CreationTimestamp) 185 | }) 186 | 187 | if int64(len(podList.Items)) <= 3 { 188 | return 189 | } 190 | 191 | // There are some pods that need to be deleted. 192 | for i := 0; int64(i) < (int64(len(podList.Items)) - 3); i++ { 193 | if err := r.Delete(ctx, &podList.Items[i]); ignoreNotFound(err) != nil { 194 | log.Error(err, "unable to delete pod", "pod", podList.Items[i].Name) 195 | } else { 196 | log.V(0).Info("deleted pod", "pod", podList.Items[i].Name) 197 | } 198 | } 199 | } 200 | 201 | // SetupWithManager hooks up this controller with the manager. 202 | func (r *MetadataBackupRecordReconciler) SetupWithManager(mgr ctrl.Manager) error { 203 | if err := mgr.GetFieldIndexer().IndexField(&kubedrv1alpha1.MetadataBackupRecord{}, 204 | "policy", func(rawObj runtime.Object) []string { 205 | // Grab the MetadataBackupRecord and index it by the name of its policy 206 | record := rawObj.(*kubedrv1alpha1.MetadataBackupRecord) 207 | 208 | return []string{record.Spec.Policy} 209 | }); err != nil { 210 | return err 211 | } 212 | 213 | return ctrl.NewControllerManagedBy(mgr). 214 | For(&kubedrv1alpha1.MetadataBackupRecord{}).
215 | Complete(r) 216 | } 217 | 218 | func createResticSnapDeletePod(backupLocation *kubedrv1alpha1.BackupLocation, log logr.Logger, 219 | snapshotId string, mbrName string, namespace string) (*corev1.Pod, error) { 220 | 221 | s3EndPoint := "s3:" + backupLocation.Spec.Url + "/" + backupLocation.Spec.BucketName 222 | 223 | accessKey := corev1.SecretKeySelector{} 224 | accessKey.Name = backupLocation.Spec.Credentials 225 | accessKey.Key = "access_key" 226 | 227 | secretKey := corev1.SecretKeySelector{} 228 | secretKey.Name = backupLocation.Spec.Credentials 229 | secretKey.Key = "secret_key" 230 | 231 | resticPassword := corev1.SecretKeySelector{} 232 | resticPassword.Name = backupLocation.Spec.Credentials 233 | resticPassword.Key = "restic_repo_password" 234 | 235 | return &corev1.Pod{ 236 | ObjectMeta: metav1.ObjectMeta{ 237 | Name: mbrName + "-snapdel-pod-" + snapshotId, 238 | Namespace: namespace, 239 | Labels: map[string]string{ 240 | "kubedr.catalogicsoftware.com/snap-deletion-pod": "true", 241 | }, 242 | }, 243 | 244 | Spec: corev1.PodSpec{ 245 | Containers: []corev1.Container{ 246 | { 247 | Name: mbrName + "-del", 248 | Image: "restic/restic", 249 | Args: []string{"-r", s3EndPoint, "forget", "--prune", snapshotId}, 250 | Env: []corev1.EnvVar{ // This pod runs the stock restic image, so use the standard AWS variable names that restic's S3 backend expects. 251 | { 252 | Name: "AWS_ACCESS_KEY_ID", 253 | ValueFrom: &corev1.EnvVarSource{ 254 | SecretKeyRef: &accessKey, 255 | }, 256 | }, 257 | { 258 | Name: "AWS_SECRET_ACCESS_KEY", 259 | ValueFrom: &corev1.EnvVarSource{ 260 | SecretKeyRef: &secretKey, 261 | }, 262 | }, 263 | { 264 | Name: "RESTIC_PASSWORD", 265 | ValueFrom: &corev1.EnvVarSource{ 266 | SecretKeyRef: &resticPassword, 267 | }, 268 | }, 269 | }, 270 | }, 271 | }, 272 | RestartPolicy: "Never", 273 | }, 274 | }, nil 275 | } 276 | -------------------------------------------------------------------------------- /tests/src/test_backup.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | import pprint 4 | import shutil 5 | import subprocess 6 | import time 7 | 8 | from kubernetes import client 9 | import pytest 10 | 11 | from common import kubeclient, util 12 | 13 | def timestamp(): 14 | return int(time.time()) 15 | 16 | def log_state(namespace, resdata): 17 | # Capture the state before cleaning up resources. This will help in 18 | # debugging. 19 | print("Output of 'describe all'") 20 | subprocess.call("kubectl describe persistentvolume", shell=True) 21 | subprocess.call("kubectl -n {} describe all".format(namespace), shell=True) 22 | subprocess.call("kubectl -n {} describe backuplocation".format(namespace), shell=True) 23 | subprocess.call("kubectl -n {} describe metadatabackuppolicy".format(namespace), shell=True) 24 | subprocess.call("kubectl -n {} describe metadatabackuprecord".format(namespace), shell=True) 25 | subprocess.call("kubectl -n {} describe metadatarestore".format(namespace), shell=True) 26 | subprocess.call("kubectl -n {} describe persistentvolumeclaim".format(namespace), shell=True) 27 | 28 | print("Output of 'logs'") 29 | for pod_name in resdata["pods"]: 30 | print("Output of 'logs' for {}".format(pod_name)) 31 | subprocess.call("kubectl -n {} logs --all-containers {}".format(namespace, pod_name), shell=True) 32 | 33 | if "pv_path" in resdata: 34 | print("contents of PV dir {}".format(resdata["pv_path"])) 35 | cmd = "ls -lR {}".format(resdata["pv_path"]) 36 | subprocess.call(cmd, shell=True) 37 | 38 | # "resources" is used to store state as resources are being created.
39 | # This allows us to delete all the resources in one place and also 40 | # enables deletion even in case of test failures. 41 | @pytest.fixture(scope="module") 42 | def resources(globalconfig): 43 | if not globalconfig.testenv: 44 | pytest.skip("Test environment data is not given, skipping...") 45 | backuploc = globalconfig.testenv["backuploc"] 46 | 47 | backuploc_creds = "{}-{}".format("s3creds", timestamp()) 48 | kubeclient.create_backuploc_creds(backuploc_creds, backuploc["access_key"], backuploc["secret_key"], 49 | globalconfig.restic_password) 50 | 51 | resdata = {"backuploc_creds": backuploc_creds, "pods": [], "backup_names": []} 52 | 53 | # If we create multiple resources during setup, we need to take care to clean 54 | # up in case there are any errors. This is not an issue right now as we create 55 | # only one resource. 56 | 57 | yield resdata 58 | 59 | util.ignore_errors(lambda: log_state(globalconfig.namespace, resdata)) 60 | 61 | util.ignore_errors_pred("restore_name" in resdata, lambda: globalconfig.mr_api.delete(resdata["restore_name"])) 62 | 63 | for backup_name in resdata.get("backup_names", []): 64 | util.ignore_errors(lambda: globalconfig.mbp_api.delete(backup_name)) 65 | 66 | util.ignore_errors_pred("etcd_creds" in resdata, lambda: globalconfig.secret_api.delete(resdata["etcd_creds"])) 67 | util.ignore_errors_pred("backuploc_name" in resdata, lambda: globalconfig.backuploc_api.delete(resdata["backuploc_name"])) 68 | util.ignore_errors_pred("pvc_name" in resdata, lambda: globalconfig.pvc_api.delete(resdata["pvc_name"])) 69 | 70 | # The PV should be deleted automatically when the PVC is deleted but, in case the 71 | # PVC was never created or to cover any other corner cases, try to delete the PV anyway. 72 | util.ignore_errors_pred("pv_name" in resdata, lambda: globalconfig.pvc_api.delete(resdata["pv_name"])) 73 | util.ignore_errors_pred("pv_path" in resdata, lambda: shutil.rmtree(resdata["pv_path"])) 74 | 75 | util.ignore_errors(lambda: globalconfig.secret_api.delete(backuploc_creds)) 76 | 77 | @pytest.mark.dependency() 78 | def test_creating_backuplocation(globalconfig, resources): 79 | init_annotation = "initialized.annotations.kubedr.catalogicsoftware.com" 80 | endpoint = globalconfig.testenv["backuploc"]["endpoint"] 81 | 82 | bucket_name = "{}-{}".format( 83 | globalconfig.testenv["backuploc"]["bucket_name_prefix"], 84 | timestamp()) 85 | 86 | backuploc_name = "{}-{}".format("tests3", timestamp()) 87 | backuploc_spec = { 88 | "url": endpoint, 89 | "bucketName": bucket_name, 90 | "credentials": resources["backuploc_creds"] 91 | } 92 | globalconfig.backuploc_api.create(backuploc_name, backuploc_spec) 93 | resources["backuploc_name"] = backuploc_name 94 | 95 | label_selector='kubedr.type=backuploc-init,kubedr.backuploc={}'.format(backuploc_name) 96 | pods = kubeclient.wait_for_pod_to_appear(label_selector) 97 | 98 | assert len(pods.items) == 1, "Found pods: ({})".format(", ".join([x.metadata.name for x in pods.items])) 99 | pod_name = pods.items[0].metadata.name 100 | 101 | pod = kubeclient.wait_for_pod_to_be_done(pod_name) 102 | resources["pods"].append(pod_name) 103 | assert pod.status.phase == "Succeeded" 104 | 105 | backup_loc = globalconfig.backuploc_api.get(backuploc_name) 106 | assert backup_loc 107 | 108 | assert backup_loc["metadata"]["annotations"][init_annotation] == "true" 109 | 110 | def do_backup(globalconfig, resources, backup_name, backup_spec): 111 | print("creating backup: {}".format(backup_name)) 112 | globalconfig.mbp_api.create(backup_name,
backup_spec) 113 | resources["backup_names"].append(backup_name) 114 | 115 | # Wait for cronjob to appear 116 | label_selector='kubedr.type=backup,kubedr.backup-policy={}'.format(backup_name) 117 | cronjobs = kubeclient.wait_for_cronjob_to_appear(label_selector) 118 | 119 | assert len(cronjobs.items) == 1 120 | cronjob_name = cronjobs.items[0].metadata.name 121 | 122 | # Wait for a backup pod to appear and then check its status. 123 | # Since the backup schedule is every minute, wait for slightly 124 | # longer than a minute before timing out. 125 | backup_pod = globalconfig.pod_api.get_by_watch(label_selector, timeout_seconds=75) 126 | 127 | pod_name = backup_pod.metadata.name 128 | resources["pods"].append(pod_name) 129 | 130 | phase = backup_pod.status.phase 131 | if phase == "Running" or phase == "Pending": 132 | pod = kubeclient.wait_for_pod_to_be_done(pod_name) 133 | backup_pod = globalconfig.pod_api.read(pod_name) 134 | 135 | assert backup_pod.status.phase == "Succeeded" 136 | policy = globalconfig.mbp_api.get(backup_name) 137 | pprint.pprint(policy) 138 | 139 | return policy 140 | 141 | @pytest.mark.dependency(depends=["test_creating_backuplocation"]) 142 | def test_backup_without_certificates(globalconfig, resources): 143 | if "etcd_data" not in globalconfig.testenv: 144 | pytest.skip("etcd data is not given, skipping...") 145 | 146 | etcd_data = globalconfig.testenv["etcd_data"] 147 | etcd_creds = "{}-{}".format("etcd-creds", timestamp()) 148 | kubeclient.create_etcd_creds(etcd_creds, etcd_data["ca.crt"], etcd_data["client.crt"], 149 | etcd_data["client.key"]) 150 | 151 | resources["etcd_creds"] = etcd_creds 152 | 153 | backup_name = "{}-{}".format("backup", timestamp()) 154 | backup_spec = { 155 | "destination": resources["backuploc_name"], 156 | "etcdCreds": etcd_creds, 157 | "schedule": "*/1 * * * *" 158 | } 159 | 160 | policy = do_backup(globalconfig, resources, backup_name, backup_spec) 161 | 162 | status = policy["status"] 163 | files_total = status["filesChanged"] + status["filesNew"] 164 | assert files_total == 1 165 | 166 | @pytest.mark.dependency(depends=["test_creating_backuplocation"]) 167 | def test_backup_with_certificates(globalconfig, resources): 168 | if "etcd_data" not in globalconfig.testenv: 169 | pytest.skip("etcd data is not given, skipping...") 170 | 171 | if "certs_dir" not in globalconfig.testenv: 172 | pytest.skip("Certificates dir is not given, skipping...") 173 | 174 | etcd_data = globalconfig.testenv["etcd_data"] 175 | etcd_creds = "{}-{}".format("etcd-creds", timestamp()) 176 | kubeclient.create_etcd_creds(etcd_creds, etcd_data["ca.crt"], etcd_data["client.crt"], 177 | etcd_data["client.key"]) 178 | 179 | resources["etcd_creds"] = etcd_creds 180 | 181 | backup_name = "{}-{}".format("backup", timestamp()) 182 | backup_spec = { 183 | "destination": resources["backuploc_name"], 184 | "certsDir": globalconfig.testenv["certs_dir"], 185 | "etcdCreds": etcd_creds, 186 | "schedule": "*/1 * * * *" 187 | } 188 | 189 | policy = do_backup(globalconfig, resources, backup_name, backup_spec) 190 | 191 | status = policy["status"] 192 | resources["mbr_with_certs"] = status["mbrName"] 193 | files_total = status["filesChanged"] + status["filesNew"] 194 | assert files_total > 1 195 | 196 | @pytest.mark.dependency(depends=["test_backup_with_certificates"]) 197 | def test_restore(globalconfig, resources): 198 | pv = util.create_hostpath_pv() 199 | resources["pv_name"] = pv.metadata.name 200 | resources["pv_path"] = pv.spec.host_path.path 201 | 202 | pvc = 
util.create_pvc_for_pv(pv) 203 | resources["pvc_name"] = pvc.metadata.name 204 | 205 | mr_name = "{}-{}".format("mr", timestamp()) 206 | mr_spec = { 207 | "mbrName": resources["mbr_with_certs"], 208 | "pvcName": resources["pvc_name"] 209 | } 210 | 211 | globalconfig.mr_api.create(mr_name, mr_spec) 212 | resources["restore_name"] = mr_name 213 | 214 | label_selector='kubedr.type=restore,kubedr.restore-mbr={}'.format(mr_spec["mbrName"]) 215 | restore_pod = globalconfig.pod_api.get_by_watch(label_selector) 216 | 217 | pod_name = restore_pod.metadata.name 218 | resources["pods"].append(pod_name) 219 | 220 | phase = restore_pod.status.phase 221 | if phase == "Running" or phase == "Pending": 222 | pod = kubeclient.wait_for_pod_to_be_done(pod_name) 223 | restore_pod = globalconfig.pod_api.read(pod_name) 224 | 225 | assert restore_pod.status.phase == "Succeeded" 226 | 227 | if os.environ.get("RUNNING_IN_CI", None): 228 | print("Not verifying the contents of the target directory until we figure out how to check remote directories.") 229 | return 230 | 231 | print("Verifying the contents of the target directory...") 232 | assert os.path.exists("{}/data/etcd-snapshot.db".format(resources["pv_path"])) 233 | assert os.path.exists("{}/data/certificates".format(resources["pv_path"])) 234 | assert os.listdir(resources["pv_path"]) 235 | -------------------------------------------------------------------------------- /kubedr/controllers/metadatarestore_controller.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2020 Catalogic Software 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | package controllers 18 | 19 | import ( 20 | "context" 21 | "fmt" 22 | "os" 23 | 24 | "github.com/go-logr/logr" 25 | corev1 "k8s.io/api/core/v1" 26 | apierrors "k8s.io/apimachinery/pkg/api/errors" 27 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 28 | "k8s.io/apimachinery/pkg/runtime" 29 | "k8s.io/apimachinery/pkg/types" 30 | ctrl "sigs.k8s.io/controller-runtime" 31 | "sigs.k8s.io/controller-runtime/pkg/client" 32 | 33 | kubedrv1alpha1 "kubedr/api/v1alpha1" 34 | ) 35 | 36 | // MetadataRestoreReconciler reconciles a MetadataRestore object 37 | type MetadataRestoreReconciler struct { 38 | client.Client 39 | Log logr.Logger 40 | Scheme *runtime.Scheme 41 | } 42 | 43 | func (r *MetadataRestoreReconciler) setStatus(mr *kubedrv1alpha1.MetadataRestore, status string, errmsg string) { 44 | mr.Status.ObservedGeneration = mr.ObjectMeta.Generation 45 | 46 | mr.Status.RestoreStatus = status 47 | mr.Status.RestoreErrorMessage = errmsg 48 | mr.Status.RestoreTime = metav1.Now().String() 49 | 50 | r.Log.Info("Updating status...") 51 | if err := r.Status().Update(context.Background(), mr); err != nil { 52 | r.Log.Error(err, "unable to update MetadataRestore status") 53 | } 54 | } 55 | 56 | // +kubebuilder:rbac:groups=kubedr.catalogicsoftware.com,resources=metadatarestores,verbs=get;list;watch;create;update;patch;delete 57 | // +kubebuilder:rbac:groups=kubedr.catalogicsoftware.com,resources=metadatarestores/status,verbs=get;update;patch 58 | // +kubebuilder:rbac:groups=kubedr.catalogicsoftware.com,resources=metadatabackuprecords/status,verbs=get 59 | // +kubebuilder:rbac:groups=kubedr.catalogicsoftware.com,resources=backuplocations/status,verbs=get 60 | // +kubebuilder:rbac:groups=core,resources=pods,verbs=create;get 61 | 62 | /* 63 | * Top level Reconcile logic 64 | * 65 | * - If generation number hasn't changed, do nothing. We don't want to process updates 66 | * unless spec has changed. 67 | * 68 | * - Check if the annotation, which indicates that this restore resource is already 69 | * processed, is present. If so, there is nothing more to do. If not, proceed with 70 | * restore logic. 71 | * 72 | * - There is nothing to do for deletion so we don't add any finalizers. 73 | * 74 | * - If there is a previous restore pod for this resource, delete the pod. 75 | * 76 | * - Create the pod that will restore the data. The kubedrutil "restore" command 77 | * will call restic to restore the data and then it will set the annotation to 78 | * indicate that this resource has been processed. 79 | * 80 | * - The "restore" command will also set the status both in case of success and 81 | * failure. 82 | */ 83 | 84 | // Reconcile is the main entry point called by the framework. 85 | func (r *MetadataRestoreReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) { 86 | ctx := context.Background() 87 | 88 | var mr kubedrv1alpha1.MetadataRestore 89 | if err := r.Get(ctx, req.NamespacedName, &mr); err != nil { 90 | if apierrors.IsNotFound(err) { 91 | // we'll ignore not-found errors, since they can't be fixed by an immediate 92 | // requeue (we'll need to wait for a new notification). 93 | r.Log.Info("MetadataRestore (" + req.NamespacedName.Name + ") is not found") 94 | return ctrl.Result{}, nil 95 | } 96 | 97 | r.Log.Error(err, "unable to fetch MetadataRestore") 98 | return ctrl.Result{}, err 99 | } 100 | 101 | // Skip if spec hasn't changed. This check prevents reconcile on status 102 | // updates.
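// (The API server bumps metadata.generation only for spec changes on
// resources with a status subresource, so our own Status().Update() calls
// leave it unchanged and therefore do not trigger another pass here.)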
103 | if mr.Status.ObservedGeneration == mr.ObjectMeta.Generation { 104 | r.Log.Info("Skipping reconcile as generation number hasn't changed") 105 | return ctrl.Result{}, nil 106 | } 107 | 108 | // No deletion logic as we don't really have anything to do during 109 | // deletion of a MetadataRestore resource. 110 | 111 | // Check annotations to see if this resource was already processed 112 | // and restore was successful. 113 | restoreAnnotation := "restored.annotations.kubedr.catalogicsoftware.com" 114 | 115 | restored, exists := mr.ObjectMeta.Annotations[restoreAnnotation] 116 | if exists && (restored == "true") { 117 | // No need to process the resource as restore was done already. 118 | r.Log.Info("Restore was already done") 119 | return ctrl.Result{}, nil 120 | } 121 | 122 | // We are deliberately avoiding any attempt to make the name unique. 123 | // The client is in a better position to come up with a unique name. 124 | // If we do switch to generating a unique name, we need to make sure 125 | // that any previous pods are cleaned up. 126 | podName := mr.Name + "-mr" 127 | 128 | // Since we don't generate a unique name for the restore pod, 129 | // we need to explicitly check and delete the pod if it exists. 130 | var prevPod corev1.Pod 131 | if err := r.Get(ctx, types.NamespacedName{Namespace: req.Namespace, Name: podName}, &prevPod); err == nil { 132 | r.Log.Info("Found a previous restore pod, will delete it and continue...") 133 | if err := r.Delete(ctx, &prevPod); ignoreNotFound(err) != nil { 134 | r.Log.Error(err, "Error in deleting the previous restore pod") 135 | return ctrl.Result{}, err 136 | } 137 | } 138 | 139 | pod, err := r.buildRestorePod(&mr, req.Namespace, podName) 140 | if err != nil { 141 | r.Log.Error(err, "Error in creating restore pod") 142 | if apierrors.IsNotFound(err) { 143 | // This shouldn't really happen but if an invalid MBR is given or 144 | // if the backup location inside the MBR is wrong, there is nothing we can 145 | // do. 146 | r.setStatus(&mr, "Failed", 147 | fmt.Sprintf("Error in creating restore pod, reason (%s)", err.Error())) 148 | return ctrl.Result{}, nil 149 | } 150 | 151 | return ctrl.Result{}, err 152 | } 153 | 154 | if err := ctrl.SetControllerReference(&mr, pod, r.Scheme); err != nil { 155 | return ctrl.Result{}, err 156 | } 157 | 158 | r.Log.Info("Starting a new Pod", "Pod.Namespace", pod.Namespace, "Pod.Name", pod.Name) 159 | err = r.Create(ctx, pod) 160 | if err != nil { 161 | r.Log.Error(err, "Error in starting restore pod") 162 | r.setStatus(&mr, "Failed", err.Error()) 163 | return ctrl.Result{}, err 164 | } 165 | 166 | return ctrl.Result{}, nil 167 | } 168 | 169 | // SetupWithManager hooks up this controller with the manager. 170 | func (r *MetadataRestoreReconciler) SetupWithManager(mgr ctrl.Manager) error { 171 | return ctrl.NewControllerManagedBy(mgr). 172 | For(&kubedrv1alpha1.MetadataRestore{}).
173 | Complete(r) 174 | } 175 | 176 | func getRepoData(backupLocation *kubedrv1alpha1.BackupLocation) (string, *corev1.SecretKeySelector, 177 | *corev1.SecretKeySelector, *corev1.SecretKeySelector) { 178 | 179 | s3EndPoint := "s3:" + backupLocation.Spec.Url + "/" + backupLocation.Spec.BucketName 180 | 181 | accessKey := corev1.SecretKeySelector{} 182 | accessKey.Name = backupLocation.Spec.Credentials 183 | accessKey.Key = "access_key" 184 | 185 | secretKey := corev1.SecretKeySelector{} 186 | secretKey.Name = backupLocation.Spec.Credentials 187 | secretKey.Key = "secret_key" 188 | 189 | resticPassword := corev1.SecretKeySelector{} 190 | resticPassword.Name = backupLocation.Spec.Credentials 191 | resticPassword.Key = "restic_repo_password" 192 | 193 | return s3EndPoint, &accessKey, &secretKey, &resticPassword 194 | } 195 | 196 | func (r *MetadataRestoreReconciler) buildRestorePod(cr *kubedrv1alpha1.MetadataRestore, 197 | namespace string, podName string) (*corev1.Pod, error) { 198 | 199 | kubedrUtilImage := os.Getenv("KUBEDR_UTIL_IMAGE") 200 | if kubedrUtilImage == "" { 201 | // This should really not happen. 202 | err := fmt.Errorf("KUBEDR_UTIL_IMAGE is not set") 203 | r.Log.Error(err, "") 204 | return nil, err 205 | } 206 | 207 | mbr := &kubedrv1alpha1.MetadataBackupRecord{} 208 | mbrKey := types.NamespacedName{Namespace: namespace, Name: cr.Spec.MBRName} 209 | if err := r.Get(context.TODO(), mbrKey, mbr); err != nil { 210 | return nil, err 211 | } 212 | 213 | backupLocation := &kubedrv1alpha1.BackupLocation{} 214 | backupLocKey := types.NamespacedName{Namespace: namespace, Name: mbr.Spec.Backuploc} 215 | if err := r.Get(context.TODO(), backupLocKey, backupLocation); err != nil { 216 | return nil, err 217 | } 218 | s3EndPoint, accessKey, secretKey, resticPassword := getRepoData(backupLocation) 219 | 220 | labels := map[string]string{ 221 | "kubedr.type": "restore", 222 | "kubedr.restore-mbr": mbr.Name, 223 | } 224 | 225 | targetDirVolume := corev1.Volume{Name: "restore-target"} 226 | targetDirVolume.PersistentVolumeClaim = &corev1.PersistentVolumeClaimVolumeSource{ 227 | ClaimName: cr.Spec.PVCName} 228 | 229 | volumes := []corev1.Volume{ 230 | targetDirVolume, 231 | } 232 | 233 | env := []corev1.EnvVar{ 234 | { 235 | Name: "MY_POD_NAME", 236 | ValueFrom: &corev1.EnvVarSource{ 237 | FieldRef: &corev1.ObjectFieldSelector{ 238 | FieldPath: "metadata.name", 239 | }, 240 | }, 241 | }, 242 | { 243 | Name: "AWS_ACCESS_KEY", 244 | ValueFrom: &corev1.EnvVarSource{ 245 | SecretKeyRef: accessKey, 246 | }, 247 | }, 248 | { 249 | Name: "AWS_SECRET_KEY", 250 | ValueFrom: &corev1.EnvVarSource{ 251 | SecretKeyRef: secretKey, 252 | }, 253 | }, 254 | { 255 | Name: "RESTIC_PASSWORD", 256 | ValueFrom: &corev1.EnvVarSource{ 257 | SecretKeyRef: resticPassword, 258 | }, 259 | }, 260 | { 261 | Name: "KDR_MR_NAME", 262 | Value: cr.Name, 263 | }, 264 | { 265 | Name: "RESTIC_REPO", 266 | Value: s3EndPoint, 267 | }, 268 | { 269 | Name: "KDR_RESTORE_DEST", 270 | Value: "/restore", 271 | }, 272 | } 273 | 274 | volumeMounts := []corev1.VolumeMount{ 275 | { 276 | Name: "restore-target", 277 | MountPath: "/restore", 278 | }, 279 | } 280 | 281 | return &corev1.Pod{ 282 | ObjectMeta: metav1.ObjectMeta{ 283 | Name: podName, 284 | Namespace: cr.Namespace, 285 | Labels: labels, 286 | }, 287 | 288 | Spec: corev1.PodSpec{ 289 | RestartPolicy: "Never", 290 | 291 | Volumes: volumes, 292 | 293 | Containers: []corev1.Container{ 294 | { 295 | Name: cr.Name, 296 | Image: kubedrUtilImage, 297 | VolumeMounts: volumeMounts, 298 | Env: 
env, 299 | 300 | Args: []string{ 301 | "/usr/local/bin/kubedrutil", "restore", 302 | }, 303 | }, 304 | }, 305 | }, 306 | }, nil 307 | } 308 | -------------------------------------------------------------------------------- /kubedr/controllers/backuplocation_controller.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2020 Catalogic Software 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package controllers 18 | 19 | import ( 20 | "context" 21 | "fmt" 22 | "os" 23 | 24 | "github.com/go-logr/logr" 25 | corev1 "k8s.io/api/core/v1" 26 | apierrors "k8s.io/apimachinery/pkg/api/errors" 27 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 28 | "k8s.io/apimachinery/pkg/runtime" 29 | "k8s.io/apimachinery/pkg/types" 30 | ctrl "sigs.k8s.io/controller-runtime" 31 | "sigs.k8s.io/controller-runtime/pkg/client" 32 | // "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" 33 | 34 | kubedrv1alpha1 "kubedr/api/v1alpha1" 35 | ) 36 | 37 | // BackupLocationReconciler reconciles a BackupLocation object 38 | type BackupLocationReconciler struct { 39 | client.Client 40 | Log logr.Logger 41 | Scheme *runtime.Scheme 42 | } 43 | 44 | // +kubebuilder:rbac:groups=kubedr.catalogicsoftware.com,resources=backuplocations,verbs=get;list;watch;create;update;patch;delete 45 | // +kubebuilder:rbac:groups=kubedr.catalogicsoftware.com,resources=backuplocations/status,verbs=get;update;patch 46 | // +kubebuilder:rbac:groups=core,resources=pods,verbs=create;get 47 | // +kubebuilder:rbac:groups=core,resources=events,verbs=create;get;list;watch;update 48 | 49 | /* 50 | We generally want to ignore (not requeue) NotFound errors, since we'll get a 51 | reconciliation request once the object exists, and requeuing in the meantime 52 | won't help. 53 | 54 | Top level Reconcile logic: 55 | 56 | - If generation number hasn't changed, do nothing. We don't want to process updates 57 | unless spec has changed. 58 | 59 | - Add a finalizer if not already present. This will convert deletes to updates 60 | and allows us to perform any actions before the resource is actually deleted. 61 | However, there is really no delete logic at present. 62 | 63 | - Check if the annotation, which indicates that repo is already initialized, is 64 | present. If so, there is nothing more to do. If not, proceed with init logic. 65 | 66 | - Since we don't generate a unique name for the init pod, it is possible that a pod 67 | from a previous attempt still exists. So check for such a pod and delete it. 68 | We may eventually use unique names but that requires cleaning up old pods. 69 | Also note that the name of the pod includes the BackupLocation resource name so it 70 | is not really a hard-coded name. 71 | 72 | - Create the pod that will initialize the repo. The kubedrutil "repoinit" command 73 | will call restic to initialize the repo and it will then set the annotation to 74 | indicate that repo is initialized. It will also set the status both in case of 75 | success and failure.
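As a rough illustration (an assumption about the kubedrutil image, not a
verbatim transcript), the init pod is expected to do the equivalent of:

    export RESTIC_REPO="s3:<url>/<bucketName>"   # built from the BackupLocation spec
    restic -r "$RESTIC_REPO" init                # credentials come from the pod's env

and to then set the "initialized" annotation and the status on the resource.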
76 | */ 77 | 78 | func ignoreNotFound(err error) error { 79 | if apierrors.IsNotFound(err) { 80 | return nil 81 | } 82 | return err 83 | } 84 | 85 | // In case of some errors such as "not found" and "already exists", 86 | // there is no point in requeuing the reconcile. 87 | // See https://github.com/kubernetes-sigs/controller-runtime/issues/377 88 | func ignoreErrors(err error) error { 89 | if apierrors.IsNotFound(err) { 90 | return nil 91 | } 92 | 93 | if apierrors.IsAlreadyExists(err) { 94 | return nil 95 | } 96 | 97 | return err 98 | } 99 | 100 | func (r *BackupLocationReconciler) setStatus(backupLoc *kubedrv1alpha1.BackupLocation, status string, errmsg string) { 101 | // Allows us to check and skip reconciles for only metadata updates. 102 | backupLoc.Status.ObservedGeneration = backupLoc.ObjectMeta.Generation 103 | 104 | backupLoc.Status.InitStatus = status 105 | if errmsg == "" { 106 | // For some reason, an empty error string causes problems even though 107 | // the field is marked "optional" in the Status struct. 108 | errmsg = "." 109 | } 110 | backupLoc.Status.InitErrorMessage = errmsg 111 | 112 | backupLoc.Status.InitTime = metav1.Now().String() 113 | 114 | r.Log.Info("Updating status...") 115 | if err := r.Status().Update(context.Background(), backupLoc); err != nil { 116 | r.Log.Error(err, "unable to update backup location status") 117 | } 118 | } 119 | 120 | // Reconcile is the main entry point called by the framework. 121 | func (r *BackupLocationReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) { 122 | ctx := context.Background() 123 | log := r.Log.WithValues("backuplocation", req.NamespacedName) 124 | 125 | var backupLoc kubedrv1alpha1.BackupLocation 126 | if err := r.Get(ctx, req.NamespacedName, &backupLoc); err != nil { 127 | if apierrors.IsNotFound(err) { 128 | // we'll ignore not-found errors, since they can't be fixed by an immediate 129 | // requeue (we'll need to wait for a new notification). 130 | log.Info("BackupLocation (" + req.NamespacedName.Name + ") is not found") 131 | return ctrl.Result{}, nil 132 | } 133 | 134 | log.Error(err, "unable to fetch BackupLocation") 135 | return ctrl.Result{}, err 136 | } 137 | 138 | // Skip if spec hasn't changed. This check prevents reconcile on status 139 | // updates. 140 | if backupLoc.Status.ObservedGeneration == backupLoc.ObjectMeta.Generation { 141 | r.Log.Info("Skipping reconcile as generation number hasn't changed") 142 | return ctrl.Result{}, nil 143 | } 144 | 145 | finalizer := "backuplocation.finalizers.kubedr.catalogicsoftware.com" 146 | 147 | if backupLoc.ObjectMeta.DeletionTimestamp.IsZero() { 148 | // The object is not being deleted, so if it does not have our finalizer, 149 | // then let's add the finalizer and update the object. This is equivalent 150 | // to registering our finalizer. 151 | if !containsString(backupLoc.ObjectMeta.Finalizers, finalizer) { 152 | backupLoc.ObjectMeta.Finalizers = append(backupLoc.ObjectMeta.Finalizers, finalizer) 153 | if err := r.Update(context.Background(), &backupLoc); err != nil { 154 | return ctrl.Result{}, err 155 | } 156 | } 157 | } else { 158 | // The object is being deleted 159 | if containsString(backupLoc.ObjectMeta.Finalizers, finalizer) { 160 | // our finalizer is present, handle any pre-deletion logic here. 161 | 162 | // remove our finalizer from the list and update it.
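// (containsString/removeString are the same string-slice helpers sketched
// in metadatabackuprecord_controller.go above.)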
163 | backupLoc.ObjectMeta.Finalizers = removeString(backupLoc.ObjectMeta.Finalizers, finalizer) 164 | 165 | if err := r.Update(context.Background(), &backupLoc); err != nil { 166 | return ctrl.Result{}, err 167 | } 168 | } 169 | 170 | // Nothing more to do for DELETE. 171 | return ctrl.Result{}, nil 172 | } 173 | 174 | // Check annotations to see if repo is already initialized. 175 | // Ideally, we should check the repo itself to confirm that it is 176 | // initialized, instead of depending on annotation. 177 | initAnnotation := "initialized.annotations.kubedr.catalogicsoftware.com" 178 | 179 | initialized, exists := backupLoc.ObjectMeta.Annotations[initAnnotation] 180 | if exists && (initialized == "true") { 181 | // No need to initialize the repo. 182 | log.Info("Repo is already initialized") 183 | return ctrl.Result{}, nil 184 | } 185 | 186 | // Annotation doesn't exist so we need to initialize the repo. 187 | 188 | initPodName := backupLoc.Name + "-init-pod" 189 | 190 | // Since we don't generate a unique name for the pod that initializes the repo, 191 | // we need to explicitly check and delete the pod if it exists. We may eventually 192 | // use a unique name but that will also require cleanup of old pods. 193 | var pod corev1.Pod 194 | if err := r.Get(ctx, types.NamespacedName{Namespace: req.Namespace, Name: initPodName}, &pod); err == nil { 195 | log.Info("Found init pod, will delete it and continue...") 196 | if err := r.Delete(ctx, &pod); ignoreNotFound(err) != nil { 197 | log.Error(err, "Error in deleting init pod") 198 | return ctrl.Result{}, err 199 | } 200 | } 201 | 202 | r.setStatus(&backupLoc, "Initializing", "") 203 | 204 | // Initialize the repo. 205 | initPod, err := buildResticRepoInitPod(&backupLoc, log) 206 | if err != nil { 207 | log.Error(err, "Error in creating init pod") 208 | return ctrl.Result{}, err 209 | } 210 | 211 | if err := ctrl.SetControllerReference(&backupLoc, initPod, r.Scheme); err != nil { 212 | return ctrl.Result{}, err 213 | } 214 | 215 | log.Info("Starting a new Pod", "Pod.Namespace", initPod.Namespace, "Pod.Name", initPod.Name) 216 | err = r.Create(ctx, initPod) 217 | if err != nil { 218 | r.Log.Error(err, "Error in starting init pod") 219 | r.setStatus(&backupLoc, "Failed", err.Error()) 220 | return ctrl.Result{}, err 221 | } 222 | 223 | return ctrl.Result{}, nil 224 | } 225 | 226 | func buildResticRepoInitPod(cr *kubedrv1alpha1.BackupLocation, log logr.Logger) (*corev1.Pod, error) { 227 | kubedrUtilImage := os.Getenv("KUBEDR_UTIL_IMAGE") 228 | if kubedrUtilImage == "" { 229 | // This should really not happen. 
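// (KUBEDR_UTIL_IMAGE is presumably injected into the manager's environment
// by the Deployment in config/manager/manager.yaml; that wiring is an
// assumption and is not shown here.)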
230 | err := fmt.Errorf("KUBEDR_UTIL_IMAGE is not set") 231 | log.Error(err, "") 232 | return nil, err 233 | } 234 | log.V(1).Info(fmt.Sprintf("kubedrUtilImage: %s", kubedrUtilImage)) 235 | 236 | s3EndPoint := "s3:" + cr.Spec.Url + "/" + cr.Spec.BucketName 237 | 238 | labels := map[string]string{ 239 | "kubedr.type": "backuploc-init", 240 | "kubedr.backuploc": cr.Name, 241 | } 242 | 243 | accessKey := corev1.SecretKeySelector{} 244 | accessKey.Name = cr.Spec.Credentials 245 | accessKey.Key = "access_key" 246 | 247 | secretKey := corev1.SecretKeySelector{} 248 | secretKey.Name = cr.Spec.Credentials 249 | secretKey.Key = "secret_key" 250 | 251 | resticPassword := corev1.SecretKeySelector{} 252 | resticPassword.Name = cr.Spec.Credentials 253 | resticPassword.Key = "restic_repo_password" 254 | 255 | return &corev1.Pod{ 256 | ObjectMeta: metav1.ObjectMeta{ 257 | Name: cr.Name + "-init-pod", 258 | Namespace: cr.Namespace, 259 | Labels: labels, 260 | }, 261 | Spec: corev1.PodSpec{ 262 | Containers: []corev1.Container{ 263 | { 264 | Name: cr.Name + "-init", 265 | Image: kubedrUtilImage, 266 | Args: []string{ 267 | "/usr/local/bin/kubedrutil", "repoinit", 268 | }, 269 | Env: []corev1.EnvVar{ 270 | { 271 | Name: "MY_POD_NAME", 272 | ValueFrom: &corev1.EnvVarSource{ 273 | FieldRef: &corev1.ObjectFieldSelector{ 274 | FieldPath: "metadata.name", 275 | }, 276 | }, 277 | }, 278 | { 279 | Name: "AWS_ACCESS_KEY", 280 | ValueFrom: &corev1.EnvVarSource{ 281 | SecretKeyRef: &accessKey, 282 | }, 283 | }, 284 | { 285 | Name: "AWS_SECRET_KEY", 286 | ValueFrom: &corev1.EnvVarSource{ 287 | SecretKeyRef: &secretKey, 288 | }, 289 | }, 290 | { 291 | Name: "RESTIC_PASSWORD", 292 | ValueFrom: &corev1.EnvVarSource{ 293 | SecretKeyRef: &resticPassword, 294 | }, 295 | }, 296 | { 297 | Name: "RESTIC_REPO", 298 | Value: s3EndPoint, 299 | }, 300 | { 301 | Name: "KDR_BACKUPLOC_NAME", 302 | Value: cr.Name, 303 | }, 304 | }, 305 | }, 306 | }, 307 | RestartPolicy: "Never", 308 | }, 309 | }, nil 310 | } 311 | 312 | // SetupWithManager hooks up this controller with the manager. 313 | func (r *BackupLocationReconciler) SetupWithManager(mgr ctrl.Manager) error { 314 | return ctrl.NewControllerManagedBy(mgr). 315 | For(&kubedrv1alpha1.BackupLocation{}). 316 | Complete(r) 317 | } 318 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 
25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | --------------------------------------------------------------------------------