├── config ├── webhook │ ├── kustomization.yaml │ ├── webhook.yaml │ └── manifests.yaml ├── certmanager │ ├── kustomization.yaml │ └── certificate.yaml ├── manifests │ ├── kustomization.yaml │ └── config_map.yaml ├── rbac │ ├── kustomization.yaml │ ├── role_binding.yaml │ ├── role.yaml │ └── rbac.yaml ├── manager │ ├── kustomization.yaml │ └── manager.yaml ├── crd │ └── kustomization.yaml ├── samples │ └── cr.yaml ├── app │ ├── bookkeeper-cluster.yaml │ └── crd.yaml └── default │ └── kustomization.yaml ├── .github ├── ISSUE_TEMPLATE.md ├── PULL_REQUEST_TEMPLATE.md └── workflows │ └── ci.yaml ├── doc ├── configuration.md ├── service-configuration.md ├── rbac.md ├── operator-upgrade.md ├── release_process.md ├── webhook.md ├── troubleshooting.md ├── bookkeeper-options.md ├── development.md ├── rollback-cluster.md └── upgrade-cluster.md ├── scripts ├── check_format.sh ├── check_license.sh └── pre-upgrade.sh ├── api └── v1alpha1 │ ├── doc.go │ ├── groupversion_info.go │ ├── bookkeepercluster_webhook.go │ ├── zz_generated.deepcopy_test.go │ └── status.go ├── pkg ├── version │ └── version.go ├── util │ ├── zookeeper_util_test.go │ ├── k8sutil.go │ ├── k8sutil_test.go │ ├── zookeeper_util.go │ ├── leader.go │ ├── leader_test.go │ ├── bookkeepercluster.go │ └── bookkeepercluster_test.go ├── controller │ └── config │ │ └── config.go └── test │ └── e2e │ └── e2eutil │ └── spec_util.go ├── hack └── boilerplate.go.txt ├── PROJECT ├── controllers └── controllers_suite_test.go ├── charts └── README.md ├── test └── e2e │ ├── resources │ ├── kubernetes_slave_install.sh │ ├── kubernetes_master_install.sh │ ├── zookeeper.yaml │ └── local-storage.yaml │ ├── scale_test.go │ ├── pod_deletion_test.go │ ├── webhook_test.go │ ├── basic_test.go │ ├── upgrade_test.go │ ├── suite_test.go │ ├── cmchanges_test.go │ ├── rollback_test.go │ └── multiple_bk_test.go ├── Dockerfile ├── .travis.yml ├── example └── cr-detailed.yaml ├── README.md ├── .gitignore ├── go.mod ├── Makefile └── main.go 
/config/webhook/kustomization.yaml: -------------------------------------------------------------------------------- 1 | resources: 2 | - webhook.yaml 3 | -------------------------------------------------------------------------------- /config/certmanager/kustomization.yaml: -------------------------------------------------------------------------------- 1 | resources: 2 | - certificate.yaml 3 | -------------------------------------------------------------------------------- /config/manifests/kustomization.yaml: -------------------------------------------------------------------------------- 1 | resources: 2 | - config_map.yaml 3 | -------------------------------------------------------------------------------- /config/rbac/kustomization.yaml: -------------------------------------------------------------------------------- 1 | resources: 2 | - rbac.yaml 3 | - role.yaml 4 | - role_binding.yaml 5 | -------------------------------------------------------------------------------- /config/manifests/config_map.yaml: -------------------------------------------------------------------------------- 1 | kind: ConfigMap 2 | apiVersion: v1 3 | metadata: 4 | name: bookkeeper-configmap 5 | data: 6 | # Configuration values can be set as key-value properties 7 | PRAVEGA_CLUSTER_NAME: pravega 8 | WAIT_FOR: zookeeper-client:2181 9 | -------------------------------------------------------------------------------- /config/manager/kustomization.yaml: -------------------------------------------------------------------------------- 1 | resources: 2 | - manager.yaml 3 | apiVersion: kustomize.config.k8s.io/v1beta1 4 | kind: Kustomization 5 | images: 6 | - name: pravega/bookkeeper-operator 7 | newName: testbkop/bookkeeper-operator-testimages 8 | newTag: 0.1.8-94-671d745 9 | -------------------------------------------------------------------------------- /config/rbac/role_binding.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: 
rbac.authorization.k8s.io/v1 2 | kind: ClusterRoleBinding 3 | metadata: 4 | name: manager-rolebinding 5 | roleRef: 6 | apiGroup: rbac.authorization.k8s.io 7 | kind: ClusterRole 8 | name: manager-role 9 | subjects: 10 | - kind: ServiceAccount 11 | name: default 12 | namespace: system 13 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | ### Description 2 | 3 | _(Describe the feature, bug, question, proposal that you are requesting)_ 4 | 5 | ### Importance 6 | 7 | _(Indicate the importance of this issue to you (blocker, must-have, should-have, nice-to-have))_ 8 | 9 | ### Location 10 | 11 | _(Where is the piece of code, package, or document affected by this issue?)_ 12 | 13 | ### Suggestions for an improvement 14 | 15 | _(How do you suggest to fix or proceed with this issue?)_ 16 | -------------------------------------------------------------------------------- /doc/configuration.md: -------------------------------------------------------------------------------- 1 | ## Configuration 2 | 3 | This document explains how to configure Bookkeeper 4 | 5 | * [RBAC](rbac.md) 6 | * [Use non-default service accounts](rbac.md#use-non-default-service-accounts) 7 | * [Installing on a Custom Namespace with RBAC enabled](rbac.md#installing-on-a-custom-namespace-with-rbac-enabled) 8 | * [Tune Bookkeeper Configuration](bookkeeper-options.md) 9 | * [Enable admission webhook](webhook.md) 10 | * [Configuring Service Name](service-configuration.md) 11 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | ### Change log description 2 | 3 | _(2-3 concise points about the changes in this PR. 
When committing this PR, the committer is expected to copy the content of this section to the merge description box)_ 4 | 5 | ### Purpose of the change 6 | 7 | _(e.g., Fixes #666, Closes #1234)_ 8 | 9 | ### What the code does 10 | 11 | _(Detailed description of the code changes)_ 12 | 13 | ### How to verify it 14 | 15 | _(Steps to verify that the changes are effective)_ 16 | -------------------------------------------------------------------------------- /scripts/check_format.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # exit immediately when a command fails 3 | set -e 4 | # only exit with zero if all commands of the pipeline exit successfully 5 | set -o pipefail 6 | # error on unset variables 7 | set -u 8 | 9 | goFiles=$(find . -name \*.go -not -path "./vendor/*" -print) 10 | invalidFiles=$(gofmt -l $goFiles) 11 | 12 | if [ "$invalidFiles" ]; then 13 | echo -e "These files did not pass the 'go fmt' check, please run 'go fmt' on them:" 14 | echo -e $invalidFiles 15 | exit 1 16 | fi 17 | -------------------------------------------------------------------------------- /scripts/check_license.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # exit immediately when a command fails 3 | set -e 4 | # only exit with zero if all commands of the pipeline exit successfully 5 | set -o pipefail 6 | # error on unset variables 7 | set -u 8 | 9 | licRes=$( 10 | find . -type f -iname '*.go' ! 
-path '*/vendor/*' -exec \ 11 | sh -c 'head -n3 $1 | grep -Eq "(Copyright|generated|GENERATED)" || echo -e $1' {} {} \; 12 | ) 13 | 14 | if [ -n "${licRes}" ]; then 15 | echo -e "license header checking failed:\\n${licRes}" 16 | exit 255 17 | fi 18 | -------------------------------------------------------------------------------- /api/v1alpha1/doc.go: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | */ 10 | 11 | // Package v1alpha1 contains API Schema definitions for the bookkeeper v1alpha1 API group 12 | // +k8s:deepcopy-gen=package,register 13 | // +groupName=bookkeeper.pravega.io 14 | package v1alpha1 15 | -------------------------------------------------------------------------------- /pkg/version/version.go: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | */ 10 | 11 | package version 12 | 13 | // Version represents the software version of the Bookkeeper Operator 14 | var Version string 15 | 16 | // GitSHA represents the Git commit hash in short format 17 | var GitSHA string 18 | -------------------------------------------------------------------------------- /hack/boilerplate.go.txt: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2022. 
3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ -------------------------------------------------------------------------------- /PROJECT: -------------------------------------------------------------------------------- 1 | domain: pravega.io 2 | layout: 3 | - go.kubebuilder.io/v3 4 | plugins: 5 | manifests.sdk.operatorframework.io/v2: {} 6 | scorecard.sdk.operatorframework.io/v2: {} 7 | projectName: bookkeeper-operator 8 | repo: github.com/pravega/bookkeeper-operator 9 | resources: 10 | - api: 11 | crdVersion: v1 12 | namespaced: true 13 | controller: true 14 | domain: pravega.io 15 | group: bookkeeper 16 | kind: BookkeeperCluster 17 | path: github.com/pravega/bookkeeper-operator/api/v1alpha1 18 | version: v1alpha1 19 | webhooks: 20 | defaulting: true 21 | validation: true 22 | webhookVersion: v1 23 | version: "3" 24 | -------------------------------------------------------------------------------- /config/certmanager/certificate.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: cert-manager.io/v1 2 | kind: Issuer 3 | metadata: 4 | name: selfsigned-issuer-bk 5 | namespace: default 6 | spec: 7 | selfSigned: {} 8 | --- 9 | apiVersion: cert-manager.io/v1 10 | kind: Certificate 11 | metadata: 12 | name: selfsigned-cert-bk 13 | namespace: default 14 | spec: 15 | secretName: selfsigned-cert-tls-bk 16 | commonName: bookkeeper-webhook-svc.default.svc.cluster.local 17 | dnsNames: 18 | - 
bookkeeper-webhook-svc 19 | - bookkeeper-webhook-svc.default.svc.cluster.local 20 | - bookkeeper-webhook-svc.default.svc 21 | issuerRef: 22 | name: selfsigned-issuer-bk 23 | -------------------------------------------------------------------------------- /controllers/controllers_suite_test.go: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2021 Dell Inc., or its subsidiaries. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | */ 10 | 11 | package controllers 12 | 13 | import ( 14 | "testing" 15 | 16 | . "github.com/onsi/ginkgo" 17 | . "github.com/onsi/gomega" 18 | ) 19 | 20 | func TestBookkeeperAPIs(t *testing.T) { 21 | RegisterFailHandler(Fail) 22 | RunSpecs(t, "BookkeeperCluster Controller Tests") 23 | } 24 | -------------------------------------------------------------------------------- /config/rbac/role.yaml: -------------------------------------------------------------------------------- 1 | 2 | --- 3 | apiVersion: rbac.authorization.k8s.io/v1 4 | kind: ClusterRole 5 | metadata: 6 | creationTimestamp: null 7 | name: manager-role 8 | rules: 9 | - apiGroups: 10 | - bookkeeper.pravega.io 11 | resources: 12 | - bookkeeperclusters 13 | verbs: 14 | - create 15 | - delete 16 | - get 17 | - list 18 | - patch 19 | - update 20 | - watch 21 | - apiGroups: 22 | - bookkeeper.pravega.io 23 | resources: 24 | - bookkeeperclusters/finalizers 25 | verbs: 26 | - update 27 | - apiGroups: 28 | - bookkeeper.pravega.io 29 | resources: 30 | - bookkeeperclusters/status 31 | verbs: 32 | - get 33 | - patch 34 | - update 35 | -------------------------------------------------------------------------------- /doc/service-configuration.md: 
-------------------------------------------------------------------------------- 1 | # Configuring Bookkeeper Headless Service Name 2 | 3 | By default, the Bookkeeper headless service name is configured as `[CLUSTER_NAME]-bookie-headless`. 4 | 5 | ``` 6 | bookkeeper-bookie-headless ClusterIP None 3181/TCP 4d15h 7 | ``` 8 | However, the headless service name can be configured as follows: 9 | 10 | ``` 11 | helm install bookkeeper pravega/bookkeeper --set headlessSvcNameSuffix="headless" 12 | ``` 13 | 14 | After installation, services can be listed using the `kubectl get svc` command. 15 | 16 | 17 | ``` 18 | bookkeeper-headless ClusterIP None 3181/TCP 4d15h 19 | ``` 20 | -------------------------------------------------------------------------------- /charts/README.md: -------------------------------------------------------------------------------- 1 | # Moved 2 | 3 | The bookkeeper operator and bookkeeper helm charts originally developed as part of this repository have been moved [here](https://github.com/pravega/charts/tree/master/charts). 4 | 5 | A copy of the charts will remain here until the migration is complete. We will, however, no longer accept pull requests for any modification to these charts. All subsequent issues and pull requests will be tracked in the [pravega/charts](https://github.com/pravega/charts) repository by following the guidelines mentioned [here](https://github.com/pravega/charts/wiki/Contributing). 6 | 7 | [Here](https://github.com/pravega/bookkeeper-operator/issues/147) is the issue for tracking this migration activity. 8 | -------------------------------------------------------------------------------- /pkg/util/zookeeper_util_test.go: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | */ 10 | package util 11 | 12 | import ( 13 | . "github.com/onsi/ginkgo" 14 | . "github.com/onsi/gomega" 15 | ) 16 | 17 | var _ = Describe("zookeeperutil", func() { 18 | Context("DeleteAllZnodes", func() { 19 | 20 | var err error 21 | BeforeEach(func() { 22 | 23 | err = DeleteAllZnodes("zookeeper-client:2181", "default", "bookie") 24 | }) 25 | It("should not be nil", func() { 26 | Ω(err).ShouldNot(BeNil()) 27 | }) 28 | }) 29 | }) 30 | -------------------------------------------------------------------------------- /scripts/pre-upgrade.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | set -ex 3 | 4 | if [[ "$#" -lt 1 || "$#" -gt 2 ]]; then 5 | echo "Error : Invalid number of arguments" 6 | Usage: "./pre-upgrade.sh " 7 | exit 1 8 | fi 9 | 10 | name=$1 11 | namespace=${2:-default} 12 | 13 | kubectl annotate Service bookkeeper-webhook-svc meta.helm.sh/release-name=$name -n $namespace --overwrite 14 | kubectl annotate Service bookkeeper-webhook-svc meta.helm.sh/release-namespace=$namespace -n $namespace --overwrite 15 | kubectl label Service bookkeeper-webhook-svc app.kubernetes.io/managed-by=Helm -n $namespace --overwrite 16 | 17 | #deleting the mutatingwebhookconfiguration created by the previous operator 18 | kubectl delete mutatingwebhookconfiguration bookkeeper-webhook-config 19 | -------------------------------------------------------------------------------- /config/crd/kustomization.yaml: -------------------------------------------------------------------------------- 1 | # This kustomization.yaml is not intended to be run by itself, 2 | # since it depends on service name and namespace that are out of this kustomize package. 
3 | # It should be run by config/default 4 | resources: 5 | - bases/bookkeeper.pravega.io_bookkeeperclusters.yaml 6 | # +kubebuilder:scaffold:crdkustomizeresource 7 | 8 | patchesStrategicMerge: 9 | # [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix. 10 | # patches here are for enabling the conversion webhook for each CRD 11 | #- patches/webhook_in_zookeeperclusters.yaml 12 | # +kubebuilder:scaffold:crdkustomizewebhookpatch 13 | 14 | # [CERTMANAGER] To enable webhook, uncomment all the sections with [CERTMANAGER] prefix. 15 | # patches here are for enabling the CA injection for each CRD 16 | #- patches/cainjection_in_zookeeperclusters.yaml 17 | # +kubebuilder:scaffold:crdkustomizecainjectionpatch 18 | -------------------------------------------------------------------------------- /config/samples/cr.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: "bookkeeper.pravega.io/v1alpha1" 2 | kind: "BookkeeperCluster" 3 | metadata: 4 | name: "bookkeeper" 5 | spec: 6 | version: 0.9.0 7 | zookeeperUri: zookeeper-client:2181 8 | image: 9 | imageSpec: 10 | repository: pravega/bookkeeper 11 | pullPolicy: IfNotPresent 12 | replicas: 3 13 | envVars: bookkeeper-configmap 14 | autoRecovery: true 15 | storage: 16 | ledgerVolumeClaimTemplate: 17 | accessModes: [ "ReadWriteOnce" ] 18 | storageClassName: "standard" 19 | resources: 20 | requests: 21 | storage: 10Gi 22 | journalVolumeClaimTemplate: 23 | accessModes: [ "ReadWriteOnce" ] 24 | storageClassName: "standard" 25 | resources: 26 | requests: 27 | storage: 10Gi 28 | indexVolumeClaimTemplate: 29 | accessModes: [ "ReadWriteOnce" ] 30 | storageClassName: "standard" 31 | resources: 32 | requests: 33 | storage: 10Gi 34 | -------------------------------------------------------------------------------- /pkg/controller/config/config.go: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2018 Dell Inc., 
or its subsidiaries. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | */ 10 | 11 | package config 12 | 13 | // TestMode enables test mode in the operator and applies 14 | // the following changes: 15 | // - Disables BookKeeper minimum number of replicas 16 | var TestMode bool 17 | 18 | // DisableFinalizer disables the finalizers for bookkeeper clusters and 19 | // skips the znode cleanup phase when a bookkeeper cluster gets deleted. 20 | // This is useful when operator deletion may happen before bookkeeper cluster deletion. 21 | // NOTE: enable this flag with caution! It causes stale znode data in zk and 22 | // leads to conflicts with subsequent bookkeeper cluster deployments. 23 | var DisableFinalizer bool 24 | -------------------------------------------------------------------------------- /config/app/bookkeeper-cluster.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: app.k8s.io/v1 2 | kind: Application 3 | metadata: 4 | name: "bookkeeper-cluster" 5 | labels: 6 | app.kubernetes.io/name: "bookkeeper-cluster" 7 | annotations: 8 | com.dellemc.kahm.subscribed: "true" 9 | nautilus.dellemc.com/chart-version: "latest" 10 | spec: 11 | assemblyPhase: "Pending" 12 | selector: 13 | matchLabels: 14 | app.kubernetes.io/name: "bookkeeper-cluster" 15 | componentKinds: 16 | - group: core 17 | kind: Service 18 | - group: core 19 | kind: Pod 20 | - group: apps 21 | kind: StatefulSet 22 | - group: core 23 | kind: ConfigMap 24 | - group: core 25 | kind: Secret 26 | - group: core 27 | kind: PersistentVolumeClaim 28 | - group: core 29 | kind: ServiceAccount 30 | - group: pravega.pravega.io 31 | kind: BookkeeperCluster 32 | descriptor: 33 | type: "bookkeeper-cluster" 34 | version: "latest" 35 | description: > 36 
| Bookkeeper deployment on Kubernetes 37 | keywords: 38 | - "nautilus" 39 | - "pravega" 40 | - "bookkeeper" 41 | -------------------------------------------------------------------------------- /config/webhook/webhook.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: bookkeeper-webhook-svc 5 | namespace: default 6 | spec: 7 | ports: 8 | - port: 443 9 | protocol: TCP 10 | targetPort: 9443 11 | selector: 12 | component: bookkeeper-operator 13 | sessionAffinity: None 14 | type: ClusterIP 15 | --- 16 | 17 | apiVersion: admissionregistration.k8s.io/v1 18 | kind: ValidatingWebhookConfiguration 19 | metadata: 20 | name: bookkeeper-webhook-config 21 | annotations: 22 | cert-manager.io/inject-ca-from: default/selfsigned-cert-bk 23 | webhooks: 24 | - clientConfig: 25 | service: 26 | name: bookkeeper-webhook-svc 27 | namespace: default 28 | path: /validate-bookkeeper-pravega-io-v1alpha1-bookkeepercluster 29 | name: bookkeeperwebhook.pravega.io 30 | failurePolicy: Fail 31 | rules: 32 | - apiGroups: 33 | - bookkeeper.pravega.io 34 | apiVersions: 35 | - v1alpha1 36 | operations: 37 | - CREATE 38 | - UPDATE 39 | resources: 40 | - bookkeeperclusters 41 | scope: "*" 42 | admissionReviewVersions: ["v1beta1", "v1"] 43 | sideEffects: None 44 | timeoutSeconds: 30 45 | -------------------------------------------------------------------------------- /config/manager/manager.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: bookkeeper-operator 5 | spec: 6 | replicas: 1 7 | selector: 8 | matchLabels: 9 | name: bookkeeper-operator 10 | template: 11 | metadata: 12 | labels: 13 | name: bookkeeper-operator 14 | component: bookkeeper-operator 15 | spec: 16 | serviceAccountName: bookkeeper-operator 17 | containers: 18 | - name: bookkeeper-operator 19 | image: pravega/bookkeeper-operator:0.1.5 20 
| ports: 21 | - containerPort: 60000 22 | name: metrics 23 | command: 24 | - bookkeeper-operator 25 | imagePullPolicy: Always 26 | env: 27 | - name: WATCH_NAMESPACE 28 | value: "" 29 | - name: POD_NAME 30 | valueFrom: 31 | fieldRef: 32 | fieldPath: metadata.name 33 | - name: OPERATOR_NAME 34 | value: "bookkeeper-operator" 35 | volumeMounts: 36 | - name: webhook-cert 37 | mountPath: "/tmp/k8s-webhook-server/serving-certs" 38 | readOnly: true 39 | volumes: 40 | - name: webhook-cert 41 | secret: 42 | secretName: selfsigned-cert-tls-bk 43 | -------------------------------------------------------------------------------- /api/v1alpha1/groupversion_info.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2022. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | // Package v1alpha1 contains API Schema definitions for the bookkeeper v1alpha1 API group 18 | // +kubebuilder:object:generate=true 19 | // +groupName=bookkeeper.pravega.io 20 | package v1alpha1 21 | 22 | import ( 23 | "k8s.io/apimachinery/pkg/runtime/schema" 24 | "sigs.k8s.io/controller-runtime/pkg/scheme" 25 | ) 26 | 27 | var ( 28 | // GroupVersion is group version used to register these objects 29 | GroupVersion = schema.GroupVersion{Group: "bookkeeper.pravega.io", Version: "v1alpha1"} 30 | 31 | // SchemeBuilder is used to add go types to the GroupVersionKind scheme 32 | SchemeBuilder = &scheme.Builder{GroupVersion: GroupVersion} 33 | 34 | // AddToScheme adds the types in this group-version to the given scheme. 35 | AddToScheme = SchemeBuilder.AddToScheme 36 | ) 37 | -------------------------------------------------------------------------------- /test/e2e/resources/kubernetes_slave_install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | sudo apt-get update 4 | echo "update done" 5 | sudo apt-get install nfs-common make -y 6 | sudo rm /lib/systemd/system/nfs-common.service 7 | sudo systemctl daemon-reload 8 | sudo systemctl start nfs-common 9 | sudo systemctl status nfs-common 10 | curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add - 11 | sudo add-apt-repository \ 12 | "deb [arch=amd64] https://download.docker.com/linux/ubuntu \ 13 | $(lsb_release -cs) \ 14 | stable" 15 | sudo apt-get update 16 | echo "update done" 17 | sudo apt-get install docker-ce=5:19.03.9~3-0~ubuntu-focal -y 18 | echo "docker install" 19 | sudo systemctl enable --now docker 20 | apt-get update && apt-get install -y \ 21 | apt-transport-https ca-certificates curl software-properties-common gnupg2 22 | echo "installed certs" 23 | sudo curl -s https://packages.cloud.google.com/apt/doc/apt-key.gpg | sudo apt-key add - 24 | echo "deb http://apt.kubernetes.io/ kubernetes-xenial main" \ 25 | | sudo 
tee -a /etc/apt/sources.list.d/kubernetes.list \ 26 | && sudo apt-get update 27 | sudo apt-get update \ 28 | && sudo apt-get install -yq \ 29 | kubelet=1.21.2-00 \ 30 | kubeadm=1.21.2-00 \ 31 | kubernetes-cni 32 | sudo apt-mark hold kubelet kubeadm kubectl 33 | UUID=`cat /etc/fstab | grep swap | awk '{print $1}' | tr -d "#UUID="` 34 | sed -i '2 s/^/#/' /etc/fstab 35 | echo "swapoff UUID=$UUID" 36 | swapoff UUID=$UUID 37 | -------------------------------------------------------------------------------- /config/webhook/manifests.yaml: -------------------------------------------------------------------------------- 1 | 2 | --- 3 | apiVersion: admissionregistration.k8s.io/v1 4 | kind: MutatingWebhookConfiguration 5 | metadata: 6 | creationTimestamp: null 7 | name: mutating-webhook-configuration 8 | webhooks: 9 | - admissionReviewVersions: 10 | - v1 11 | clientConfig: 12 | service: 13 | name: webhook-service 14 | namespace: system 15 | path: /mutate-bookkeeper-pravega-io-v1alpha1-bookkeepercluster 16 | failurePolicy: Fail 17 | name: mbookkeepercluster.kb.io 18 | rules: 19 | - apiGroups: 20 | - bookkeeper.pravega.io 21 | apiVersions: 22 | - v1alpha1 23 | operations: 24 | - CREATE 25 | - UPDATE 26 | resources: 27 | - bookkeeperclusters 28 | sideEffects: None 29 | 30 | --- 31 | apiVersion: admissionregistration.k8s.io/v1 32 | kind: ValidatingWebhookConfiguration 33 | metadata: 34 | creationTimestamp: null 35 | name: validating-webhook-configuration 36 | webhooks: 37 | - admissionReviewVersions: 38 | - v1 39 | clientConfig: 40 | service: 41 | name: webhook-service 42 | namespace: system 43 | path: /validate-bookkeeper-pravega-io-v1alpha1-bookkeepercluster 44 | failurePolicy: Fail 45 | name: vbookkeepercluster.kb.io 46 | rules: 47 | - apiGroups: 48 | - bookkeeper.pravega.io 49 | apiVersions: 50 | - v1alpha1 51 | operations: 52 | - CREATE 53 | - UPDATE 54 | resources: 55 | - bookkeeperclusters 56 | sideEffects: None 57 | 
-------------------------------------------------------------------------------- /doc/rbac.md: -------------------------------------------------------------------------------- 1 | ## Setting up RBAC for Bookkeeper operator 2 | 3 | ### Use non-default service accounts 4 | 5 | You can optionally configure non-default service accounts for the Bookkeeper. 6 | 7 | For Bookkeeper, set the `serviceAccountName` field under the `spec` block. 8 | 9 | ``` 10 | ... 11 | spec: 12 | serviceAccountName: bk-service-account 13 | ... 14 | ``` 15 | 16 | Replace the `namespace` with your own namespace. 17 | 18 | ### Installing on a Custom Namespace with RBAC enabled 19 | 20 | Create the namespace. 21 | 22 | ``` 23 | $ kubectl create namespace pravega-io 24 | ``` 25 | 26 | Update the namespace configured in the `deploy/role_binding.yaml` file. 27 | 28 | ``` 29 | $ sed -i -e 's/namespace: default/namespace: pravega-io/g' deploy/role_binding.yaml 30 | ``` 31 | 32 | Apply the changes. 33 | 34 | ``` 35 | $ kubectl -n pravega-io apply -f deploy 36 | ``` 37 | 38 | ``` 39 | $ kubectl -n pravega-io create -f example/cr-detailed.yaml 40 | ``` 41 | 42 | ``` 43 | $ kubectl -n pravega-io get bk 44 | NAME AGE 45 | bookkeeper 28m 46 | ``` 47 | 48 | ``` 49 | $ kubectl -n pravega-io get pods -l bookkeeper_cluster=bookkeeper 50 | NAME READY STATUS RESTARTS AGE 51 | bookkeeper-bookie-0 1/1 Running 0 29m 52 | bookkeeper-bookie-1 1/1 Running 0 29m 53 | bookkeeper-bookie-2 1/1 Running 0 29m 54 | ``` 55 | -------------------------------------------------------------------------------- /config/default/kustomization.yaml: -------------------------------------------------------------------------------- 1 | # Adds namespace to all resources. 2 | namespace: default 3 | 4 | # Value of this field is prepended to the 5 | # names of all resources, e.g. a deployment named 6 | # "wordpress" becomes "alices-wordpress". 
7 | # Note that it should also match with the prefix (text before '-') of the namespace 8 | 9 | # Labels to add to all resources and selectors. 10 | #commonLabels: 11 | # someName: someValue 12 | 13 | bases: 14 | - ../manifests 15 | - ../crd 16 | - ../certmanager 17 | - ../webhook 18 | - ../manager 19 | - ../rbac 20 | 21 | # [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix including the one in 22 | # crd/kustomization.yaml 23 | #- ../webhook 24 | # [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER'. 'WEBHOOK' components are required. 25 | #- ../certmanager 26 | # [PROMETHEUS] To enable prometheus monitor, uncomment all sections with 'PROMETHEUS'. 27 | #- ../prometheus 28 | 29 | patchesStrategicMerge: 30 | # Protect the /metrics endpoint by putting it behind auth. 31 | # If you want your controller-manager to expose the /metrics 32 | # # endpoint w/o any authn/z, please comment the following line. 33 | #- manager_auth_proxy_patch.yaml 34 | 35 | # [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix including the one in 36 | # crd/kustomization.yaml 37 | #- manager_webhook_patch.yaml 38 | 39 | # [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER'. 40 | # Uncomment 'CERTMANAGER' sections in crd/kustomization.yaml to enable the CA injection in the admission webhooks. 41 | # 'CERTMANAGER' needs to be enabled to use ca injection 42 | #- webhookcainjection_patch.yaml 43 | -------------------------------------------------------------------------------- /pkg/test/e2e/e2eutil/spec_util.go: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | */ 10 | 11 | package e2eutil 12 | 13 | import ( 14 | api "github.com/pravega/bookkeeper-operator/api/v1alpha1" 15 | corev1 "k8s.io/api/core/v1" 16 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 17 | ) 18 | 19 | // NewDefaultCluster returns a cluster with an empty spec, which will be filled 20 | // with default values 21 | func NewDefaultCluster(namespace string) *api.BookkeeperCluster { 22 | return &api.BookkeeperCluster{ 23 | TypeMeta: metav1.TypeMeta{ 24 | Kind: "BookkeeperCluster", 25 | APIVersion: "pravega.pravega.io/v1alpha1", 26 | }, 27 | ObjectMeta: metav1.ObjectMeta{ 28 | Name: "bookkeeper", 29 | Namespace: namespace, 30 | }, 31 | Spec: api.BookkeeperClusterSpec{}, 32 | } 33 | } 34 | 35 | func NewClusterWithVersion(namespace, version string) *api.BookkeeperCluster { 36 | cluster := NewDefaultCluster(namespace) 37 | cluster.Spec = api.BookkeeperClusterSpec{ 38 | Version: version, 39 | } 40 | return cluster 41 | } 42 | 43 | func NewConfigMap(namespace, name string, pravega string) *corev1.ConfigMap { 44 | return &corev1.ConfigMap{ 45 | TypeMeta: metav1.TypeMeta{ 46 | Kind: "ConfigMap", 47 | APIVersion: "v1", 48 | }, 49 | ObjectMeta: metav1.ObjectMeta{ 50 | Name: name, 51 | Namespace: namespace, 52 | }, 53 | Data: map[string]string{ 54 | "PRAVEGA_CLUSTER_NAME": pravega, 55 | }, 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2017 Dell Inc., or its subsidiaries. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | ARG DOCKER_REGISTRY 11 | ARG GO_VERSION=1.21 12 | ARG ALPINE_VERSION=3.18 13 | 14 | FROM ${DOCKER_REGISTRY:+$DOCKER_REGISTRY/}golang:${GO_VERSION}-alpine${ALPINE_VERSION} as go-builder 15 | 16 | ARG PROJECT_NAME=bookkeeper-operator 17 | ARG REPO_PATH=github.com/pravega/$PROJECT_NAME 18 | 19 | # Build version and commit SHA should be passed in when performing docker build 20 | ARG VERSION=0.0.0-localdev 21 | ARG GIT_SHA=0000000 22 | 23 | WORKDIR /src 24 | 25 | COPY pkg ./pkg 26 | COPY go.mod ./ 27 | COPY go.sum ./ 28 | 29 | # Download all dependencies. 30 | RUN go mod download 31 | 32 | # Copy the go source 33 | COPY main.go main.go 34 | COPY api/ api/ 35 | COPY controllers/ controllers/ 36 | 37 | RUN GOOS=linux GOARCH=amd64 CGO_ENABLED=0 go build -o /src/${PROJECT_NAME} \ 38 | -ldflags "-X ${REPO_PATH}/pkg/version.Version=${VERSION} -X ${REPO_PATH}/pkg/version.GitSHA=${GIT_SHA}" \ 39 | main.go 40 | 41 | # ============================================================================= 42 | FROM ${DOCKER_REGISTRY:+$DOCKER_REGISTRY/}alpine:${ALPINE_VERSION} AS final 43 | 44 | RUN apk update && apk add --upgrade \ 45 | sudo \ 46 | libcap \ 47 | busybox 48 | 49 | ARG PROJECT_NAME=bookkeeper-operator 50 | 51 | COPY --from=go-builder /src/${PROJECT_NAME} /usr/local/bin/${PROJECT_NAME} 52 | 53 | RUN sudo setcap CAP_NET_BIND_SERVICE=+eip /usr/local/bin/${PROJECT_NAME} 54 | 55 | RUN adduser -D ${PROJECT_NAME} 56 | USER ${PROJECT_NAME} 57 | 58 | ENTRYPOINT ["/usr/local/bin/bookkeeper-operator"] 59 | -------------------------------------------------------------------------------- /test/e2e/resources/kubernetes_master_install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | echo "TCPKeepAlive yes" >> /etc/ssh/sshd_config 3 | echo "ClientAliveInterval 60" >> /etc/ssh/sshd_config 4 | echo "ClientAliveCountMax 3" >> 
/etc/ssh/sshd_config 5 | service sshd restart 6 | curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add - 7 | sudo add-apt-repository \ 8 | "deb [arch=amd64] https://download.docker.com/linux/ubuntu \ 9 | $(lsb_release -cs) \ 10 | stable" 11 | sudo apt-get update 12 | echo "update done" 13 | sudo apt-get install docker-ce=5:19.03.9~3-0~ubuntu-focal -y 14 | echo "docker install" 15 | sudo systemctl enable --now docker 16 | sudo apt-get install nfs-common -y 17 | sudo rm /lib/systemd/system/nfs-common.service 18 | sudo systemctl daemon-reload 19 | sudo systemctl start nfs-common 20 | sudo systemctl status nfs-common 21 | apt-get update && apt-get install -y \ 22 | apt-transport-https ca-certificates curl software-properties-common gnupg2 23 | echo "installed certs" 24 | sudo curl -s https://packages.cloud.google.com/apt/doc/apt-key.gpg | sudo apt-key add - 25 | echo "deb http://apt.kubernetes.io/ kubernetes-xenial main" \ 26 | | sudo tee -a /etc/apt/sources.list.d/kubernetes.list \ 27 | && sudo apt-get update 28 | sudo apt-get update \ 29 | && sudo apt-get install -yq \ 30 | kubelet=1.21.2-00 \ 31 | kubeadm=1.21.2-00 \ 32 | kubernetes-cni 33 | sudo apt-mark hold kubelet kubeadm kubectl 34 | UUID=`cat /etc/fstab | grep swap | awk '{print $1}' | tr -d "#UUID="` 35 | sed -i '2 s/^/#/' /etc/fstab 36 | echo "swapoff UUID=$UUID" 37 | swapoff UUID=$UUID 38 | IP=`ifconfig bond0:0 | grep "inet" | awk '{print $2}'` 39 | sudo kubeadm init --apiserver-advertise-address=$IP --pod-network-cidr=192.168.0.0/16 40 | mkdir -p $HOME/.kube 41 | sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config 42 | sudo chown $(id -u):$(id -g) $HOME/.kube/config 43 | kubectl get nodes 44 | kubectl apply -f https://docs.projectcalico.org/v3.14/manifests/calico.yaml 45 | kubectl get nodes 46 | sudo apt-get install binutils bison gcc make -y 47 | mkdir /export 48 | -------------------------------------------------------------------------------- /.travis.yml: 
-------------------------------------------------------------------------------- 1 | sudo: required 2 | 3 | language: go 4 | go_import_path: github.com/pravega/bookkeeper-operator 5 | go: 6 | - '1.13.8' 7 | 8 | env: 9 | global: 10 | - secure: "X4zdixrmuh/D4WwJ6btu0fJROq7fTPO5n4/jeyoxTQvrpwikycwf3ZpXS94u43rHx1ovc8vweVIc4Kur7MFAJDmFpPz5adjD7cCjqaRHo+Sw4a/gdNKizFEmZEBFlelynJ+cYekfF3rIFRws+u/8yWUQnSPl4495Qq1NKjrDTqoB6ZrQDxYdIPPpLAit+Aj9a2zQ4xPHyOv02SmfgL22wQhOx8SF78VNmqVah6klC8j3tAemHJQSBmUQ2fPVBpqAy3YsI7mpF9aSdBOzr7Dk1/cajOx4M5WrAxDPZ7+fSKD97SjICKqtM6MuX63O6Tg86zzdoEdvTWvwBwjWpbI3iCytJOCD8vT1QBPd9bLwZI7K5dElrW990HGrBBS4GmKTFykEE6PCNvxDHu1pG4N0vmLhZ3Hh4evZxA8xSnHRyYTLOaDkSe/vurlmsRJiW0GOEa6Fyz+xrD5y4l5MDkPVvaEPzWFZQf8v+l3OvvDvmHmqup4ADXE/XBYbVraTn8wPcQnvk5ueCvJWBOd/E4keGFsGbqDxQpvQRG/8aHWv32lj2HHK6/qbihXvTfJfRbrWli2BV0LMBL/OBIiuL1JeXycYZG3AtbnwgJVXPzs8kpwrmxkRYvbb3Kc9HXu9x+ajeWDYb978Rq6/0DfXhKFIiLrgUOxR2SK6eHDTcfUY+us=" 11 | 12 | stages: 13 | - check 14 | - unit 15 | - deploy 16 | 17 | jobs: 18 | include: 19 | - stage: check 20 | name: Gofmt and License checks 21 | script: 22 | - make check 23 | 24 | - stage: unit 25 | name: Unit tests 26 | script: 27 | - make test-unit 28 | after_success: 29 | - bash <(curl -s https://codecov.io/bash) 30 | 31 | - stage: deploy 32 | name: Push Docker image 33 | if: type != pull_request AND tag IS present 34 | services: 35 | - docker 36 | script: 37 | - make push 38 | 39 | notifications: 40 | slack: 41 | rooms: 42 | secure: 
Gv0RJx1Sa/y5fmvLNwY+2ivfWZYCM0ekrr6UAHqsegnid6P/DFZrSrfSpwvcVh2OVNH8DHLV0BoiuDJ7amtl1eMDMXz5/lLz8tFWFKaHv4yDSadm8ILY/KnYUoP4IRuM3NyKQmBrmZB9Or5KFXboG6ex6UkgbuYy0Zyl6syEe168Iw8hlCRx26Jei7/y+8eE2MIGFh09TLRZ/944YbULum9H3KQLYv8nFdPc7GmR5AK461fnwZ7iYjb7MXkCctE5Vml3p9+2Qliv1ZJqNsQeKmSFW6IhiP6pNZ1V8VJEWMQmX/nBr9745l/N+CoLQz9ajLonlxn9xHdWms4TEu1ynFk6uxEJjlcpXcvcEaKhqAKcTMl0GMMRab2m+/Vt3S/VutJnVXQmnhZGT9glLFQHwcdHNqM/LEbXtyisB7zmGImUQpF2InCwO25IXug5gv64IfOHGMzL56yNIhbRgBY9Ud4Tux+pmkV5ZxJiBkul7/FiHQX7tQLUrzQosD0oyCOmaWD7kmbt15A0TOkLgup4HE+sSS1ASwisa7J2+HsbI3Upy3rNVKuIJP0L4KSTn4HSlDlMLLcWM+nz/YCEfuwSRXJTIstotNYHdsLUZAZSYAX7ejpeiuBRed4a4AlCROeKbKKwCcSvqCOjmCaPTpwJAGeJByOXLL2hfQzpDMKCIKM= 43 | -------------------------------------------------------------------------------- /example/cr-detailed.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: "bookkeeper.pravega.io/v1alpha1" 2 | kind: "BookkeeperCluster" 3 | metadata: 4 | name: "bookkeeper" 5 | spec: 6 | version: 0.9.0 7 | zookeeperUri: zookeeper-client:2181 8 | 9 | image: 10 | imageSpec: 11 | repository: pravega/bookkeeper 12 | pullPolicy: IfNotPresent 13 | 14 | replicas: 3 15 | 16 | envVars: bookkeeper-configmap 17 | 18 | resources: 19 | requests: 20 | memory: "3Gi" 21 | cpu: "1000m" 22 | limits: 23 | memory: "5Gi" 24 | cpu: "2000m" 25 | 26 | storage: 27 | ledgerVolumeClaimTemplate: 28 | accessModes: [ "ReadWriteOnce" ] 29 | storageClassName: "standard" 30 | resources: 31 | requests: 32 | storage: 10Gi 33 | 34 | journalVolumeClaimTemplate: 35 | accessModes: [ "ReadWriteOnce" ] 36 | storageClassName: "standard" 37 | resources: 38 | requests: 39 | storage: 10Gi 40 | 41 | indexVolumeClaimTemplate: 42 | accessModes: [ "ReadWriteOnce" ] 43 | storageClassName: "standard" 44 | resources: 45 | requests: 46 | storage: 10Gi 47 | 48 | # Turns on automatic recovery 49 | # see https://bookkeeper.apache.org/docs/latest/admin/autorecovery/ 50 | autoRecovery: true 51 | 52 | # To enable 
bookkeeper metrics feature, take codahale for example here. 53 | # See http://bookkeeper.apache.org/docs/4.7.0/admin/metrics/ for more metrics provider 54 | # See http://bookkeeper.apache.org/docs/4.7.0/reference/config/#statistics for metrics provider configuration details 55 | options: 56 | enableStatistics: "true" 57 | statsProviderClass: "org.apache.bookkeeper.stats.codahale.CodahaleMetricsProvider" 58 | codahaleStatsGraphiteEndpoint: "graphite.example.com:2003" 59 | # Default is 60 60 | codahaleStatsOutputFrequencySeconds: "30" 61 | 62 | # Pass the JVM options to Bookkeeper 63 | jvmOptions: 64 | memoryOpts: ["-Xms2g", "-XX:MaxDirectMemorySize=2g"] 65 | gcOpts: ["-XX:MaxGCPauseMillis=20"] 66 | gcLoggingOpts: ["-Xlog:gc*,safepoint::time,level,tags:filecount=5,filesize=64m"] 67 | extraOpts: ["-XX:+IgnoreUnrecognizedVMOptions"] 68 | -------------------------------------------------------------------------------- /doc/operator-upgrade.md: -------------------------------------------------------------------------------- 1 | # Upgrade Guide 2 | 3 | ## Upgrading till 0.1.2 4 | 5 | The Bookkeeper operator can be upgraded to version **[VERSION]** via helm using the following command: 6 | 7 | ``` 8 | $ helm upgrade [BOOKKEEPER_OPERATOR_RELEASE_NAME] pravega/bookkeeper-operator --version=[VERSION] 9 | ``` 10 | 11 | ## Upgrading to 0.1.3 12 | 13 | ### Pre-requisites 14 | 15 | For upgrading Operator to version 0.1.3, the following must be true: 16 | 1. The Kubernetes Server version must be at least 1.15, with Beta APIs 17 | 18 | 2. Cert-Manager v0.15.0+ or some other certificate management solution must be deployed for managing webhook service certificates. The upgrade trigger script assumes that the user has [cert-manager](https://cert-manager.io/docs/installation/kubernetes/) installed; any other cert management solution can also be used, in which case the script needs to be modified accordingly.
19 | To install cert-manager check [this](https://cert-manager.io/docs/installation/kubernetes/). 20 | 21 | 3. Install an Issuer and a Certificate (either self-signed or CA signed) in the same namespace as the Bookkeeper Operator (refer to [this](https://github.com/pravega/bookkeeper-operator/blob/master/deploy/certificate.yaml) manifest to create a self-signed certificate in the default namespace). 22 | 23 | 4. Execute the script `pre-upgrade.sh` inside the [scripts](https://github.com/pravega/bookkeeper-operator/blob/master/scripts) folder. This script patches the `bookkeeper-webhook-svc` with the required annotations and labels. The format of the command is 24 | ``` 25 | ./pre-upgrade.sh [BOOKKEEPER_OPERATOR_RELEASE_NAME] [BOOKKEEPER_OPERATOR_NAMESPACE] 26 | ``` 27 | where: 28 | - `[BOOKKEEPER_OPERATOR_RELEASE_NAME]` is the release name of the bookkeeper operator deployment 29 | - `[BOOKKEEPER_OPERATOR_NAMESPACE]` is the namespace in which the bookkeeper operator has been deployed (this is an optional parameter and its default value is `default`) 30 | 31 | ### Triggering the upgrade 32 | 33 | The upgrade to Operator 0.1.3 can be triggered using the following command 34 | ``` 35 | helm upgrade [BOOKKEEPER_OPERATOR_RELEASE_NAME] pravega/bookkeeper-operator --version=0.1.3 --set webhookCert.certName=[CERT_NAME] --set webhookCert.secretName=[SECRET_NAME] 36 | ``` 37 | where: 38 | - `[CERT_NAME]` is the name of the certificate that has been created 39 | - `[SECRET_NAME]` is the name of the secret created by the above certificate 40 | -------------------------------------------------------------------------------- /doc/release_process.md: -------------------------------------------------------------------------------- 1 | # Bookkeeper Operator Release Process 2 | 3 | ## Release Versioning 4 | Bookkeeper Operator follows the [Semantic Versioning](https://semver.org/) model for numbering releases.
5 | 6 | ## Introduction 7 | This page documents the tagging, branching and release process followed for Bookkeeper Operator. 8 | 9 | ## Types of Releases 10 | 11 | ### Minor Release (Bug Fix release) 12 | 13 | This is a minor release with backward compatible changes and bug fixes. 14 | 15 | 1. Create a new branch with last number bumped up from the existing release branch. 16 | For example, if the existing release branch is 0.1.2, the new branch will be named 0.1.3. 17 | 18 | `$ git clone --branch <existing-release-branch> git@github.com:pravega/bookkeeper-operator.git` 19 | 20 | `$ git checkout -b <new-release-branch>` 21 | 22 | 2. Cherry pick commits from master/private branches into the release branch. 23 | Change the operator version in `pkg/version/version.go`. 24 | 25 | `$ git cherry-pick --signoff <commit-hash>` 26 | 27 | 3. Make sure all unit and end to end tests pass successfully. 28 | 29 | `$ make test` 30 | 31 | 4. Push changes to the newly created release branch. 32 | 33 | `$ git push origin <new-release-branch>` 34 | 35 | 5. Create a new release candidate tag on this branch. 36 | Tag name should correspond to `<new-release-branch>-rc<N>`. 37 | For example: `0.1.3-rc1` for the first release candidate. 38 | 39 | `$ git tag -a <tag-name> -m "<tag-description>"` 40 | 41 | `$ git push origin <tag-name>` 42 | 43 | It is possible that a release candidate is problematic and we need to do a new release candidate. In this case, we need to repeat this tagging step as many times as needed. 44 | 45 | 6. Push docker image for release to docker hub pravega repo: 46 | 47 | `$ make build-image` 48 | 49 | `$ docker tag pravega/bookkeeper-operator:latest pravega/bookkeeper-operator:<tag-name>` 50 | 51 | `$ docker push pravega/bookkeeper-operator:<tag-name>` 52 | 53 | 7. Once a release candidate is tested and there are no more changes needed, push a final release tag and image (like `0.1.3`) 54 | 55 | 8. Release Notes 56 | 57 | ### Major Release (Feature + bugfixes) 58 | 59 | This has non backward compatible changes. 60 | Here, we bump up the middle or most significant digit from earlier release.
61 | Follow same steps as minor release. 62 | 63 | ## Reference 64 | https://github.com/pravega/pravega/wiki/How-to-release 65 | -------------------------------------------------------------------------------- /test/e2e/scale_test.go: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | */ 10 | 11 | package e2e 12 | 13 | import ( 14 | . "github.com/onsi/ginkgo" 15 | . "github.com/onsi/gomega" 16 | 17 | bookkeeper_e2eutil "github.com/pravega/bookkeeper-operator/pkg/test/e2e/e2eutil" 18 | ) 19 | 20 | var _ = Describe("Scaling Cluster", func() { 21 | Context("Check Scale operations", func() { 22 | It("should scale without error", func() { 23 | 24 | cluster := bookkeeper_e2eutil.NewDefaultCluster(testNamespace) 25 | cluster.WithDefaults() 26 | 27 | bookkeeper, err := bookkeeper_e2eutil.CreateBKCluster(&t, k8sClient, cluster) 28 | Expect(err).NotTo(HaveOccurred()) 29 | 30 | err = bookkeeper_e2eutil.WaitForBookkeeperClusterToBecomeReady(&t, k8sClient, bookkeeper) 31 | Expect(err).NotTo(HaveOccurred()) 32 | 33 | // This is to get the latest Bookkeeper cluster object 34 | bookkeeper, err = bookkeeper_e2eutil.GetBKCluster(&t, k8sClient, bookkeeper) 35 | Expect(err).NotTo(HaveOccurred()) 36 | 37 | // Scale up Bookkeeper cluster 38 | bookkeeper.Spec.Replicas = 5 39 | 40 | err = bookkeeper_e2eutil.UpdateBKCluster(&t, k8sClient, bookkeeper) 41 | Expect(err).NotTo(HaveOccurred()) 42 | 43 | err = bookkeeper_e2eutil.WaitForBookkeeperClusterToBecomeReady(&t, k8sClient, bookkeeper) 44 | Expect(err).NotTo(HaveOccurred()) 45 | 46 | // This is to get the latest Bookkeeper cluster object 47 | bookkeeper, err = 
bookkeeper_e2eutil.GetBKCluster(&t, k8sClient, bookkeeper) 48 | Expect(err).NotTo(HaveOccurred()) 49 | 50 | // Scale down Bookkeeper cluster back to default 51 | bookkeeper.Spec.Replicas = 3 52 | 53 | err = bookkeeper_e2eutil.UpdateBKCluster(&t, k8sClient, bookkeeper) 54 | Expect(err).NotTo(HaveOccurred()) 55 | 56 | err = bookkeeper_e2eutil.WaitForBookkeeperClusterToBecomeReady(&t, k8sClient, bookkeeper) 57 | Expect(err).NotTo(HaveOccurred()) 58 | 59 | // Delete cluster 60 | err = bookkeeper_e2eutil.DeleteBKCluster(&t, k8sClient, bookkeeper) 61 | Expect(err).NotTo(HaveOccurred()) 62 | 63 | err = bookkeeper_e2eutil.WaitForBKClusterToTerminate(&t, k8sClient, bookkeeper) 64 | Expect(err).NotTo(HaveOccurred()) 65 | 66 | }) 67 | }) 68 | }) 69 | -------------------------------------------------------------------------------- /test/e2e/pod_deletion_test.go: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | */ 10 | 11 | package e2e 12 | 13 | import ( 14 | "time" 15 | 16 | . "github.com/onsi/ginkgo" 17 | . 
"github.com/onsi/gomega" 18 | bookkeeper_e2eutil "github.com/pravega/bookkeeper-operator/pkg/test/e2e/e2eutil" 19 | ) 20 | 21 | var _ = Describe("Delete pod test", func() { 22 | Context("Delete pod operations", func() { 23 | It("should delete pods ", func() { 24 | 25 | cluster := bookkeeper_e2eutil.NewDefaultCluster(testNamespace) 26 | cluster.WithDefaults() 27 | 28 | bookkeeper, err := bookkeeper_e2eutil.CreateBKCluster(&t, k8sClient, cluster) 29 | Expect(err).NotTo(HaveOccurred()) 30 | 31 | err = bookkeeper_e2eutil.WaitForBookkeeperClusterToBecomeReady(&t, k8sClient, bookkeeper) 32 | Expect(err).NotTo(HaveOccurred()) 33 | 34 | bookkeeper, err = bookkeeper_e2eutil.GetBKCluster(&t, k8sClient, bookkeeper) 35 | Expect(err).NotTo(HaveOccurred()) 36 | 37 | podDeleteCount := 1 38 | err = bookkeeper_e2eutil.DeletePods(&t, k8sClient, bookkeeper, podDeleteCount) 39 | Expect(err).NotTo(HaveOccurred()) 40 | 41 | time.Sleep(10 * time.Second) 42 | err = bookkeeper_e2eutil.WaitForBookkeeperClusterToBecomeReady(&t, k8sClient, bookkeeper) 43 | Expect(err).NotTo(HaveOccurred()) 44 | 45 | podDeleteCount = 2 46 | err = bookkeeper_e2eutil.DeletePods(&t, k8sClient, bookkeeper, podDeleteCount) 47 | Expect(err).NotTo(HaveOccurred()) 48 | time.Sleep(10 * time.Second) 49 | 50 | err = bookkeeper_e2eutil.WaitForBookkeeperClusterToBecomeReady(&t, k8sClient, bookkeeper) 51 | Expect(err).NotTo(HaveOccurred()) 52 | 53 | podDeleteCount = 3 54 | err = bookkeeper_e2eutil.DeletePods(&t, k8sClient, bookkeeper, podDeleteCount) 55 | Expect(err).NotTo(HaveOccurred()) 56 | time.Sleep(10 * time.Second) 57 | 58 | err = bookkeeper_e2eutil.WaitForBookkeeperClusterToBecomeReady(&t, k8sClient, bookkeeper) 59 | Expect(err).NotTo(HaveOccurred()) 60 | 61 | err = bookkeeper_e2eutil.DeleteBKCluster(&t, k8sClient, bookkeeper) 62 | Expect(err).NotTo(HaveOccurred()) 63 | 64 | err = bookkeeper_e2eutil.WaitForBKClusterToTerminate(&t, k8sClient, bookkeeper) 65 | Expect(err).NotTo(HaveOccurred()) 66 | 67 | }) 68 | }) 69 
| }) 70 | -------------------------------------------------------------------------------- /test/e2e/webhook_test.go: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2019 Dell Inc., or its subsidiaries. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | */ 10 | package e2e 11 | 12 | import ( 13 | . "github.com/onsi/ginkgo" 14 | . "github.com/onsi/gomega" 15 | bookkeeper_e2eutil "github.com/pravega/bookkeeper-operator/pkg/test/e2e/e2eutil" 16 | ) 17 | 18 | var _ = Describe("Webhook test", func() { 19 | Context("Webhook validation operations", func() { 20 | It("should throw proper error message with invalid config", func() { 21 | cluster := bookkeeper_e2eutil.NewDefaultCluster(testNamespace) 22 | cluster.WithDefaults() 23 | 24 | //Test webhook with an invalid Bookkeeper cluster version format 25 | invalidVersion := bookkeeper_e2eutil.NewClusterWithVersion(testNamespace, "999") 26 | invalidVersion.WithDefaults() 27 | _, err := bookkeeper_e2eutil.CreateBKCluster(&t, k8sClient, invalidVersion) 28 | Expect(err).To(HaveOccurred(), "Should reject deployment of invalid version format") 29 | Expect(err.Error()).To(ContainSubstring("request version is not in valid format:")) 30 | 31 | // Test webhook with a valid Bookkeeper cluster version format 32 | validVersion := bookkeeper_e2eutil.NewClusterWithVersion(testNamespace, "0.6.0") 33 | validVersion.WithDefaults() 34 | bookkeeper, err := bookkeeper_e2eutil.CreateBKCluster(&t, k8sClient, validVersion) 35 | Expect(err).NotTo(HaveOccurred()) 36 | 37 | err = bookkeeper_e2eutil.WaitForBookkeeperClusterToBecomeReady(&t, k8sClient, bookkeeper) 38 | Expect(err).NotTo(HaveOccurred()) 39 | 40 | // Try to downgrade the cluster 41 | bookkeeper, err = 
bookkeeper_e2eutil.GetBKCluster(&t, k8sClient, bookkeeper) 42 | Expect(err).NotTo(HaveOccurred()) 43 | bookkeeper.Spec.Version = "0.5.0" 44 | err = bookkeeper_e2eutil.UpdateBKCluster(&t, k8sClient, bookkeeper) 45 | Expect(err).To(HaveOccurred(), "Should not allow downgrade") 46 | Expect(err.Error()).To(ContainSubstring("downgrading the cluster from version 0.6.0 to 0.5.0 is not supported")) 47 | 48 | // Delete cluster 49 | err = bookkeeper_e2eutil.DeleteBKCluster(&t, k8sClient, bookkeeper) 50 | Expect(err).NotTo(HaveOccurred()) 51 | 52 | err = bookkeeper_e2eutil.WaitForBKClusterToTerminate(&t, k8sClient, bookkeeper) 53 | Expect(err).NotTo(HaveOccurred()) 54 | }) 55 | }) 56 | }) 57 | -------------------------------------------------------------------------------- /test/e2e/resources/zookeeper.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ServiceAccount 3 | metadata: 4 | name: zookeeper-operator 5 | 6 | --- 7 | 8 | kind: ClusterRole 9 | apiVersion: rbac.authorization.k8s.io/v1 10 | metadata: 11 | name: zookeeper-operator 12 | rules: 13 | - apiGroups: 14 | - zookeeper.pravega.io 15 | resources: 16 | - "*" 17 | verbs: 18 | - "*" 19 | - apiGroups: 20 | - "" 21 | resources: 22 | - pods 23 | - services 24 | - endpoints 25 | - persistentvolumeclaims 26 | - events 27 | - configmaps 28 | - secrets 29 | verbs: 30 | - "*" 31 | - apiGroups: 32 | - apps 33 | resources: 34 | - deployments 35 | - daemonsets 36 | - replicasets 37 | - statefulsets 38 | verbs: 39 | - "*" 40 | - apiGroups: 41 | - policy 42 | resources: 43 | - poddisruptionbudgets 44 | verbs: 45 | - "*" 46 | 47 | --- 48 | 49 | kind: ClusterRoleBinding 50 | apiVersion: rbac.authorization.k8s.io/v1 51 | metadata: 52 | name: zookeeper-operator-cluster-role-binding 53 | subjects: 54 | - kind: ServiceAccount 55 | name: zookeeper-operator 56 | namespace: default 57 | roleRef: 58 | kind: ClusterRole 59 | name: zookeeper-operator 60 | apiGroup: 
rbac.authorization.k8s.io 61 | 62 | --- 63 | 64 | apiVersion: apps/v1 65 | kind: Deployment 66 | metadata: 67 | name: zookeeper-operator 68 | spec: 69 | replicas: 1 70 | selector: 71 | matchLabels: 72 | name: zookeeper-operator 73 | template: 74 | metadata: 75 | labels: 76 | name: zookeeper-operator 77 | spec: 78 | serviceAccountName: zookeeper-operator 79 | containers: 80 | - name: zookeeper-operator 81 | image: pravega/zookeeper-operator:latest 82 | ports: 83 | - containerPort: 60000 84 | name: metrics 85 | command: 86 | - zookeeper-operator 87 | imagePullPolicy: IfNotPresent 88 | env: 89 | - name: WATCH_NAMESPACE 90 | value: "" 91 | - name: POD_NAME 92 | valueFrom: 93 | fieldRef: 94 | fieldPath: metadata.name 95 | - name: OPERATOR_NAME 96 | value: "zookeeper-operator" 97 | 98 | --- 99 | 100 | apiVersion: "zookeeper.pravega.io/v1beta1" 101 | kind: "ZookeeperCluster" 102 | metadata: 103 | name: "zookeeper" 104 | spec: 105 | image: 106 | repository: "pravega/zookeeper" 107 | tag: latest 108 | pullPolicy: IfNotPresent 109 | replicas: 1 110 | persistence: 111 | reclaimPolicy: Delete 112 | -------------------------------------------------------------------------------- /config/rbac/rbac.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ServiceAccount 3 | metadata: 4 | name: bookkeeper-operator 5 | 6 | --- 7 | 8 | kind: Role 9 | apiVersion: rbac.authorization.k8s.io/v1 10 | metadata: 11 | name: bookkeeper-operator 12 | rules: 13 | - apiGroups: 14 | - bookkeeper.pravega.io 15 | resources: 16 | - "*" 17 | verbs: 18 | - "*" 19 | - apiGroups: 20 | - "" 21 | resources: 22 | - pods 23 | - services 24 | - endpoints 25 | - persistentvolumeclaims 26 | - events 27 | - configmaps 28 | - secrets 29 | verbs: 30 | - '*' 31 | - apiGroups: 32 | - apps 33 | resources: 34 | - deployments 35 | - daemonsets 36 | - replicasets 37 | - statefulsets 38 | verbs: 39 | - "*" 40 | - apiGroups: 41 | - policy 42 | resources: 43 | - 
poddisruptionbudgets 44 | verbs: 45 | - "*" 46 | - apiGroups: 47 | - batch 48 | resources: 49 | - jobs 50 | verbs: 51 | - '*' 52 | 53 | --- 54 | 55 | kind: ClusterRole 56 | apiVersion: rbac.authorization.k8s.io/v1 57 | metadata: 58 | name: bookkeeper-operator 59 | rules: 60 | - apiGroups: 61 | - "" 62 | resources: 63 | - nodes 64 | - pods 65 | - services 66 | - endpoints 67 | - persistentvolumeclaims 68 | - events 69 | - configmaps 70 | - secrets 71 | verbs: 72 | - get 73 | - watch 74 | - list 75 | - create 76 | - update 77 | - delete 78 | - apiGroups: 79 | - admissionregistration.k8s.io 80 | resources: 81 | - "*" 82 | verbs: 83 | - '*' 84 | - apiGroups: 85 | - bookkeeper.pravega.io 86 | resources: 87 | - "*" 88 | verbs: 89 | - "*" 90 | - apiGroups: 91 | - policy 92 | resources: 93 | - poddisruptionbudgets 94 | verbs: 95 | - "*" 96 | - apiGroups: 97 | - apps 98 | resources: 99 | - deployments 100 | - daemonsets 101 | - replicasets 102 | - statefulsets 103 | verbs: 104 | - "*" 105 | 106 | --- 107 | 108 | kind: RoleBinding 109 | apiVersion: rbac.authorization.k8s.io/v1 110 | metadata: 111 | name: bookkeeper-operator 112 | subjects: 113 | - kind: ServiceAccount 114 | name: bookkeeper-operator 115 | roleRef: 116 | kind: Role 117 | name: bookkeeper-operator 118 | apiGroup: rbac.authorization.k8s.io 119 | 120 | --- 121 | 122 | kind: ClusterRoleBinding 123 | apiVersion: rbac.authorization.k8s.io/v1 124 | metadata: 125 | name: bookkeeper-operator 126 | subjects: 127 | - kind: ServiceAccount 128 | name: bookkeeper-operator 129 | namespace: default 130 | roleRef: 131 | kind: ClusterRole 132 | name: bookkeeper-operator 133 | apiGroup: rbac.authorization.k8s.io 134 | -------------------------------------------------------------------------------- /pkg/util/k8sutil.go: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. 
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | */ 10 | 11 | package util 12 | 13 | import ( 14 | "fmt" 15 | 16 | corev1 "k8s.io/api/core/v1" 17 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 18 | ) 19 | 20 | func DownwardAPIEnv() []corev1.EnvVar { 21 | return []corev1.EnvVar{ 22 | { 23 | Name: "POD_NAME", 24 | ValueFrom: &corev1.EnvVarSource{ 25 | FieldRef: &corev1.ObjectFieldSelector{ 26 | APIVersion: "v1", 27 | FieldPath: "metadata.name", 28 | }, 29 | }, 30 | }, 31 | { 32 | Name: "POD_NAMESPACE", 33 | ValueFrom: &corev1.EnvVarSource{ 34 | FieldRef: &corev1.ObjectFieldSelector{ 35 | APIVersion: "v1", 36 | FieldPath: "metadata.namespace", 37 | }, 38 | }, 39 | }, 40 | } 41 | } 42 | 43 | func PodAntiAffinity(component string, clusterName string) *corev1.Affinity { 44 | return &corev1.Affinity{ 45 | PodAntiAffinity: &corev1.PodAntiAffinity{ 46 | PreferredDuringSchedulingIgnoredDuringExecution: []corev1.WeightedPodAffinityTerm{ 47 | { 48 | Weight: 100, 49 | PodAffinityTerm: corev1.PodAffinityTerm{ 50 | LabelSelector: &metav1.LabelSelector{ 51 | MatchExpressions: []metav1.LabelSelectorRequirement{ 52 | { 53 | Key: "component", 54 | Operator: metav1.LabelSelectorOpIn, 55 | Values: []string{component}, 56 | }, 57 | { 58 | Key: "bookkeeper_cluster", 59 | Operator: metav1.LabelSelectorOpIn, 60 | Values: []string{clusterName}, 61 | }, 62 | }, 63 | }, 64 | TopologyKey: "kubernetes.io/hostname", 65 | }, 66 | }, 67 | }, 68 | }, 69 | } 70 | } 71 | 72 | func IsPodReady(pod *corev1.Pod) bool { 73 | for _, condition := range pod.Status.Conditions { 74 | if condition.Type == corev1.PodReady && condition.Status == corev1.ConditionTrue { 75 | return true 76 | } 77 | } 78 | return false 79 | } 80 | 81 | func IsPodFaulty(pod *corev1.Pod) (bool, error) { 82 | if 
len(pod.Status.ContainerStatuses) > 0 && pod.Status.ContainerStatuses[0].State.Waiting != nil && (pod.Status.ContainerStatuses[0].State.Waiting.Reason == "ImagePullBackOff" || 83 | pod.Status.ContainerStatuses[0].State.Waiting.Reason == "CrashLoopBackOff") { 84 | return true, fmt.Errorf("pod %s update failed because of %s", pod.Name, pod.Status.ContainerStatuses[0].State.Waiting.Reason) 85 | } 86 | return false, nil 87 | } 88 | -------------------------------------------------------------------------------- /test/e2e/basic_test.go: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | */ 10 | 11 | package e2e 12 | 13 | import ( 14 | "fmt" 15 | . "github.com/onsi/ginkgo" 16 | . 
"github.com/onsi/gomega" 17 | 18 | bookkeeper_e2eutil "github.com/pravega/bookkeeper-operator/pkg/test/e2e/e2eutil" 19 | ) 20 | 21 | // Test create and recreate a Bookkeeper cluster with the same name 22 | 23 | var _ = Describe("Test create and recreate Bookkeeper cluster with the same name", func() { 24 | Context("Check create/delete operations", func() { 25 | It("should create and delete operations should be successful", func() { 26 | By("create Bookkeeper cluster") 27 | defaultCluster := bookkeeper_e2eutil.NewDefaultCluster(testNamespace) 28 | defaultCluster.WithDefaults() 29 | defaultCluster.Spec.HeadlessSvcNameSuffix = "headlesssvc" 30 | 31 | bookkeeper, err := bookkeeper_e2eutil.CreateBKCluster(&t, k8sClient, defaultCluster) 32 | Expect(err).NotTo(HaveOccurred()) 33 | 34 | Expect(bookkeeper_e2eutil.WaitForBookkeeperClusterToBecomeReady(&t, k8sClient, defaultCluster)).NotTo(HaveOccurred()) 35 | 36 | // This is to get the latest Bookkeeper cluster object 37 | bookkeeper, err = bookkeeper_e2eutil.GetBKCluster(&t, k8sClient, bookkeeper) 38 | Expect(err).NotTo(HaveOccurred()) 39 | svcName := fmt.Sprintf("%s-headlesssvc", bookkeeper.Name) 40 | err = bookkeeper_e2eutil.CheckServiceExists(&t, k8sClient, bookkeeper, svcName) 41 | Expect(err).NotTo(HaveOccurred()) 42 | By("delete created Bookkeeper cluster") 43 | Expect(k8sClient.Delete(ctx, bookkeeper)).Should(Succeed()) 44 | Expect(bookkeeper_e2eutil.WaitForBKClusterToTerminate(&t, k8sClient, bookkeeper)).NotTo(HaveOccurred()) 45 | 46 | By("create Bookkeeper cluster with the same name") 47 | defaultCluster = bookkeeper_e2eutil.NewDefaultCluster(testNamespace) 48 | defaultCluster.WithDefaults() 49 | 50 | bookkeeper, err = bookkeeper_e2eutil.CreateBKCluster(&t, k8sClient, defaultCluster) 51 | Expect(err).NotTo(HaveOccurred()) 52 | Expect(bookkeeper_e2eutil.WaitForBookkeeperClusterToBecomeReady(&t, k8sClient, defaultCluster)).NotTo(HaveOccurred()) 53 | svcName = fmt.Sprintf("%s-bookie-headless", bookkeeper.Name) 54 | 
err = bookkeeper_e2eutil.CheckServiceExists(&t, k8sClient, bookkeeper, svcName) 55 | Expect(err).NotTo(HaveOccurred()) 56 | By("delete created Bookkeeper cluster") 57 | Expect(k8sClient.Delete(ctx, bookkeeper)).Should(Succeed()) 58 | Expect(bookkeeper_e2eutil.WaitForBKClusterToTerminate(&t, k8sClient, bookkeeper)).NotTo(HaveOccurred()) 59 | }) 60 | }) 61 | }) 62 | -------------------------------------------------------------------------------- /pkg/util/k8sutil_test.go: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | */ 10 | package util 11 | 12 | import ( 13 | . "github.com/onsi/ginkgo" 14 | . "github.com/onsi/gomega" 15 | v1 "k8s.io/api/core/v1" 16 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 17 | ) 18 | 19 | var _ = Describe("k8sutil", func() { 20 | Context("DownwardAPIEnv()", func() { 21 | env := DownwardAPIEnv() 22 | It("should not be nil", func() { 23 | Ω(env).ShouldNot(BeNil()) 24 | }) 25 | }) 26 | Context("PodAntiAffinity", func() { 27 | affinity := PodAntiAffinity("bookie", "bkcluster") 28 | It("should not be nil", func() { 29 | Ω(affinity).ShouldNot(BeNil()) 30 | }) 31 | 32 | }) 33 | Context("podReady", func() { 34 | var result, result1 bool 35 | BeforeEach(func() { 36 | testpod := &v1.Pod{ObjectMeta: metav1.ObjectMeta{Namespace: "default"}, Spec: v1.PodSpec{Containers: []v1.Container{{Image: "testimage"}}}, 37 | Status: v1.PodStatus{ 38 | Conditions: []v1.PodCondition{ 39 | { 40 | Type: v1.PodReady, 41 | Status: v1.ConditionTrue, 42 | }, 43 | }}, 44 | } 45 | testpod1 := &v1.Pod{ObjectMeta: metav1.ObjectMeta{Namespace: "default"}, Spec: v1.PodSpec{Containers: []v1.Container{{Image: 
"testimage"}}}} 46 | result = IsPodReady(testpod) 47 | result1 = IsPodReady(testpod1) 48 | }) 49 | It("pod ready should be true", func() { 50 | Ω(result).To(Equal(true)) 51 | }) 52 | It("pod ready should be false", func() { 53 | Ω(result1).To(Equal(false)) 54 | }) 55 | }) 56 | Context("podFaulty", func() { 57 | var result, result1 bool 58 | BeforeEach(func() { 59 | testpod := &v1.Pod{ObjectMeta: metav1.ObjectMeta{Namespace: "default", Name: "test"}, Spec: v1.PodSpec{Containers: []v1.Container{{Image: "testimage"}}}, 60 | Status: v1.PodStatus{ 61 | ContainerStatuses: []v1.ContainerStatus{ 62 | { 63 | Name: "test", 64 | State: v1.ContainerState{ 65 | Waiting: &v1.ContainerStateWaiting{ 66 | Reason: "CrashLoopBackOff", 67 | }, 68 | }, 69 | }, 70 | }}, 71 | } 72 | testpod1 := &v1.Pod{ObjectMeta: metav1.ObjectMeta{Namespace: "default", Name: "test"}, Spec: v1.PodSpec{Containers: []v1.Container{{Image: "testimage"}}}, 73 | Status: v1.PodStatus{ 74 | ContainerStatuses: []v1.ContainerStatus{ 75 | { 76 | Name: "test", 77 | State: v1.ContainerState{}, 78 | }, 79 | }}, 80 | } 81 | result, _ = IsPodFaulty(testpod) 82 | result1, _ = IsPodFaulty(testpod1) 83 | }) 84 | It("pod faulty should be true", func() { 85 | Ω(result).To(Equal(true)) 86 | }) 87 | It("pod faulty should be false", func() { 88 | Ω(result1).To(Equal(false)) 89 | }) 90 | }) 91 | 92 | }) 93 | -------------------------------------------------------------------------------- /pkg/util/zookeeper_util.go: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | */ 10 | 11 | package util 12 | 13 | import ( 14 | "container/list" 15 | "fmt" 16 | "log" 17 | "strings" 18 | "time" 19 | 20 | "github.com/samuel/go-zookeeper/zk" 21 | ) 22 | 23 | const ( 24 | // Set in https://github.com/pravega/bookkeeper/blob/master/docker/bookkeeper/entrypoint.sh#L21 25 | PravegaPath = "pravega" 26 | ZkFinalizer = "cleanUpZookeeper" 27 | ) 28 | 29 | // Delete all znodes related to a specific Bookkeeper cluster 30 | func DeleteAllZnodes(uri string, namespace string, pravegaClusterName string) (err error) { 31 | zkUri := strings.Split(uri, ":") 32 | zkSvcName := "" 33 | zkSvcPort := "" 34 | if len(zkUri) >= 1 { 35 | zkSvcName = zkUri[0] 36 | if len(zkUri) == 1 { 37 | zkSvcPort = "2181" 38 | } else { 39 | zkSvcPort = zkUri[1] 40 | } 41 | } 42 | hostname := zkSvcName + "." + namespace + ".svc.cluster.local:" + zkSvcPort 43 | host := []string{hostname} 44 | conn, _, err := zk.Connect(host, time.Second*5) 45 | if err != nil { 46 | return fmt.Errorf("failed to connect to zookeeper (%s): %v", hostname, err) 47 | } 48 | defer conn.Close() 49 | 50 | root := fmt.Sprintf("/%s/%s", PravegaPath, pravegaClusterName) 51 | exist, _, err := conn.Exists(root) 52 | if err != nil { 53 | return fmt.Errorf("failed to check if zookeeper path exists: %v", err) 54 | } 55 | 56 | if exist { 57 | // Construct BFS tree to delete all znodes recursively 58 | tree, err := ListSubTreeBFS(conn, root) 59 | if err != nil { 60 | return fmt.Errorf("failed to construct BFS tree: %v", err) 61 | } 62 | 63 | for tree.Len() != 0 { 64 | err := conn.Delete(tree.Back().Value.(string), -1) 65 | if err != nil { 66 | return fmt.Errorf("failed to delete znode (%s): %v", tree.Back().Value.(string), err) 67 | } 68 | tree.Remove(tree.Back()) 69 | } 70 | log.Println("zookeeper metadata deleted") 71 | } else { 72 | log.Println("zookeeper metadata not found") 73 | } 74 | return nil 75 | } 76 | 77 | 
// Construct a BFS tree 78 | func ListSubTreeBFS(conn *zk.Conn, root string) (*list.List, error) { 79 | queue := list.New() 80 | tree := list.New() 81 | queue.PushBack(root) 82 | tree.PushBack(root) 83 | 84 | for { 85 | if queue.Len() == 0 { 86 | break 87 | } 88 | node := queue.Front() 89 | children, _, err := conn.Children(node.Value.(string)) 90 | if err != nil { 91 | return tree, err 92 | } 93 | 94 | for _, child := range children { 95 | childPath := fmt.Sprintf("%s/%s", node.Value.(string), child) 96 | queue.PushBack(childPath) 97 | tree.PushBack(childPath) 98 | } 99 | queue.Remove(node) 100 | } 101 | return tree, nil 102 | } 103 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Bookkeeper Operator 2 | 3 | [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) [![GoDoc](https://godoc.org/github.com/pravega/bookkeeper-operator?status.svg)](https://godoc.org/github.com/pravega/bookkeeper-operator) [![Build Status](https://travis-ci.org/pravega/bookkeeper-operator.svg?branch=master)](https://travis-ci.org/pravega/bookkeeper-operator) [![Go Report](https://goreportcard.com/badge/github.com/pravega/bookkeeper-operator)](https://goreportcard.com/report/github.com/pravega/bookkeeper-operator) [![Version](https://img.shields.io/github/release/pravega/bookkeeper-operator.svg)](https://github.com/pravega/bookkeeper-operator/releases) 4 | 5 | ## Overview 6 | 7 | [Bookkeeper](https://bookkeeper.apache.org/) is a scalable, fault-tolerant, and low-latency storage service optimized for real-time workloads. 8 | 9 | The Bookkeeper Operator manages Bookkeeper clusters deployed to Kubernetes and automates tasks related to operating a Bookkeeper cluster.The operator itself is built with the [Operator framework](https://github.com/operator-framework/operator-sdk). 
10 | 11 | ## Project status: alpha 12 | 13 | The project is currently alpha. While no breaking API changes are currently planned, we reserve the right to address bugs and change the API before the project is declared stable. 14 | 15 | ## Install the Operator 16 | 17 | To understand how to deploy a Bookkeeper Operator refer to [Operator Deployment](https://github.com/pravega/charts/tree/master/charts/bookkeeper-operator#deploying-bookkeeper-operator). 18 | 19 | ## Upgrade the Operator 20 | 21 | For upgrading the bookkeeper operator check the document on [Operator Upgrade](doc/operator-upgrade.md) 22 | 23 | ## Features 24 | 25 | - [x] [Create and destroy a Bookkeeper cluster](https://github.com/pravega/charts/tree/master/charts/bookkeeper#deploying-bookkeeper) 26 | - [x] [Resize cluster](https://github.com/pravega/charts/tree/master/charts/bookkeeper#updating-bookkeeper-cluster) 27 | - [x] [Rolling upgrades/Rollback](doc/upgrade-cluster.md) 28 | - [x] [Bookkeeper Configuration tuning](doc/configuration.md) 29 | - [x] Input validation 30 | 31 | ## Development 32 | 33 | Check out the [development guide](doc/development.md). 34 | 35 | ## Releases 36 | 37 | The latest Bookkeeper releases can be found on the [Github Release](https://github.com/pravega/bookkeeper-operator/releases) project page. 38 | 39 | ## Contributing and Community 40 | 41 | We strive to build a welcoming and open community for anyone who wants to use the operator or contribute to it. [Here](https://github.com/pravega/bookkeeper-operator/wiki/Contributing) we describe how to contribute to the Bookkeeper operator. Contact the developers and community on [slack](https://pravega-io.slack.com/) ([signup](https://pravega-slack-invite.herokuapp.com/)) if you need any help. 
42 | 43 | ## Troubleshooting 44 | 45 | Check out the [bookkeeper troubleshooting](doc/troubleshooting.md#bookkeeper-cluster-issues) for bookkeeper issues and for operator issues [operator troubleshooting](doc/troubleshooting.md#bookkeeper-operator-issues). 46 | 47 | ## License 48 | 49 | Bookkeeper Operator is under Apache 2.0 license. See the [LICENSE](https://github.com/pravega/bookkeeper-operator/blob/master/LICENSE) for details. 50 | -------------------------------------------------------------------------------- /test/e2e/resources/local-storage.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Namespace 3 | metadata: 4 | name: local-path-storage 5 | --- 6 | apiVersion: v1 7 | kind: ServiceAccount 8 | metadata: 9 | name: local-path-provisioner-service-account 10 | namespace: local-path-storage 11 | --- 12 | apiVersion: rbac.authorization.k8s.io/v1 13 | kind: ClusterRole 14 | metadata: 15 | name: local-path-provisioner-role 16 | rules: 17 | - apiGroups: [""] 18 | resources: ["nodes", "persistentvolumeclaims"] 19 | verbs: ["get", "list", "watch"] 20 | - apiGroups: [""] 21 | resources: ["endpoints", "persistentvolumes", "pods"] 22 | verbs: ["*"] 23 | - apiGroups: [""] 24 | resources: ["events"] 25 | verbs: ["create", "patch"] 26 | - apiGroups: ["storage.k8s.io"] 27 | resources: ["storageclasses"] 28 | verbs: ["get", "list", "watch"] 29 | --- 30 | apiVersion: rbac.authorization.k8s.io/v1 31 | kind: ClusterRoleBinding 32 | metadata: 33 | name: local-path-provisioner-bind 34 | roleRef: 35 | apiGroup: rbac.authorization.k8s.io 36 | kind: ClusterRole 37 | name: local-path-provisioner-role 38 | subjects: 39 | - kind: ServiceAccount 40 | name: local-path-provisioner-service-account 41 | namespace: local-path-storage 42 | --- 43 | apiVersion: apps/v1 44 | kind: Deployment 45 | metadata: 46 | name: local-path-provisioner 47 | namespace: local-path-storage 48 | spec: 49 | replicas: 1 50 | selector: 51 | 
matchLabels: 52 | app: local-path-provisioner 53 | template: 54 | metadata: 55 | labels: 56 | app: local-path-provisioner 57 | spec: 58 | serviceAccountName: local-path-provisioner-service-account 59 | containers: 60 | - name: local-path-provisioner 61 | image: rancher/local-path-provisioner:v0.0.14 62 | imagePullPolicy: IfNotPresent 63 | command: 64 | - local-path-provisioner 65 | - --debug 66 | - start 67 | - --config 68 | - /etc/config/config.json 69 | volumeMounts: 70 | - name: config-volume 71 | mountPath: /etc/config/ 72 | env: 73 | - name: POD_NAMESPACE 74 | valueFrom: 75 | fieldRef: 76 | fieldPath: metadata.namespace 77 | volumes: 78 | - name: config-volume 79 | configMap: 80 | name: local-path-config 81 | --- 82 | apiVersion: storage.k8s.io/v1 83 | kind: StorageClass 84 | metadata: 85 | name: standard 86 | annotations: 87 | storageclass.kubernetes.io/is-default-class: "true" 88 | provisioner: rancher.io/local-path 89 | volumeBindingMode: WaitForFirstConsumer 90 | reclaimPolicy: Delete 91 | --- 92 | kind: ConfigMap 93 | apiVersion: v1 94 | metadata: 95 | name: local-path-config 96 | namespace: local-path-storage 97 | data: 98 | config.json: |- 99 | { 100 | "nodePathMap":[ 101 | { 102 | "node":"DEFAULT_PATH_FOR_NON_LISTED_NODES", 103 | "paths":["/data"] 104 | } 105 | ] 106 | } 107 | setup: |- 108 | #!/bin/sh 109 | path=$1 110 | mkdir -m 0777 -p ${path} 111 | teardown: |- 112 | #!/bin/sh 113 | path=$1 114 | rm -rf ${path} 115 | -------------------------------------------------------------------------------- /test/e2e/upgrade_test.go: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | */ 10 | 11 | package e2e 12 | 13 | import ( 14 | "time" 15 | 16 | . "github.com/onsi/ginkgo" 17 | . "github.com/onsi/gomega" 18 | 19 | bookkeeper_e2eutil "github.com/pravega/bookkeeper-operator/pkg/test/e2e/e2eutil" 20 | ) 21 | 22 | var _ = Describe("Upgrade Cluster", func() { 23 | Context("upgrade operations", func() { 24 | It("upgrade pods shoould be successful", func() { 25 | cluster := bookkeeper_e2eutil.NewDefaultCluster(testNamespace) 26 | cluster.WithDefaults() 27 | initialVersion := "0.6.0" 28 | firstUpgradeVersion := "0.7.0" 29 | secondUpgradeVersion := "0.7.1" 30 | cluster.Spec.Version = initialVersion 31 | 32 | bookkeeper, err := bookkeeper_e2eutil.CreateBKCluster(&t, k8sClient, cluster) 33 | Expect(err).NotTo(HaveOccurred()) 34 | 35 | err = bookkeeper_e2eutil.WaitForBookkeeperClusterToBecomeReady(&t, k8sClient, bookkeeper) 36 | Expect(err).NotTo(HaveOccurred()) 37 | 38 | // This is to get the latest Bookkeeper cluster object 39 | bookkeeper, err = bookkeeper_e2eutil.GetBKCluster(&t, k8sClient, bookkeeper) 40 | Expect(err).NotTo(HaveOccurred()) 41 | Expect(bookkeeper.Status.CurrentVersion).To(Equal(initialVersion)) 42 | 43 | // This is to get the latest Bookkeeper cluster object 44 | bookkeeper, err = bookkeeper_e2eutil.GetBKCluster(&t, k8sClient, bookkeeper) 45 | Expect(err).NotTo(HaveOccurred()) 46 | 47 | bookkeeper.Spec.Version = firstUpgradeVersion 48 | err = bookkeeper_e2eutil.UpdateBKCluster(&t, k8sClient, bookkeeper) 49 | Expect(err).NotTo(HaveOccurred()) 50 | time.Sleep(2 * time.Second) 51 | 52 | // trigger another upgrade while this upgrade is happening- it should fail 53 | bookkeeper, err = bookkeeper_e2eutil.GetBKCluster(&t, k8sClient, bookkeeper) 54 | Expect(err).NotTo(HaveOccurred()) 55 | bookkeeper.Spec.Version = secondUpgradeVersion 56 | err = bookkeeper_e2eutil.UpdateBKCluster(&t, k8sClient, bookkeeper) 57 | 
Expect(err).To(HaveOccurred(), "Should reject upgrade request while upgrade is in progress") 58 | Expect(err.Error()).To(ContainSubstring("failed to process the request, cluster is upgrading")) 59 | 60 | err = bookkeeper_e2eutil.WaitForBKClusterToUpgrade(&t, k8sClient, bookkeeper, firstUpgradeVersion) 61 | Expect(err).NotTo(HaveOccurred()) 62 | 63 | // This is to get the latest Bookkeeper cluster object 64 | bookkeeper, err = bookkeeper_e2eutil.GetBKCluster(&t, k8sClient, bookkeeper) 65 | Expect(err).NotTo(HaveOccurred()) 66 | 67 | Expect(bookkeeper.Spec.Version).To(Equal(firstUpgradeVersion)) 68 | Expect(bookkeeper.Status.CurrentVersion).To(Equal(firstUpgradeVersion)) 69 | Expect(bookkeeper.Status.TargetVersion).To(Equal("")) 70 | 71 | // check version history 72 | Expect(bookkeeper.Status.VersionHistory[0]).To(Equal("0.6.0")) 73 | Expect(bookkeeper.Status.VersionHistory[1]).To(Equal("0.7.0")) 74 | 75 | // Delete cluster 76 | err = bookkeeper_e2eutil.DeleteBKCluster(&t, k8sClient, bookkeeper) 77 | Expect(err).NotTo(HaveOccurred()) 78 | 79 | err = bookkeeper_e2eutil.WaitForBKClusterToTerminate(&t, k8sClient, bookkeeper) 80 | Expect(err).NotTo(HaveOccurred()) 81 | 82 | }) 83 | }) 84 | }) 85 | -------------------------------------------------------------------------------- /doc/webhook.md: -------------------------------------------------------------------------------- 1 | ## Admission Webhook 2 | 3 | [Admission webhooks](https://kubernetes.io/docs/reference/access-authn-authz/extensible-admission-controllers/) are HTTP callbacks that receive admission requests and do something with them. 
4 | There are two webhooks [ValidatingAdmissionWebhook](https://kubernetes.io/docs/reference/access-authn-authz/admission-controllers/#validatingadmissionwebhook) and 5 | [MutatingAdmissionWebhook](https://kubernetes.io/docs/reference/access-authn-authz/admission-controllers/#mutatingadmissionwebhook) which are basically doing the same thing except MutatingAdmissionWebhook can modify the requests. In our case, we are using a ValidatingAdmissionWebhook so that it can reject requests to enforce custom policies (which in our case is to ensure that the user is unable to install an invalid bookkeeper version or upgrade to any unsupported bookkeeper version). 6 | 7 | In the bookkeeper operator repo, we are leveraging the webhook implementation from controller-runtime package, here is the [GoDoc](https://godoc.org/sigs.k8s.io/controller-runtime/pkg/webhook). 8 | 9 | If you want to implement admission webhooks for your CRD, the only thing you need to do is to implement the `Defaulter` and (or) the `Validator` interface. Kubebuilder takes care of the rest for you, such as: 10 | - Creating the webhook server. 11 | - Ensuring the server has been added in the manager. 12 | - Creating handlers for your webhooks. 13 | - Registering each handler with a path in your server. 14 | The webhook server registers webhook configuration with the apiserver and creates an HTTP server to route requests to the handlers. 15 | The server is behind a Kubernetes Service and provides a certificate to the apiserver when serving requests. 16 | The kubebuilder has a detailed instruction of building a webhook, see [here](https://book.kubebuilder.io/cronjob-tutorial/webhook-implementation.html) 17 | 18 | The webhook feature itself is enabled by default but it can be disabled if `webhook=false` is specified when installing the 19 | operator locally using `operator-sdk run --local`. E.g. `operator-sdk run --local --operator-flags -webhook=false`. 
The use case of this is that webhook needs to be disabled when developing the operator locally since webhook can only be deployed in Kubernetes environment. 20 | 21 | ### How to deploy 22 | The ValidatingAdmissionWebhook and the webhook service should be deployed using the provided manifest `webhook.yaml` while deploying the Bookkeeper Operator. However, there are some configurations that are necessary to make webhook work. 23 | 24 | 1. Permission 25 | 26 | It is necessary to have permissions for `admissionregistration.k8s.io/v1beta1` resource to configure the webhook. The below is 27 | an example of the additional permission 28 | ``` 29 | - apiGroups: 30 | - admissionregistration.k8s.io 31 | resources: 32 | - validatingwebhookconfigurations 33 | verbs: 34 | - '*' 35 | ``` 36 | 37 | 2. Webhook service label selector 38 | 39 | The webhook will deploy a Kubernetes service. This service will need to select the operator pod as its backend. 40 | The way to select is using Kubernetes label selector and user will need to specify `"component": "bookkeeper-operator"` as the label 41 | when deploying the Bookkeeper operator deployment. 42 | 43 | ### What it does 44 | The webhook maintains a compatibility matrix of the Bookkeeper versions. Requests will be rejected if the version is not valid or not upgrade compatible with the current running version. Also, all the upgrade requests will be rejected if the current cluster is in upgrade status. 
45 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | 3 | # Temporary Build Files 4 | tmp/_output/ 5 | tmp/_test 6 | 7 | # Created by https://www.gitignore.io/api/go,vim,emacs,visualstudiocode,intellij 8 | 9 | ### Emacs ### 10 | # -*- mode: gitignore; -*- 11 | *~ 12 | \#*\# 13 | /.emacs.desktop 14 | /.emacs.desktop.lock 15 | *.elc 16 | auto-save-list 17 | tramp 18 | .\#* 19 | 20 | # Org-mode 21 | .org-id-locations 22 | *_archive 23 | 24 | # flymake-mode 25 | *_flymake.* 26 | 27 | # eshell files 28 | /eshell/history 29 | /eshell/lastdir 30 | 31 | # elpa packages 32 | /elpa/ 33 | 34 | # reftex files 35 | *.rel 36 | 37 | # AUCTeX auto folder 38 | /auto/ 39 | 40 | # cask packages 41 | .cask/ 42 | dist/ 43 | 44 | # Flycheck 45 | flycheck_*.el 46 | 47 | # server auth directory 48 | /server/ 49 | 50 | # projectiles files 51 | .projectile 52 | 53 | # directory configuration 54 | .dir-locals.el 55 | 56 | ### Go ### 57 | # Binaries for programs and plugins 58 | *.exe 59 | *.exe~ 60 | *.dll 61 | *.so 62 | *.dylib 63 | 64 | # Test binary, build with `go test -c` 65 | *.test 66 | 67 | # Output of the go coverage tool, specifically when used with LiteIDE 68 | *.out 69 | 70 | ### Intellij ### 71 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm 72 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 73 | 74 | # User-specific stuff 75 | .idea/**/vcs.xml 76 | .idea/**/workspace.xml 77 | .idea/**/tasks.xml 78 | .idea/**/usage.statistics.xml 79 | .idea/**/dictionaries 80 | .idea/**/shelf 81 | 82 | # Sensitive or high-churn files 83 | .idea/**/dataSources/ 84 | .idea/**/dataSources.ids 85 | .idea/**/dataSources.local.xml 86 | .idea/**/sqlDataSources.xml 87 | .idea/**/dynamic.xml 88 | .idea/**/uiDesigner.xml 89 | .idea/**/dbnavigator.xml 90 | 91 | # Gradle 92 
| .idea/**/gradle.xml 93 | .idea/**/libraries 94 | 95 | # CMake 96 | cmake-build-*/ 97 | 98 | # Mongo Explorer plugin 99 | .idea/**/mongoSettings.xml 100 | 101 | # File-based project format 102 | *.iws 103 | 104 | # IntelliJ 105 | out/ 106 | 107 | # mpeltonen/sbt-idea plugin 108 | .idea_modules/ 109 | 110 | # JIRA plugin 111 | atlassian-ide-plugin.xml 112 | 113 | # Cursive Clojure plugin 114 | .idea/replstate.xml 115 | 116 | # Crashlytics plugin (for Android Studio and IntelliJ) 117 | com_crashlytics_export_strings.xml 118 | crashlytics.properties 119 | crashlytics-build.properties 120 | fabric.properties 121 | 122 | # Editor-based Rest Client 123 | .idea/httpRequests 124 | 125 | ### Intellij Patch ### 126 | # Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721 127 | 128 | # *.iml 129 | # modules.xml 130 | # .idea/misc.xml 131 | # *.ipr 132 | 133 | # Sonarlint plugin 134 | .idea/sonarlint 135 | 136 | ### Vim ### 137 | # Swap 138 | [._]*.s[a-v][a-z] 139 | [._]*.sw[a-p] 140 | [._]s[a-rt-v][a-z] 141 | [._]ss[a-gi-z] 142 | [._]sw[a-p] 143 | 144 | # Session 145 | Session.vim 146 | 147 | # Temporary 148 | .netrwhist 149 | # Auto-generated tag files 150 | tags 151 | # Persistent undo 152 | [._]*.un~ 153 | 154 | ### VisualStudioCode ### 155 | .vscode/* 156 | !.vscode/settings.json 157 | !.vscode/tasks.json 158 | !.vscode/launch.json 159 | !.vscode/extensions.json 160 | 161 | 162 | # End of https://www.gitignore.io/api/go,vim,emacs,visualstudiocode,intellij 163 | 164 | bin/ 165 | 166 | # Ignore YAML files in the project root directory 167 | /*.yaml 168 | 169 | # Operator SDK 170 | deploy/test 171 | deploy/test-pod.yaml 172 | build/test-framework/ 173 | -------------------------------------------------------------------------------- /doc/troubleshooting.md: -------------------------------------------------------------------------------- 1 | # Troubleshooting 2 | 3 | ## Bookkeeper Cluster Issues 4 | 5 | * [Certificate Error: Internal 
error occurred: failed calling webhook](#certificate-error-internal-error-occurred-failed-calling-webhook) 6 | * [Invalid Cookie Exception](#invalid-cookie-exception) 7 | * [Unrecognized VM option](#unrecognized-vm-option) 8 | 9 | ## Bookkeeper operator Issues 10 | * [Operator pod in container creating state](#operator-pod-in-container-creating-state) 11 | 12 | ## Certificate Error: Internal error occurred: failed calling webhook 13 | 14 | While installing bookkeeper, if we get the error as below, 15 | ``` 16 | helm repo add pravega https://charts.pravega.io 17 | helm install [RELEASE_NAME] pravega/bookkeeper 18 | Error: Post https://bookkeeper-webhook-svc.default.svc:443/validate-bookkeeper-pravega-io-v1alpha1-bookkeepercluster?timeout=30s: x509: certificate signed by unknown authority 19 | ``` 20 | We need to ensure that certificates are installed before installing the operator. Please refer to the [prerequisites](https://github.com/pravega/charts/tree/master/charts/bookkeeper-operator#prerequisites) 21 | 22 | ## Invalid Cookie Exception 23 | 24 | While installing bookkeeper, if the pods are not coming to ready state `1/1` and in the bookie logs if the error messages are seen as below, 25 | 26 | ``` 27 | 2020-06-26 09:03:34,893 - ERROR - [main:Main@223] - Failed to build bookie server 28 | org.apache.bookkeeper.bookie.BookieException$InvalidCookieException: 29 | at org.apache.bookkeeper.bookie.Bookie.checkEnvironmentWithStorageExpansion(Bookie.java:470) 30 | at org.apache.bookkeeper.bookie.Bookie.checkEnvironment(Bookie.java:252) 31 | at org.apache.bookkeeper.bookie.Bookie.(Bookie.java:691) 32 | at org.apache.bookkeeper.proto.BookieServer.newBookie(BookieServer.java:137) 33 | at org.apache.bookkeeper.proto.BookieServer.(BookieServer.java:106) 34 | at org.apache.bookkeeper.server.service.BookieService.(BookieService.java:43) 35 | at org.apache.bookkeeper.server.Main.buildBookieServer(Main.java:301) 36 | at org.apache.bookkeeper.server.Main.doMain(Main.java:221) 37 
| at org.apache.bookkeeper.server.Main.main(Main.java:203) 38 | ``` 39 | 40 | We need to ensure that znode entries from the previous installation are cleaned up. This can be done by either cleaning up znode entries from zookeeper nodes or by completely reinstalling zookeeper. 41 | 42 | ## Unrecognized VM option 43 | 44 | While installing bookkeeper, if the pods don't come up to ready state and the logs contain the error shown below 45 | 46 | ``` 47 | Unrecognized VM option 'PrintGCDateStamps' 48 | Error: Could not create the Java Virtual Machine. 49 | Error: A fatal exception has occurred. Program will exit. 50 | ``` 51 | This happens because some of the default JVM options added by the operator are not supported by the Java version used by bookkeeper. This issue can therefore be resolved by setting an additional JVM option `IgnoreUnrecognizedVMOptions` while installing the bookkeeper cluster as shown below. 52 | 53 | ``` 54 | helm repo add pravega https://charts.pravega.io 55 | helm install [RELEASE_NAME] pravega/bookkeeper --version=[VERSION] --set zookeeperUri=[ZOOKEEPER_HOST] --set 'jvmOptions.extraOpts={-XX:+IgnoreUnrecognizedVMOptions}' 56 | ``` 57 | 58 | ## Operator pod in container creating state 59 | 60 | While installing the operator, if the operator pod stays in the `ContainerCreating` state for a long time, make sure certificates are installed correctly. 
Please refer to the [prerequisites](https://github.com/pravega/charts/tree/master/charts/bookkeeper-operator#prerequisites) 61 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/pravega/bookkeeper-operator 2 | 3 | go 1.21 4 | 5 | require ( 6 | github.com/google/go-cmp v0.5.6 // indirect 7 | github.com/hashicorp/go-version v1.2.0 8 | github.com/onsi/ginkgo v1.16.5 9 | github.com/onsi/gomega v1.17.0 10 | github.com/operator-framework/operator-lib v0.6.0 11 | github.com/samuel/go-zookeeper v0.0.0-20190923202752-2cc03de413da 12 | github.com/sirupsen/logrus v1.8.1 13 | k8s.io/api v0.23.0 14 | k8s.io/apimachinery v0.23.0 15 | k8s.io/client-go v0.23.0 16 | sigs.k8s.io/controller-runtime v0.11.0 17 | ) 18 | 19 | require ( 20 | cloud.google.com/go v0.81.0 // indirect 21 | github.com/Azure/go-autorest v14.2.0+incompatible // indirect 22 | github.com/Azure/go-autorest/autorest v0.11.18 // indirect 23 | github.com/Azure/go-autorest/autorest/adal v0.9.13 // indirect 24 | github.com/Azure/go-autorest/autorest/date v0.3.0 // indirect 25 | github.com/Azure/go-autorest/logger v0.2.1 // indirect 26 | github.com/Azure/go-autorest/tracing v0.6.0 // indirect 27 | github.com/beorn7/perks v1.0.1 // indirect 28 | github.com/cespare/xxhash/v2 v2.1.1 // indirect 29 | github.com/davecgh/go-spew v1.1.1 // indirect 30 | github.com/evanphx/json-patch v4.12.0+incompatible // indirect 31 | github.com/form3tech-oss/jwt-go v3.2.3+incompatible // indirect 32 | github.com/fsnotify/fsnotify v1.5.1 // indirect 33 | github.com/go-logr/logr v1.2.0 // indirect 34 | github.com/go-logr/zapr v1.2.0 // indirect 35 | github.com/gogo/protobuf v1.3.2 // indirect 36 | github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect 37 | github.com/golang/protobuf v1.5.2 // indirect 38 | github.com/google/gofuzz v1.1.0 // indirect 39 | github.com/google/uuid 
v1.1.2 // indirect 40 | github.com/googleapis/gnostic v0.5.5 // indirect 41 | github.com/imdario/mergo v0.3.12 // indirect 42 | github.com/json-iterator/go v1.1.12 // indirect 43 | github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182affec369 // indirect 44 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect 45 | github.com/modern-go/reflect2 v1.0.2 // indirect 46 | github.com/nxadm/tail v1.4.8 // indirect 47 | github.com/pkg/errors v0.9.1 // indirect 48 | github.com/prometheus/client_golang v1.11.1 // indirect 49 | github.com/prometheus/client_model v0.2.0 // indirect 50 | github.com/prometheus/common v0.28.0 // indirect 51 | github.com/prometheus/procfs v0.6.0 // indirect 52 | github.com/spf13/pflag v1.0.5 // indirect 53 | go.uber.org/atomic v1.7.0 // indirect 54 | go.uber.org/multierr v1.6.0 // indirect 55 | go.uber.org/zap v1.19.1 // indirect 56 | golang.org/x/crypto v0.17.0 // indirect 57 | golang.org/x/net v0.17.0 // indirect 58 | golang.org/x/oauth2 v0.0.0-20210819190943-2bc19b11175f // indirect 59 | golang.org/x/sys v0.15.0 // indirect 60 | golang.org/x/term v0.15.0 // indirect 61 | golang.org/x/text v0.14.0 // indirect 62 | golang.org/x/time v0.0.0-20210723032227-1f47c861a9ac // indirect 63 | gomodules.xyz/jsonpatch/v2 v2.2.0 // indirect 64 | google.golang.org/appengine v1.6.7 // indirect 65 | google.golang.org/protobuf v1.27.1 // indirect 66 | gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect 67 | gopkg.in/inf.v0 v0.9.1 // indirect 68 | gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 // indirect 69 | gopkg.in/yaml.v2 v2.4.0 // indirect 70 | gopkg.in/yaml.v3 v3.0.0 // indirect 71 | k8s.io/apiextensions-apiserver v0.23.0 // indirect 72 | k8s.io/component-base v0.23.0 // indirect 73 | k8s.io/klog/v2 v2.30.0 // indirect 74 | k8s.io/kube-openapi v0.0.0-20211115234752-e816edb12b65 // indirect 75 | k8s.io/utils v0.0.0-20210930125809-cb0fa318a74b // indirect 76 | sigs.k8s.io/json 
v0.0.0-20211020170558-c049b76a60c6 // indirect 77 | sigs.k8s.io/structured-merge-diff/v4 v4.2.0 // indirect 78 | sigs.k8s.io/yaml v1.3.0 // indirect 79 | ) 80 | -------------------------------------------------------------------------------- /test/e2e/suite_test.go: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | */ 10 | 11 | package e2e 12 | 13 | import ( 14 | "context" 15 | 16 | api "github.com/pravega/bookkeeper-operator/api/v1alpha1" 17 | bookkeeperv1alpha1 "github.com/pravega/bookkeeper-operator/api/v1alpha1" 18 | bookkeepercontroller "github.com/pravega/bookkeeper-operator/controllers" 19 | 20 | "k8s.io/client-go/kubernetes/scheme" 21 | _ "k8s.io/client-go/plugin/pkg/client/auth/oidc" 22 | "k8s.io/client-go/rest" 23 | "os" 24 | ctrl "sigs.k8s.io/controller-runtime" 25 | "sigs.k8s.io/controller-runtime/pkg/cache" 26 | "sigs.k8s.io/controller-runtime/pkg/client" 27 | "sigs.k8s.io/controller-runtime/pkg/envtest" 28 | logf "sigs.k8s.io/controller-runtime/pkg/log" 29 | "sigs.k8s.io/controller-runtime/pkg/log/zap" 30 | "testing" 31 | 32 | . "github.com/onsi/ginkgo" 33 | . "github.com/onsi/gomega" 34 | ) 35 | 36 | var ( 37 | cfg *rest.Config 38 | k8sClient client.Client // You'll be using this client in your tests. 
39 | testEnv *envtest.Environment 40 | ctx context.Context 41 | cancel context.CancelFunc 42 | testNamespace = "default" 43 | t testing.T 44 | ) 45 | 46 | func TestAPIs(t *testing.T) { 47 | RegisterFailHandler(Fail) 48 | RunSpecs(t, "Controller e2e Suite") 49 | } 50 | 51 | var _ = BeforeSuite(func() { 52 | logf.SetLogger(zap.New(zap.WriteTo(GinkgoWriter), zap.UseDevMode(true))) 53 | 54 | ctx, cancel = context.WithCancel(context.TODO()) 55 | 56 | enabled := true 57 | By("bootstrapping test environment") 58 | testEnv = &envtest.Environment{ 59 | Config: cfg, 60 | UseExistingCluster: &enabled, 61 | } 62 | 63 | /* 64 | Then, we start the envtest cluster. 65 | */ 66 | cfg, err := testEnv.Start() 67 | Expect(err).NotTo(HaveOccurred()) 68 | Expect(cfg).NotTo(BeNil()) 69 | 70 | err = bookkeeperv1alpha1.AddToScheme(scheme.Scheme) 71 | Expect(err).NotTo(HaveOccurred()) 72 | 73 | /* 74 | After the schemas, you will see the following marker. 75 | This marker is what allows new schemas to be added here automatically when a new API is added to the project. 76 | */ 77 | 78 | //+kubebuilder:scaffold:scheme 79 | 80 | /* 81 | A client is created for our test CRUD operations. 
82 | */ 83 | k8sClient, err = client.New(cfg, client.Options{Scheme: scheme.Scheme}) 84 | Expect(err).NotTo(HaveOccurred()) 85 | Expect(k8sClient).NotTo(BeNil()) 86 | 87 | if os.Getenv("RUN_LOCAL") == "true" { 88 | k8sManager, err := ctrl.NewManager(cfg, ctrl.Options{ 89 | Scheme: scheme.Scheme, 90 | Namespace: testNamespace, 91 | Port: 9443, 92 | NewCache: cache.MultiNamespacedCacheBuilder([]string{testNamespace}), 93 | }) 94 | Expect(err).ToNot(HaveOccurred()) 95 | 96 | err = (&bookkeepercontroller.BookkeeperClusterReconciler{ 97 | Client: k8sManager.GetClient(), 98 | Scheme: k8sManager.GetScheme(), 99 | }).SetupWithManager(k8sManager) 100 | Expect(err).ToNot(HaveOccurred()) 101 | 102 | go func() { 103 | defer GinkgoRecover() 104 | err = k8sManager.Start(ctrl.SetupSignalHandler()) 105 | Expect(err).ToNot(HaveOccurred(), "failed to run manager") 106 | }() 107 | } 108 | 109 | }, 60) 110 | 111 | /* 112 | Kubebuilder also generates boilerplate functions for cleaning up envtest and actually running your test files in your controllers/ directory. 113 | You won't need to touch these. 114 | */ 115 | 116 | var _ = AfterSuite(func() { 117 | cancel() 118 | By("tearing down the test environment") 119 | err := testEnv.Stop() 120 | Expect(err).NotTo(HaveOccurred()) 121 | }) 122 | 123 | var _ = AfterEach(func() { 124 | bkList := &api.BookkeeperClusterList{} 125 | listOptions := []client.ListOption{ 126 | client.InNamespace(testNamespace), 127 | } 128 | Expect(k8sClient.List(ctx, bkList, listOptions...)).NotTo(HaveOccurred()) 129 | for _, bk := range bkList.Items { 130 | Expect(k8sClient.Delete(ctx, &bk)).NotTo(HaveOccurred()) 131 | } 132 | }) 133 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. 
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0

SHELL=/bin/bash -o pipefail
CRD_OPTIONS ?= "crd"

PROJECT_NAME=bookkeeper-operator
REPO=pravega/$(PROJECT_NAME)
BASE_VERSION=0.1.9
ID=$(shell git rev-list HEAD --count)
GIT_SHA=$(shell git rev-parse --short HEAD)
VERSION=$(BASE_VERSION)-$(ID)-$(GIT_SHA)
GOOS=linux
GOARCH=amd64
TEST_REPO=testbkop/$(PROJECT_NAME)
# NOTE(review): a registry credential is committed here. It is now only a
# default (?=) so CI can supply it from the environment or a secret store;
# the committed value should be rotated and removed.
DOCKER_TEST_PASS ?= testbkop@123
DOCKER_TEST_USER ?= testbkop
TEST_IMAGE=$(TEST_REPO)-testimages:$(VERSION)

# None of these targets produce a file of the same name.
.PHONY: all build build-go build-image check check-format check-license clean \
	deploy login manifests push run-local test test-unit test-e2e \
	test-e2e-remote test-e2e-local undeploy

# Get the currently used golang install path (in GOPATH/bin, unless GOBIN is set)
ifeq (,$(shell go env GOBIN))
GOBIN=$(shell go env GOPATH)/bin
else
GOBIN=$(shell go env GOBIN)
endif

all: check build test

build: build-go build-image

build-go:
	CGO_ENABLED=0 GOOS=$(GOOS) GOARCH=$(GOARCH) go build \
	-ldflags "-X github.com/$(REPO)/pkg/version.Version=$(VERSION) -X github.com/$(REPO)/pkg/version.GitSHA=$(GIT_SHA)" \
	-o bin/$(PROJECT_NAME) main.go

## Location to install dependencies to
LOCALBIN ?= $(shell pwd)/bin
$(LOCALBIN):
	mkdir -p $(LOCALBIN)
## Tool Binaries
KUSTOMIZE ?= $(LOCALBIN)/kustomize
CONTROLLER_GEN ?= $(LOCALBIN)/controller-gen
## Tool Versions
KUSTOMIZE_VERSION ?= v3.5.4
CONTROLLER_TOOLS_VERSION ?= v0.9.0
KUSTOMIZE_INSTALL_SCRIPT ?= "https://raw.githubusercontent.com/kubernetes-sigs/kustomize/master/hack/install_kustomize.sh"
.PHONY: kustomize
kustomize: $(KUSTOMIZE) ## Download kustomize locally if necessary.
$(KUSTOMIZE): $(LOCALBIN)
	test -s $(LOCALBIN)/kustomize || { curl -s $(KUSTOMIZE_INSTALL_SCRIPT) | bash -s -- $(subst v,,$(KUSTOMIZE_VERSION)) $(LOCALBIN); }
.PHONY: controller-gen
controller-gen: $(CONTROLLER_GEN) ## Download controller-gen locally if necessary.
$(CONTROLLER_GEN): $(LOCALBIN)
	test -s $(LOCALBIN)/controller-gen || GOBIN=$(LOCALBIN) go install sigs.k8s.io/controller-tools/cmd/controller-gen@$(CONTROLLER_TOOLS_VERSION)



build-image:
	echo "$(REPO)"
	docker build --no-cache --build-arg VERSION=$(VERSION) --build-arg DOCKER_REGISTRY=$(DOCKER_REGISTRY) --build-arg GIT_SHA=$(GIT_SHA) -t $(REPO):$(VERSION) .
	docker tag $(REPO):$(VERSION) $(REPO):latest

test: test-unit test-e2e

test-unit:
	go test $$(go list ./... | grep -v /vendor/ | grep -v /test/e2e ) -race -coverprofile=coverage.txt -covermode=atomic

# Generate manifests e.g. CRD, RBAC etc.
manifests: controller-gen
	$(CONTROLLER_GEN) $(CRD_OPTIONS) rbac:roleName=manager-role webhook paths="./..." output:crd:artifacts:config=config/crd/bases

# Deploy controller in the configured Kubernetes cluster in ~/.kube/config
deploy: manifests kustomize
	cd config/manager && $(KUSTOMIZE) edit set image pravega/bookkeeper-operator=$(TEST_IMAGE)
	$(KUSTOMIZE) build config/default | kubectl apply -f -


# Undeploy controller in the configured Kubernetes cluster in ~/.kube/config
# Depends on kustomize so the binary exists even when deploy was not run first.
undeploy: kustomize
	$(KUSTOMIZE) build config/default | kubectl delete -f -

test-e2e: test-e2e-remote

# Recursive invocations use $(MAKE) so flags such as -n and -j propagate.
test-e2e-remote:
	$(MAKE) login
	docker build . -t $(TEST_IMAGE)
	docker push $(TEST_IMAGE)
	$(MAKE) deploy
	RUN_LOCAL=false go test -v -timeout 2h ./test/e2e...
	$(MAKE) undeploy

login:
	echo "$(DOCKER_TEST_PASS)" | docker login -u "$(DOCKER_TEST_USER)" --password-stdin

test-e2e-local:
	operator-sdk test local ./test/e2e --namespace default --up-local --go-test-flags "-v -timeout 0"

run-local:
	operator-sdk up local --operator-flags -webhook=false

push: build login
	docker push $(REPO):$(VERSION)
	if [[ ${TRAVIS_TAG} =~ ^([0-9]+\.[0-9]+\.[0-9]+)$$ ]]; then docker push $(REPO):latest; fi;

clean:
	rm -f bin/$(PROJECT_NAME)

check: check-format check-license

check-format:
	./scripts/check_format.sh

check-license:
	./scripts/check_license.sh
"github.com/onsi/gomega" 18 | bookkeeper_e2eutil "github.com/pravega/bookkeeper-operator/pkg/test/e2e/e2eutil" 19 | ) 20 | 21 | var _ = Describe("Conigmap upadate tets", func() { 22 | Context("Check configmap update operations", func() { 23 | It("Verify configmap updations are valid", func() { 24 | cluster := bookkeeper_e2eutil.NewDefaultCluster(testNamespace) 25 | cluster.WithDefaults() 26 | initialVersion := "0.6.0" 27 | upgradeVersion := "0.7.0" 28 | gcOpts := []string{"-XX:+UseG1GC", "-XX:MaxGCPauseMillis=10"} 29 | gcOptions := strings.Join(gcOpts, " ") 30 | cluster.Spec.Version = initialVersion 31 | cluster.Spec.Options["minorCompactionThreshold"] = "0.4" 32 | cluster.Spec.Options["journalDirectories"] = "/bk/journal" 33 | cluster.Spec.Options["useHostNameAsBookieID"] = "true" 34 | cluster.Spec.JVMOptions.GcOpts = gcOpts 35 | 36 | bookkeeper, err := bookkeeper_e2eutil.CreateBKCluster(&t, k8sClient, cluster) 37 | Expect(err).NotTo(HaveOccurred()) 38 | 39 | Expect(bookkeeper_e2eutil.WaitForBookkeeperClusterToBecomeReady(&t, k8sClient, cluster)).NotTo(HaveOccurred()) 40 | 41 | // This is to get the latest Bookkeeper cluster object 42 | bookkeeper, err = bookkeeper_e2eutil.GetBKCluster(&t, k8sClient, bookkeeper) 43 | Expect(err).NotTo(HaveOccurred()) 44 | 45 | err = bookkeeper_e2eutil.CheckConfigMap(&t, k8sClient, bookkeeper, "BOOKIE_GC_OPTS", gcOptions) 46 | Expect(err).NotTo(HaveOccurred()) 47 | 48 | // updating modifiable bookkeeper option 49 | gcOpts = []string{"-XX:-UseParallelGC", "-XX:MaxGCPauseMillis=10"} 50 | gcOptions = strings.Join(gcOpts, " ") 51 | bookkeeper.Spec.Version = upgradeVersion 52 | bookkeeper.Spec.Options["minorCompactionThreshold"] = "0.5" 53 | bookkeeper.Spec.JVMOptions.GcOpts = gcOpts 54 | 55 | // updating bookkeepercluster 56 | err = bookkeeper_e2eutil.UpdateBKCluster(&t, k8sClient, bookkeeper) 57 | Expect(err).NotTo(HaveOccurred()) 58 | 59 | // checking if the upgrade of options was successful 60 | err = 
bookkeeper_e2eutil.WaitForCMBKClusterToUpgrade(&t, k8sClient, bookkeeper) 61 | Expect(err).NotTo(HaveOccurred()) 62 | 63 | // This is to get the latest Bookkeeper cluster object 64 | bookkeeper, err = bookkeeper_e2eutil.GetBKCluster(&t, k8sClient, bookkeeper) 65 | Expect(err).NotTo(HaveOccurred()) 66 | err = bookkeeper_e2eutil.CheckConfigMap(&t, k8sClient, bookkeeper, "BOOKIE_GC_OPTS", gcOptions) 67 | Expect(err).NotTo(HaveOccurred()) 68 | Expect(bookkeeper.Spec.Version).To(Equal(upgradeVersion)) 69 | Expect(bookkeeper.Spec.Options["minorCompactionThreshold"]).To(Equal("0.5")) 70 | 71 | // updating non-modifiable bookkeeper option journalDirectories 72 | bookkeeper.Spec.Options["journalDirectories"] = "journal" 73 | 74 | //updating bookkeepercluster 75 | err = bookkeeper_e2eutil.UpdateBKCluster(&t, k8sClient, bookkeeper) 76 | Expect(strings.ContainsAny(err.Error(), "path of journal directories should not be changed")).To(Equal(true)) 77 | 78 | bookkeeper, err = bookkeeper_e2eutil.GetBKCluster(&t, k8sClient, bookkeeper) 79 | Expect(err).NotTo(HaveOccurred()) 80 | Expect(bookkeeper.Spec.Options["journalDirectories"]).To(Equal("/bk/journal")) 81 | 82 | // updating non-modifiable bookkeeper option useHostNameAsBookieID 83 | bookkeeper.Spec.Options["useHostNameAsBookieID"] = "false" 84 | 85 | //updating bookkeepercluster 86 | err = bookkeeper_e2eutil.UpdateBKCluster(&t, k8sClient, bookkeeper) 87 | Expect(strings.ContainsAny(err.Error(), "value of useHostNameAsBookieID should not be changed")).To(Equal(true)) 88 | 89 | bookkeeper, err = bookkeeper_e2eutil.GetBKCluster(&t, k8sClient, bookkeeper) 90 | Expect(err).NotTo(HaveOccurred()) 91 | Expect(bookkeeper.Spec.Options["useHostNameAsBookieID"]).To(Equal("true")) 92 | 93 | // Delete cluster 94 | err = bookkeeper_e2eutil.DeleteBKCluster(&t, k8sClient, bookkeeper) 95 | Expect(err).NotTo(HaveOccurred()) 96 | 97 | err = bookkeeper_e2eutil.WaitForBKClusterToTerminate(&t, k8sClient, bookkeeper) 98 | 
Expect(err).NotTo(HaveOccurred()) 99 | 100 | }) 101 | }) 102 | }) 103 | -------------------------------------------------------------------------------- /doc/bookkeeper-options.md: -------------------------------------------------------------------------------- 1 | ## BookKeeper options 2 | 3 | BookKeeper has many configuration options. The available options can be found [here](https://bookkeeper.apache.org/docs/4.7.0/reference/config/) and are expressed through the `options` part of the resource specification. 4 | 5 | All values must be expressed as Strings. 6 | 7 | Take metrics for example, here we choose codahale as our metrics provider. The default is Prometheus. 8 | 9 | ``` 10 | ... 11 | spec: 12 | options: 13 | enableStatistics: "true" 14 | statsProviderClass: "org.apache.bookkeeper.stats.codahale.CodahaleMetricsProvider" 15 | codahaleStatsGraphiteEndpoint: "graphite.example.com:2003" 16 | codahaleStatsOutputFrequencySeconds: "30" 17 | ... 18 | ``` 19 | ### BookKeeper JVM Options 20 | 21 | It is also possible to tune the BookKeeper JVM by passing customized JVM options. BookKeeper JVM Options 22 | are for Bookkeeper JVM whereas the aforementioned BookKeeper options are for BookKeeper server configuration. 23 | 24 | The format is as follows: 25 | ``` 26 | ... 27 | spec: 28 | jvmOptions: 29 | memoryOpts: ["-Xms2g", "-XX:MaxDirectMemorySize=2g"] 30 | gcOpts: ["-XX:MaxGCPauseMillis=20"] 31 | gcLoggingOpts: ["-XX:NumberOfGCLogFiles=10"] 32 | extraOpts: [] 33 | ... 34 | ``` 35 | The reason that we are using such detailed names like `memoryOpts` is because the BookKeeper official [scripts](https://github.com/apache/bookkeeper/blob/master/bin/common.sh#L118) are using those and we need to override it using the same name. JVM options that don't belong to the earlier 3 categories can be mentioned under `extraOpts`. 36 | 37 | There are a bunch of default options in the BookKeeper operator code that is good for general deployment. 
It is possible to override those default values by just passing the customized options. For example, the default option `"-XX:MaxDirectMemorySize=1g"` can be overridden by passing `"-XX:MaxDirectMemorySize=2g"` to 38 | the BookKeeper operator. The operator will detect `MaxDirectMemorySize` and override its default value if it exists. Check [here](https://www.oracle.com/technetwork/java/javase/tech/vmoptions-jsp-140102.html) for more JVM options. 39 | 40 | Default memoryOpts: 41 | ``` 42 | "-Xms1g", 43 | "-XX:MaxDirectMemorySize=1g", 44 | "-XX:+ExitOnOutOfMemoryError", 45 | "-XX:+CrashOnOutOfMemoryError", 46 | "-XX:+HeapDumpOnOutOfMemoryError", 47 | "-XX:HeapDumpPath=" + heapDumpDir, 48 | ``` 49 | If the BookKeeper version is greater than or equal to 0.4, then the following options are also added to the default memoryOpts: 50 | ``` 51 | "-XX:+UnlockExperimentalVMOptions", 52 | "-XX:+UseContainerSupport", 53 | "-XX:MaxRAMPercentage=50.0" 54 | ``` 55 | 56 | Default gcOpts: 57 | ``` 58 | "-XX:+UseG1GC", 59 | "-XX:MaxGCPauseMillis=10", 60 | "-XX:+ParallelRefProcEnabled", 61 | "-XX:+DoEscapeAnalysis", 62 | "-XX:ParallelGCThreads=32", 63 | "-XX:ConcGCThreads=32", 64 | "-XX:G1NewSizePercent=50", 65 | "-XX:+DisableExplicitGC", 66 | "-XX:-ResizePLAB", 67 | ``` 68 | 69 | Due to disruptive changes in GC Logging from Java 9, the BookKeeper operator code does not add any default gcLoggingOpts. The appropriate gcLoggingOpts can be provided based on the Java version used within the BookKeeper version being deployed. 
If the Java version is 8 or lower, the following options can be provided: 70 | ``` 71 | "-XX:+PrintGCDetails", 72 | "-XX:+PrintGCDateStamps", 73 | "-XX:+PrintGCApplicationStoppedTime", 74 | "-XX:+UseGCLogFileRotation", 75 | "-XX:NumberOfGCLogFiles=5", 76 | "-XX:GCLogFileSize=64m", 77 | ``` 78 | however, if the BookKeeper version uses Java 9 or higher, the following option should be provided to the gcLoggingOpts instead: 79 | ``` 80 | "-Xlog:gc*,safepoint::time,level,tags:filecount=5,filesize=64m" 81 | ``` 82 | 83 | ### BookKeeper Custom Configuration 84 | 85 | It is possible to add additional parameters into the BookKeeper container by allowing users to create a custom ConfigMap and specify its name within the field `envVars` of the BookKeeper Spec. The following values need to be provided within this ConfigMap if we expect the BookKeeper cluster to work with Pravega. 86 | 87 | | KEY | VALUE | 88 | |---|---| 89 | | *PRAVEGA_CLUSTER_NAME* | Name of Pravega Cluster using this BookKeeper Cluster | 90 | | *WAIT_FOR* | Zookeeper URL | 91 | 92 | The user however needs to ensure that the following keys which are present in BookKeeper ConfigMap which is created by the BookKeeper Operator should not be a part of this custom ConfigMap. 93 | 94 | ``` 95 | - BOOKIE_MEM_OPTS 96 | - BOOKIE_GC_OPTS 97 | - BOOKIE_GC_LOGGING_OPTS 98 | - BOOKIE_EXTRA_OPTS 99 | - ZK_URL 100 | - BK_useHostNameAsBookieID 101 | - BK_autoRecoveryDaemonEnabled 102 | - BK_lostBookieRecoveryDelay 103 | ``` 104 | -------------------------------------------------------------------------------- /pkg/util/leader.go: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | */ 10 | 11 | package util 12 | 13 | import ( 14 | "context" 15 | "fmt" 16 | "os" 17 | 18 | "log" 19 | 20 | "github.com/operator-framework/operator-lib/leader" 21 | corev1 "k8s.io/api/core/v1" 22 | apierrors "k8s.io/apimachinery/pkg/api/errors" 23 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 24 | "k8s.io/client-go/rest" 25 | k8sClient "sigs.k8s.io/controller-runtime/pkg/client" 26 | ) 27 | 28 | // BecomeLeader with pre-check cluster status - is there a previous pod in bad state? 29 | func BecomeLeader(ctx context.Context, cfg *rest.Config, lockName, namespace string) error { 30 | client, _ := k8sClient.New(cfg, k8sClient.Options{}) 31 | 32 | err := precheckLeaderLock(ctx, client, lockName, namespace) 33 | if err != nil { 34 | log.Printf("Error while pre-checking leader lock: %v", err) 35 | } 36 | 37 | // pre-checks done, proceed with SDK-provided election procedure 38 | err = leader.Become(ctx, lockName) 39 | return err 40 | } 41 | 42 | func precheckLeaderLock(ctx context.Context, client k8sClient.Client, lockName, ns string) error { 43 | existingConfigMap, e := getConfigMapWithLock(ctx, client, lockName, ns) 44 | if existingConfigMap == nil || e != nil { 45 | return e 46 | } 47 | 48 | currentPod := os.Getenv("POD_NAME") 49 | if currentPod == "" { 50 | return fmt.Errorf("required env POD_NAME not set") 51 | } 52 | 53 | log.Printf("Current pod name: %s", currentPod) 54 | 55 | for _, lockOwner := range existingConfigMap.GetOwnerReferences() { 56 | if lockOwner.Name == currentPod { 57 | log.Printf("Leader lock is owned by current pod - am I restarted?") 58 | return nil 59 | } 60 | log.Printf("Leader lock owner is %s %s", lockOwner.Kind, lockOwner.Name) 61 | e := checkupLeaderPodStatus(ctx, client, lockOwner, existingConfigMap, ns) 62 | if e != nil { 63 | return e 64 | } 65 | } 66 | 67 | return nil 68 | } 69 | 70 | // checkupLeaderPodStatus checks if leader pod 
status is marked with VMware-specific reason 'ProviderFailed' 71 | // then deletes lock and pod 72 | func checkupLeaderPodStatus(ctx context.Context, client k8sClient.Client, leaderRef metav1.OwnerReference, existingLock *corev1.ConfigMap, ns string) error { 73 | if leaderRef.Kind != "Pod" { 74 | log.Printf("Existing lock references non-pod object! Kind: %s", leaderRef.Kind) 75 | return nil 76 | } 77 | 78 | leaderPod := &corev1.Pod{} 79 | err := client.Get(ctx, k8sClient.ObjectKey{Namespace: ns, Name: leaderRef.Name}, leaderPod) 80 | if err != nil { 81 | if apierrors.IsNotFound(err) { 82 | log.Printf("Leader pod %s not found in namespace %s", leaderRef.Name, ns) 83 | return nil 84 | } 85 | log.Printf("Error while reading leader pod: %v", err) 86 | return err 87 | } 88 | 89 | log.Printf("Leader pod is in %s:%s status", leaderPod.Status.Phase, leaderPod.Status.Reason) 90 | 91 | if leaderPod.Status.Reason == "ProviderFailed" { 92 | log.Printf("Leader pod status reason is '%s' - deleting pod and lock config map to unblock leader election", leaderPod.Status.Reason) 93 | if err := deleteLeader(ctx, client, leaderPod, existingLock); err != nil { 94 | return err 95 | } 96 | } 97 | 98 | return nil 99 | } 100 | 101 | func getConfigMapWithLock(ctx context.Context, client k8sClient.Client, lockName, ns string) (*corev1.ConfigMap, error) { 102 | existingConfigMap := &corev1.ConfigMap{} 103 | e := client.Get(ctx, k8sClient.ObjectKey{Namespace: ns, Name: lockName}, existingConfigMap) 104 | if e != nil { 105 | if apierrors.IsNotFound(e) { 106 | log.Printf("Leader lock %s not found in namespace %s", lockName, ns) 107 | return nil, nil 108 | } 109 | log.Printf("Unknown error trying to get lock config map: %v", e) 110 | return nil, e 111 | } 112 | return existingConfigMap, nil 113 | } 114 | 115 | // deleteLeader tries to delete pod and config map 116 | func deleteLeader(ctx context.Context, client k8sClient.Client, leaderPod *corev1.Pod, configMapWithLock *corev1.ConfigMap) error { 
117 | err := client.Delete(ctx, leaderPod) 118 | if err != nil { 119 | log.Printf("Error deleting leader pod %s: %v", leaderPod.Name, err) 120 | return err 121 | } 122 | 123 | err = client.Delete(ctx, configMapWithLock) 124 | switch { 125 | case apierrors.IsNotFound(err): 126 | log.Printf("Config map has already been deleted") 127 | return nil 128 | case err != nil: 129 | return err 130 | } 131 | 132 | return nil 133 | } 134 | -------------------------------------------------------------------------------- /pkg/util/leader_test.go: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | */ 10 | 11 | package util 12 | 13 | import ( 14 | "context" 15 | "os" 16 | 17 | . "github.com/onsi/ginkgo" 18 | . 
"github.com/onsi/gomega" 19 | corev1 "k8s.io/api/core/v1" 20 | apierrors "k8s.io/apimachinery/pkg/api/errors" 21 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 22 | "k8s.io/apimachinery/pkg/runtime" 23 | clientscheme "k8s.io/client-go/kubernetes/scheme" 24 | k8sClient "sigs.k8s.io/controller-runtime/pkg/client" 25 | "sigs.k8s.io/controller-runtime/pkg/client/fake" 26 | ) 27 | 28 | const ( 29 | configmapName = "test-map" 30 | namespace = "ns-1" 31 | currentPodName = "current-pod" 32 | otherPodName = "some-other-pod" 33 | ) 34 | 35 | var _ = Describe("Leader election utils", func() { 36 | Context("Election prechecks", func() { 37 | var ( 38 | client k8sClient.Client 39 | err error 40 | ctx context.Context 41 | lockConfigMap *corev1.ConfigMap 42 | currentPod *corev1.Pod 43 | otherPod *corev1.Pod 44 | ) 45 | BeforeEach(func() { 46 | currentPod = &corev1.Pod{ 47 | ObjectMeta: metav1.ObjectMeta{ 48 | Name: currentPodName, 49 | UID: "Uid-" + currentPodName, 50 | Namespace: namespace, 51 | }, 52 | } 53 | otherPod = &corev1.Pod{ 54 | ObjectMeta: metav1.ObjectMeta{ 55 | Name: otherPodName, 56 | UID: "Uid-" + otherPodName, 57 | Namespace: namespace, 58 | }, 59 | } 60 | _ = os.Setenv("POD_NAME", currentPodName) 61 | ctx = context.TODO() 62 | }) 63 | 64 | When("leader lock owned by current pod", func() { 65 | BeforeEach(func() { 66 | lockConfigMap = &corev1.ConfigMap{ 67 | TypeMeta: metav1.TypeMeta{ 68 | Kind: "ConfigMap", 69 | APIVersion: "v1", 70 | }, 71 | ObjectMeta: metav1.ObjectMeta{ 72 | Name: configmapName, 73 | Namespace: namespace, 74 | OwnerReferences: []metav1.OwnerReference{ 75 | {Name: currentPodName, Kind: "Pod"}, 76 | }, 77 | }, 78 | } 79 | 80 | client = fake.NewFakeClientWithScheme(clientscheme.Scheme, []runtime.Object{currentPod, otherPod, lockConfigMap}...) 
81 | 82 | err = precheckLeaderLock(ctx, client, configmapName, namespace) 83 | }) 84 | It(" must do nothing", func() { 85 | Expect(err).ShouldNot(HaveOccurred()) 86 | }) 87 | }) 88 | 89 | When("leader lock owned by other pod", func() { 90 | BeforeEach(func() { 91 | lockConfigMap = &corev1.ConfigMap{ 92 | TypeMeta: metav1.TypeMeta{ 93 | Kind: "ConfigMap", 94 | APIVersion: "v1", 95 | }, 96 | ObjectMeta: metav1.ObjectMeta{ 97 | Name: configmapName, 98 | Namespace: namespace, 99 | OwnerReferences: []metav1.OwnerReference{ 100 | {Name: otherPodName, Kind: "Pod"}, 101 | }, 102 | }, 103 | } 104 | client = fake.NewFakeClientWithScheme(clientscheme.Scheme, []runtime.Object{currentPod, otherPod, lockConfigMap}...) 105 | err = precheckLeaderLock(ctx, client, configmapName, namespace) 106 | }) 107 | 108 | Context("when that node is Ready", func() { 109 | It(" must do nothing", func() { 110 | Expect(err).ShouldNot(HaveOccurred()) 111 | 112 | pod := &corev1.Pod{} 113 | err = client.Get(ctx, k8sClient.ObjectKey{Namespace: namespace, Name: otherPodName}, pod) 114 | Expect(err).Should(BeNil()) 115 | 116 | cm := &corev1.ConfigMap{} 117 | err = client.Get(ctx, k8sClient.ObjectKey{Namespace: namespace, Name: configmapName}, cm) 118 | Expect(err).Should(BeNil()) 119 | }) 120 | }) 121 | 122 | Context("when that node is in ProviderFailed state", func() { 123 | BeforeEach(func() { 124 | otherPod.Status.Reason = "ProviderFailed" 125 | _ = client.Update(ctx, otherPod) 126 | 127 | err = precheckLeaderLock(ctx, client, configmapName, namespace) 128 | }) 129 | It(" must delete otherPod and config map", func() { 130 | Expect(err).ShouldNot(HaveOccurred()) 131 | 132 | pod := &corev1.Pod{} 133 | err = client.Get(ctx, k8sClient.ObjectKey{Namespace: namespace, Name: otherPodName}, pod) 134 | Expect(err).ShouldNot(BeNil()) 135 | Expect(apierrors.IsNotFound(err)).To(BeTrue()) 136 | 137 | cm := &corev1.ConfigMap{} 138 | err = client.Get(ctx, k8sClient.ObjectKey{Namespace: namespace, Name: 
configmapName}, cm) 139 | Expect(err).ShouldNot(BeNil()) 140 | Expect(apierrors.IsNotFound(err)).To(BeTrue()) 141 | }) 142 | }) 143 | }) 144 | }) 145 | }) 146 | -------------------------------------------------------------------------------- /doc/development.md: -------------------------------------------------------------------------------- 1 | ## Development 2 | 3 | ### Contents 4 | 5 | * [Requirements](#requirements) 6 | * [Build the operator image](#build-the-operator-image) 7 | * [Run the Operator locally](#run-the-operator-locally) 8 | * [Installation on Google Kubernetes Engine](#installation-on-google-kubernetes-engine) 9 | * [Install the Operator in Test Mode](#install-the-operator-in-test-mode) 10 | 11 | #### Requirements 12 | - Go 1.16+ 13 | 14 | ##### Install Go 15 | 16 | You can install go directly or use gvm ( go version manager) 17 | 18 | Install gvm: 19 | 20 | ``` 21 | bash < <(curl -s -S -L https://raw.githubusercontent.com/moovweb/gvm/master/binscripts/gvm-installer) 22 | ``` 23 | 24 | See all currently installed go versions: 25 | ``` 26 | gvm list 27 | ``` 28 | 29 | See all available go versions that can be installed using gvm: 30 | ``` 31 | gvm listall 32 | ``` 33 | 34 | Install a new go version: 35 | ``` 36 | gvm install go1.4 -B 37 | gvm use go1.4 38 | gvm install go1.13.8 --binary 39 | gvm use go1.13.8 --default 40 | ``` 41 | Your GOPATH should be be set by now, check using 42 | ``` 43 | echo $GOPATH 44 | ``` 45 | should display something like `/home//.gvm/pkgsets/go1.11/global` 46 | 47 | Clone operator repo: 48 | ``` 49 | cd $GOPATH 50 | go get github.com:pravega/bookkeeper-operator 51 | ``` 52 | This should clone operator code under `$GOPATH/src/github.com/pravega/bookkeeper-operator` 53 | 54 | For pulling the dependencies we are using go modules for more details on go modules refer to the link below:- 55 | 56 | https://blog.golang.org/using-go-modules 57 | 58 | #### Build the operator image 59 | 60 | Use the `make` command to build the 
Bookkeeper operator image; it will also automatically get all the dependencies by using the go.mod file. 61 | 62 | ``` 63 | $ cd $GOPATH/src/github.com/pravega/bookkeeper-operator 64 | $ make build 65 | ``` 66 | That will generate a Docker image with the format 67 | `<version>-<commit_count>` (`-dirty` is appended if there are uncommitted changes). The image will also be tagged as `latest`. 68 | 69 | Example image after running `make build`. 70 | 71 | The Bookkeeper Operator image will be available in your Docker environment. 72 | 73 | ``` 74 | $ docker images pravega/bookkeeper-operator 75 | 76 | REPOSITORY TAG IMAGE ID CREATED SIZE 77 | 78 | pravega/bookkeeper-operator 0.1.1-3-dirty 2b2d5bcbedf5 10 minutes ago 41.7MB 79 | 80 | pravega/bookkeeper-operator latest 2b2d5bcbedf5 10 minutes ago 41.7MB 81 | 82 | ``` 83 | 84 | Optionally push it to a Docker registry. 85 | 86 | ``` 87 | docker tag pravega/bookkeeper-operator [REGISTRY_HOST]:[REGISTRY_PORT]/pravega/bookkeeper-operator 88 | docker push [REGISTRY_HOST]:[REGISTRY_PORT]/pravega/bookkeeper-operator 89 | ``` 90 | 91 | where: 92 | 93 | - `[REGISTRY_HOST]` is your registry host or IP (e.g. `registry.example.com`) 94 | - `[REGISTRY_PORT]` is your registry port (e.g. `5000`) 95 | 96 | #### Run the Operator locally 97 | 98 | You can run the Operator locally to help with development, testing, and debugging tasks. 99 | 100 | The following command will run the Operator locally with the default Kubernetes config file present at `$HOME/.kube/config`. Use the `--kubeconfig` flag to provide a different path. 101 | 102 | ``` 103 | $ make run-local 104 | ``` 105 | 106 | #### Installation on Google Kubernetes Engine 107 | 108 | The Operator requires elevated privileges in order to watch for the custom resources. 109 | 110 | According to Google Container Engine docs: 111 | 112 | > Ensure the creation of RoleBinding as it grants all the permissions included in the role that we want to create. 
Because of the way Container Engine checks permissions when we create a Role or ClusterRole. 113 | > 114 | > An example workaround is to create a RoleBinding that gives your Google identity a cluster-admin role before attempting to create additional Role or ClusterRole permissions. 115 | > 116 | > This is a known issue in the Beta release of Role-Based Access Control in Kubernetes and Container Engine version 1.6. 117 | 118 | On GKE, the following command must be run before installing the Operator, replacing the user with your own details. 119 | 120 | ``` 121 | $ kubectl create clusterrolebinding your-user-cluster-admin-binding --clusterrole=cluster-admin --user=your.google.cloud.email@example.org 122 | ``` 123 | 124 | #### Install the Operator in Test Mode 125 | The Operator can be run in `test mode` if we want to deploy the Bookkeeper Cluster on minikube or on a cluster with very limited resources by setting `testmode: true` in [values.yaml](https://github.com/pravega/charts/blob/master/charts/bookkeeper-operator/values.yaml) file of the [operator charts](https://github.com/pravega/charts/tree/master/charts/bookkeeper-operator). Operator running in test mode skips the minimum replica requirement checks. Test mode provides a bare minimum setup and is not recommended to be used in production environments. 126 | -------------------------------------------------------------------------------- /test/e2e/rollback_test.go: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | */ 10 | 11 | package e2e 12 | 13 | import ( 14 | "time" 15 | 16 | . "github.com/onsi/ginkgo" 17 | . 
"github.com/onsi/gomega" 18 | 19 | bkapi "github.com/pravega/bookkeeper-operator/api/v1alpha1" 20 | bookkeeper_e2eutil "github.com/pravega/bookkeeper-operator/pkg/test/e2e/e2eutil" 21 | 22 | corev1 "k8s.io/api/core/v1" 23 | ) 24 | 25 | var _ = Describe("Rollback", func() { 26 | Context("Check rollback operations", func() { 27 | It("should rollback without error", func() { 28 | 29 | cluster := bookkeeper_e2eutil.NewDefaultCluster(testNamespace) 30 | 31 | cluster.WithDefaults() 32 | initialVersion := "0.6.0" 33 | firstUpgradeVersion := "0.7.0-1" 34 | secondUpgradeVersion := "0.5.0" 35 | cluster.Spec.Version = initialVersion 36 | bookkeeper, err := bookkeeper_e2eutil.CreateBKCluster(&t, k8sClient, cluster) 37 | Expect(err).NotTo(HaveOccurred()) 38 | 39 | err = bookkeeper_e2eutil.WaitForBookkeeperClusterToBecomeReady(&t, k8sClient, bookkeeper) 40 | Expect(err).NotTo(HaveOccurred()) 41 | 42 | // This is to get the latest Bookkeeper cluster object 43 | bookkeeper, err = bookkeeper_e2eutil.GetBKCluster(&t, k8sClient, bookkeeper) 44 | Expect(err).NotTo(HaveOccurred()) 45 | Expect(bookkeeper.Status.CurrentVersion).To(Equal(initialVersion)) 46 | 47 | // This is to get the latest Bookkeeper cluster object 48 | bookkeeper, err = bookkeeper_e2eutil.GetBKCluster(&t, k8sClient, bookkeeper) 49 | Expect(err).NotTo(HaveOccurred()) 50 | 51 | bookkeeper.Spec.Version = firstUpgradeVersion 52 | err = bookkeeper_e2eutil.UpdateBKCluster(&t, k8sClient, bookkeeper) 53 | Expect(err).NotTo(HaveOccurred()) 54 | 55 | // waiting for upgrade to fail 56 | time.Sleep(3 * time.Minute) 57 | 58 | bookkeeper, err = bookkeeper_e2eutil.GetBKCluster(&t, k8sClient, bookkeeper) 59 | Expect(err).NotTo(HaveOccurred()) 60 | _, errorCondition := bookkeeper.Status.GetClusterCondition(bkapi.ClusterConditionError) 61 | Expect(errorCondition.Status).To(Equal(corev1.ConditionTrue)) 62 | Expect(errorCondition.Reason).To(Equal("UpgradeFailed")) 63 | Expect(errorCondition.Message).To(ContainSubstring("pod 
bookkeeper-bookie-0 update failed because of ImagePullBackOff")) 64 | 65 | // checking whether upgrade error event is sent out to the kubernetes event queue 66 | event, err := bookkeeper_e2eutil.CheckEvents(&t, k8sClient, bookkeeper, "UPGRADE_ERROR") 67 | Expect(err).NotTo(HaveOccurred()) 68 | Expect(event).To(BeTrue()) 69 | 70 | // trigger rollback to version other than last stable version 71 | bookkeeper.Spec.Version = secondUpgradeVersion 72 | err = bookkeeper_e2eutil.UpdateBKCluster(&t, k8sClient, bookkeeper) 73 | Expect(err).To(HaveOccurred(), "Should not allow rollback to any version other than the last stable version") 74 | Expect(err.Error()).To(ContainSubstring("Rollback to version 0.5.0 not supported. Only rollback to version 0.6.0 is supported")) 75 | 76 | // trigger rollback to last stable version 77 | bookkeeper.Spec.Version = initialVersion 78 | err = bookkeeper_e2eutil.UpdateBKCluster(&t, k8sClient, bookkeeper) 79 | Expect(err).NotTo(HaveOccurred()) 80 | time.Sleep(2 * time.Second) 81 | 82 | // trigger another upgrade while the last rollback is still ongoing 83 | bookkeeper, err = bookkeeper_e2eutil.GetBKCluster(&t, k8sClient, bookkeeper) 84 | Expect(err).NotTo(HaveOccurred()) 85 | bookkeeper.Spec.Version = secondUpgradeVersion 86 | err = bookkeeper_e2eutil.UpdateBKCluster(&t, k8sClient, bookkeeper) 87 | Expect(err).To(HaveOccurred(), "Should reject rollback request while rollback is in progress") 88 | Expect(err.Error()).To(ContainSubstring("failed to process the request, rollback in progress")) 89 | 90 | _, rollbackCondition := bookkeeper.Status.GetClusterCondition(bkapi.ClusterConditionRollback) 91 | Expect(rollbackCondition.Status).To(Equal(corev1.ConditionTrue)) 92 | Expect(rollbackCondition.Reason).To(ContainSubstring("Updating Bookkeeper")) 93 | 94 | err = bookkeeper_e2eutil.WaitForBKClusterToRollback(&t, k8sClient, bookkeeper, initialVersion) 95 | Expect(err).NotTo(HaveOccurred()) 96 | 97 | // This is to get the latest Bookkeeper cluster 
object 98 | bookkeeper, err = bookkeeper_e2eutil.GetBKCluster(&t, k8sClient, bookkeeper) 99 | Expect(err).NotTo(HaveOccurred()) 100 | 101 | // waiting for rollback to complete 102 | Expect(bookkeeper.Spec.Version).To(Equal(initialVersion)) 103 | Expect(bookkeeper.Status.CurrentVersion).To(Equal(initialVersion)) 104 | Expect(bookkeeper.Status.TargetVersion).To(Equal("")) 105 | 106 | // checking version history 107 | Expect(bookkeeper.Status.VersionHistory[0]).To(Equal("0.6.0")) 108 | 109 | // Delete cluster 110 | err = bookkeeper_e2eutil.DeleteBKCluster(&t, k8sClient, bookkeeper) 111 | Expect(err).NotTo(HaveOccurred()) 112 | 113 | err = bookkeeper_e2eutil.WaitForBKClusterToTerminate(&t, k8sClient, bookkeeper) 114 | Expect(err).NotTo(HaveOccurred()) 115 | 116 | }) 117 | }) 118 | }) 119 | -------------------------------------------------------------------------------- /test/e2e/multiple_bk_test.go: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | */ 10 | 11 | package e2e 12 | 13 | import ( 14 | // "testing" 15 | . "github.com/onsi/ginkgo" 16 | . 
"github.com/onsi/gomega" 17 | 18 | bookkeeper_e2eutil "github.com/pravega/bookkeeper-operator/pkg/test/e2e/e2eutil" 19 | 20 | "strconv" 21 | "time" 22 | ) 23 | 24 | var _ = Describe("Multiple BK Clusters", func() { 25 | Context("Check multiple BK cluster operations", func() { 26 | It("Scale, Delete and Update operations are successful", func() { 27 | 28 | // Create first cluster 29 | cluster := bookkeeper_e2eutil.NewDefaultCluster(testNamespace) 30 | cm_name := "configmap1" 31 | cm1 := bookkeeper_e2eutil.NewConfigMap(testNamespace, cm_name, "pr1") 32 | err := bookkeeper_e2eutil.CreateConfigMap(&t, k8sClient, cm1) 33 | Expect(err).NotTo(HaveOccurred()) 34 | cluster.ObjectMeta.Name = "bk1" 35 | autorecovery := true 36 | cluster.Spec.AutoRecovery = &(autorecovery) 37 | cluster.WithDefaults() 38 | 39 | bk1, err := bookkeeper_e2eutil.CreateBKClusterWithCM(&t, k8sClient, cluster, cm_name) 40 | Expect(err).NotTo(HaveOccurred()) 41 | 42 | err = bookkeeper_e2eutil.WaitForBookkeeperClusterToBecomeReady(&t, k8sClient, bk1) 43 | Expect(err).NotTo(HaveOccurred()) 44 | 45 | bk1, err = bookkeeper_e2eutil.GetBKCluster(&t, k8sClient, bk1) 46 | Expect(err).NotTo(HaveOccurred()) 47 | err = bookkeeper_e2eutil.CheckConfigMap(&t, k8sClient, bk1, "BK_autoRecoveryDaemonEnabled", strconv.FormatBool(autorecovery)) 48 | Expect(err).NotTo(HaveOccurred()) 49 | 50 | // Create second cluster 51 | cluster = bookkeeper_e2eutil.NewDefaultCluster(testNamespace) 52 | cm_name = "configmap2" 53 | cm2 := bookkeeper_e2eutil.NewConfigMap(testNamespace, cm_name, "pr2") 54 | err = bookkeeper_e2eutil.CreateConfigMap(&t, k8sClient, cm2) 55 | Expect(err).NotTo(HaveOccurred()) 56 | cluster.ObjectMeta.Name = "bk2" 57 | autorecovery = false 58 | cluster.Spec.AutoRecovery = &(autorecovery) 59 | cluster.WithDefaults() 60 | 61 | bk2, err := bookkeeper_e2eutil.CreateBKClusterWithCM(&t, k8sClient, cluster, cm_name) 62 | Expect(err).NotTo(HaveOccurred()) 63 | 64 | err = 
bookkeeper_e2eutil.WaitForBookkeeperClusterToBecomeReady(&t, k8sClient, bk2) 65 | Expect(err).NotTo(HaveOccurred()) 66 | 67 | bk2, err = bookkeeper_e2eutil.GetBKCluster(&t, k8sClient, bk2) 68 | Expect(err).NotTo(HaveOccurred()) 69 | err = bookkeeper_e2eutil.CheckConfigMap(&t, k8sClient, bk2, "BK_autoRecoveryDaemonEnabled", strconv.FormatBool(autorecovery)) 70 | Expect(err).NotTo(HaveOccurred()) 71 | 72 | // Create third cluster 73 | cluster = bookkeeper_e2eutil.NewDefaultCluster(testNamespace) 74 | cluster.WithDefaults() 75 | 76 | bk3, err := bookkeeper_e2eutil.CreateBKCluster(&t, k8sClient, cluster) 77 | Expect(err).NotTo(HaveOccurred()) 78 | 79 | err = bookkeeper_e2eutil.WaitForBookkeeperClusterToBecomeReady(&t, k8sClient, bk3) 80 | Expect(err).NotTo(HaveOccurred()) 81 | 82 | bk3, err = bookkeeper_e2eutil.GetBKCluster(&t, k8sClient, bk3) 83 | Expect(err).NotTo(HaveOccurred()) 84 | 85 | // This is to get the latest Bookkeeper cluster object 86 | bk1, err = bookkeeper_e2eutil.GetBKCluster(&t, k8sClient, bk1) 87 | Expect(err).NotTo(HaveOccurred()) 88 | 89 | // Scale up replicas in the first Bookkeeper cluster 90 | bk1.Spec.Replicas = 5 91 | 92 | err = bookkeeper_e2eutil.UpdateBKCluster(&t, k8sClient, bk1) 93 | Expect(err).NotTo(HaveOccurred()) 94 | 95 | err = bookkeeper_e2eutil.WaitForBookkeeperClusterToBecomeReady(&t, k8sClient, bk1) 96 | Expect(err).NotTo(HaveOccurred()) 97 | 98 | // This is to get the latest Bookkeeper cluster object 99 | bk2, err = bookkeeper_e2eutil.GetBKCluster(&t, k8sClient, bk2) 100 | Expect(err).NotTo(HaveOccurred()) 101 | 102 | // Deleting pods of the second Bookkeeper cluster 103 | podDeleteCount := 3 104 | err = bookkeeper_e2eutil.DeletePods(&t, k8sClient, bk2, podDeleteCount) 105 | Expect(err).NotTo(HaveOccurred()) 106 | time.Sleep(10 * time.Second) 107 | 108 | err = bookkeeper_e2eutil.WaitForBookkeeperClusterToBecomeReady(&t, k8sClient, bk2) 109 | Expect(err).NotTo(HaveOccurred()) 110 | 111 | // deleting all bookkeeper clusters 112 | 
err = bookkeeper_e2eutil.DeleteBKCluster(&t, k8sClient, bk1) 113 | Expect(err).NotTo(HaveOccurred()) 114 | 115 | err = bookkeeper_e2eutil.WaitForBKClusterToTerminate(&t, k8sClient, bk1) 116 | Expect(err).NotTo(HaveOccurred()) 117 | 118 | err = bookkeeper_e2eutil.DeleteBKCluster(&t, k8sClient, bk2) 119 | Expect(err).NotTo(HaveOccurred()) 120 | 121 | err = bookkeeper_e2eutil.WaitForBKClusterToTerminate(&t, k8sClient, bk2) 122 | Expect(err).NotTo(HaveOccurred()) 123 | 124 | err = bookkeeper_e2eutil.DeleteBKCluster(&t, k8sClient, bk3) 125 | Expect(err).NotTo(HaveOccurred()) 126 | 127 | err = bookkeeper_e2eutil.WaitForBKClusterToTerminate(&t, k8sClient, bk3) 128 | Expect(err).NotTo(HaveOccurred()) 129 | 130 | err = bookkeeper_e2eutil.DeleteConfigMap(&t, k8sClient, cm1) 131 | Expect(err).NotTo(HaveOccurred()) 132 | 133 | err = bookkeeper_e2eutil.DeleteConfigMap(&t, k8sClient, cm2) 134 | Expect(err).NotTo(HaveOccurred()) 135 | 136 | }) 137 | }) 138 | }) 139 | -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2022. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | package main 18 | 19 | import ( 20 | "context" 21 | "errors" 22 | "flag" 23 | "fmt" 24 | "io/ioutil" 25 | "os" 26 | "runtime" 27 | "strings" 28 | 29 | bookkeeperv1alpha1 "github.com/pravega/bookkeeper-operator/api/v1alpha1" 30 | "github.com/pravega/bookkeeper-operator/controllers" 31 | controllerconfig "github.com/pravega/bookkeeper-operator/pkg/controller/config" 32 | "github.com/pravega/bookkeeper-operator/pkg/util" 33 | "github.com/pravega/bookkeeper-operator/pkg/version" 34 | "github.com/sirupsen/logrus" 35 | apimachineryruntime "k8s.io/apimachinery/pkg/runtime" 36 | utilruntime "k8s.io/apimachinery/pkg/util/runtime" 37 | clientgoscheme "k8s.io/client-go/kubernetes/scheme" 38 | _ "k8s.io/client-go/plugin/pkg/client/auth" 39 | _ "k8s.io/client-go/plugin/pkg/client/auth/gcp" 40 | _ "k8s.io/client-go/plugin/pkg/client/auth/oidc" 41 | ctrl "sigs.k8s.io/controller-runtime" 42 | "sigs.k8s.io/controller-runtime/pkg/client/config" 43 | "sigs.k8s.io/controller-runtime/pkg/log/zap" 44 | //+kubebuilder:scaffold:imports 45 | ) 46 | 47 | var ( 48 | versionFlag bool 49 | webhookFlag bool 50 | log = ctrl.Log.WithName("cmd") 51 | scheme = apimachineryruntime.NewScheme() 52 | ) 53 | 54 | func init() { 55 | flag.BoolVar(&versionFlag, "version", false, "Show version and quit") 56 | flag.BoolVar(&controllerconfig.TestMode, "test", false, "Enable test mode. Do not use this flag in production") 57 | flag.BoolVar(&controllerconfig.DisableFinalizer, "disableFinalizer", false, "Disable finalizers for bookkeeperclusters. 
Use this flag with awareness of the consequences") 58 | flag.BoolVar(&webhookFlag, "webhook", true, "Enable webhook, the default is enabled.") 59 | utilruntime.Must(clientgoscheme.AddToScheme(scheme)) 60 | utilruntime.Must(bookkeeperv1alpha1.AddToScheme(scheme)) 61 | } 62 | 63 | func printVersion() { 64 | log.Info(fmt.Sprintf("bookkeeper-operator Version: %v", version.Version)) 65 | log.Info(fmt.Sprintf("Git SHA: %s", version.GitSHA)) 66 | log.Info(fmt.Sprintf("Go Version: %s", runtime.Version())) 67 | log.Info(fmt.Sprintf("Go OS/Arch: %s/%s", runtime.GOOS, runtime.GOARCH)) 68 | } 69 | 70 | func main() { 71 | var metricsAddr string 72 | flag.StringVar(&metricsAddr, "metrics-bind-address", "127.0.0.1:6000", "The address the metric endpoint binds to.") 73 | 74 | flag.Parse() 75 | 76 | ctrl.SetLogger(zap.New(zap.UseDevMode(false))) 77 | 78 | printVersion() 79 | 80 | if versionFlag { 81 | os.Exit(0) 82 | } 83 | 84 | if controllerconfig.TestMode { 85 | logrus.Warn("----- Running in test mode. Make sure you are NOT in production -----") 86 | } 87 | 88 | if controllerconfig.DisableFinalizer { 89 | logrus.Warn("----- Running with finalizer disabled. 
-----") 90 | } 91 | 92 | // Get a config to talk to the apiserver 93 | cfg, err := config.GetConfig() 94 | if err != nil { 95 | logrus.Fatal(err) 96 | } 97 | 98 | operatorNs, err := GetOperatorNamespace() 99 | if err != nil { 100 | log.Error(err, "failed to get operator namespace") 101 | os.Exit(1) 102 | } 103 | 104 | namespaces, err := getWatchNamespace() 105 | if err != nil { 106 | log.Error(err, "unable to get WatchNamespace, "+ 107 | "the manager will watch and manage resources in all namespaces") 108 | } 109 | 110 | // Become the leader before proceeding 111 | err = util.BecomeLeader(context.TODO(), cfg, "bookkeeper-operator-lock", operatorNs) 112 | if err != nil { 113 | log.Error(err, "") 114 | os.Exit(1) 115 | } 116 | 117 | mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{ 118 | Scheme: scheme, 119 | Namespace: namespaces, 120 | MetricsBindAddress: metricsAddr, 121 | Port: 9443, 122 | }) 123 | if err != nil { 124 | log.Error(err, "unable to start manager") 125 | os.Exit(1) 126 | } 127 | 128 | log.Info("Registering Components") 129 | 130 | if err = (&controllers.BookkeeperClusterReconciler{ 131 | Client: mgr.GetClient(), 132 | Scheme: mgr.GetScheme(), 133 | }).SetupWithManager(mgr); err != nil { 134 | log.Error(err, "unable to create controller", "controller", "BookkeeperCluster") 135 | os.Exit(1) 136 | } 137 | 138 | bookkeeperv1alpha1.Mgr = mgr 139 | if webhookFlag { 140 | if err = (&bookkeeperv1alpha1.BookkeeperCluster{}).SetupWebhookWithManager(mgr); err != nil { 141 | log.Error(err, "unable to create webhook", "webhook", "BookkeeperCluster") 142 | os.Exit(1) 143 | } 144 | } 145 | //+kubebuilder:scaffold:builder 146 | 147 | log.Info("Webhook Setup completed") 148 | log.Info("Starting the Cmd") 149 | 150 | // Start the Cmd 151 | log.Info("starting manager") 152 | if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil { 153 | log.Error(err, "problem running manager") 154 | os.Exit(1) 155 | } 156 | } 157 | 158 | func GetOperatorNamespace() 
(string, error) { 159 | nsBytes, err := ioutil.ReadFile("/var/run/secrets/kubernetes.io/serviceaccount/namespace") 160 | if err != nil { 161 | if os.IsNotExist(err) { 162 | return "", errors.New("file does not exist") 163 | } 164 | return "", err 165 | } 166 | ns := strings.TrimSpace(string(nsBytes)) 167 | return ns, nil 168 | } 169 | 170 | // getWatchNamespace returns the Namespace the operator should be watching for changes 171 | func getWatchNamespace() (string, error) { 172 | // WatchNamespaceEnvVar is the constant for env variable WATCH_NAMESPACE 173 | // which specifies the Namespace to watch. 174 | // An empty value means the operator is running with cluster scope. 175 | var watchNamespaceEnvVar = "WATCH_NAMESPACE" 176 | 177 | ns, found := os.LookupEnv(watchNamespaceEnvVar) 178 | if !found { 179 | return "", fmt.Errorf("%s must be set", watchNamespaceEnvVar) 180 | } 181 | return ns, nil 182 | } 183 | -------------------------------------------------------------------------------- /pkg/util/bookkeepercluster.go: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | */ 10 | 11 | package util 12 | 13 | import ( 14 | "fmt" 15 | "reflect" 16 | "regexp" 17 | "strconv" 18 | "strings" 19 | 20 | v "github.com/hashicorp/go-version" 21 | 22 | v1 "k8s.io/api/core/v1" 23 | ) 24 | 25 | var ( 26 | versionRegexp *regexp.Regexp 27 | ) 28 | 29 | const ( 30 | MajorMinorVersionRegexp string = `^v?(?P[0-9]+\.[0-9]+\.[0-9]+)` 31 | ) 32 | 33 | func init() { 34 | versionRegexp = regexp.MustCompile(MajorMinorVersionRegexp) 35 | } 36 | 37 | func PdbNameForBookie(clusterName string) string { 38 | return fmt.Sprintf("%s-bookie", clusterName) 39 | } 40 | 41 | func ConfigMapNameForBookie(clusterName string) string { 42 | return fmt.Sprintf("%s-bookie", clusterName) 43 | } 44 | 45 | func StatefulSetNameForBookie(clusterName string) string { 46 | return fmt.Sprintf("%s-bookie", clusterName) 47 | } 48 | 49 | func IsOrphan(k8sObjectName string, replicas int32) bool { 50 | index := strings.LastIndexAny(k8sObjectName, "-") 51 | if index == -1 { 52 | return false 53 | } 54 | 55 | ordinal, err := strconv.Atoi(k8sObjectName[index+1:]) 56 | if err != nil { 57 | return false 58 | } 59 | 60 | return int32(ordinal) >= replicas 61 | } 62 | 63 | func HealthcheckCommand(port int32) []string { 64 | return []string{"/bin/sh", "-c", fmt.Sprintf("netstat -ltn 2> /dev/null | grep %d || ss -ltn 2> /dev/null | grep %d", port, port)} 65 | } 66 | 67 | // Min returns the smaller of x or y. 
68 | func Min(x, y int32) int32 { 69 | if x > y { 70 | return y 71 | } 72 | return x 73 | } 74 | 75 | func ContainsStringWithPrefix(slice []string, str string) bool { 76 | for _, item := range slice { 77 | if strings.HasPrefix(item, str) { 78 | return true 79 | } 80 | } 81 | return false 82 | } 83 | 84 | func RemoveString(slice []string, str string) (result []string) { 85 | for _, item := range slice { 86 | if item == str { 87 | continue 88 | } 89 | result = append(result, item) 90 | } 91 | return result 92 | } 93 | 94 | func GetStringWithPrefix(slice []string, str string) (result string) { 95 | for _, item := range slice { 96 | if strings.HasPrefix(item, str) { 97 | return item 98 | } 99 | } 100 | return "" 101 | } 102 | 103 | func GetPodVersion(pod *v1.Pod) string { 104 | return pod.GetAnnotations()["bookkeeper.version"] 105 | } 106 | 107 | func CompareVersions(v1, v2, operator string) (bool, error) { 108 | normv1, err := NormalizeVersion(v1) 109 | if err != nil { 110 | return false, err 111 | } 112 | normv2, err := NormalizeVersion(v2) 113 | if err != nil { 114 | return false, err 115 | } 116 | clusterVersion, err := v.NewSemver(normv1) 117 | if err != nil { 118 | return false, err 119 | } 120 | constraints, err := v.NewConstraint(fmt.Sprintf("%s %s", operator, normv2)) 121 | if err != nil { 122 | return false, err 123 | } 124 | return constraints.Check(clusterVersion), nil 125 | } 126 | 127 | func ContainsVersion(list []string, version string) bool { 128 | result := false 129 | for _, v := range list { 130 | if result, _ = CompareVersions(version, v, "="); result { 131 | break 132 | } 133 | } 134 | return result 135 | } 136 | 137 | func NormalizeVersion(version string) (string, error) { 138 | matches := versionRegexp.FindStringSubmatch(version) 139 | if matches == nil || len(matches) <= 1 { 140 | return "", fmt.Errorf("failed to parse version %s", version) 141 | } 142 | return matches[1], nil 143 | } 144 | 145 | // OrderedMap is a map that has insertion order 
// when iterating. The iteration of
// map in Go is in random order by default.
type OrderedMap struct {
	m    map[string]string // option key -> option value
	keys []string          // keys in first-insertion order
}

// set stores value under key and appends key to the insertion-order list the
// first time the key is seen.
func (om *OrderedMap) set(key, value string) {
	if _, seen := om.m[key]; !seen {
		om.keys = append(om.keys, key)
	}
	om.m[key] = value
}

// UpdateOneJVMOption parses a single JVM option into a key/value pair and
// stores it in om, overwriting the value of a key that is already present
// while keeping that key's original position.
//
// Recognised forms:
//
//	-Xms<size>       key "-Xms",      value "<size>"
//	-XX:+Name        key "Name",      value "+"
//	-XX:-Name        key "Name",      value "-"
//	-XX:Name=Value   key "Name",      value "Value" (split at the first '=')
//	anything else    key is the whole argument, value ""
//
// A "-XX:" option that is neither a +/- flag nor a Name=Value pair (for
// example a bare "-XX:" or "-XX:Name") is kept verbatim as an opaque key;
// indexing into such an argument previously caused a panic.
func UpdateOneJVMOption(arg string, om *OrderedMap) {
	const (
		heapPrefix = "-Xms"
		xxPrefix   = "-XX:"
	)

	// Parse "-Xms"
	if strings.HasPrefix(arg, heapPrefix) {
		om.set(heapPrefix, arg[len(heapPrefix):])
		return
	}

	// Parse option starting with "-XX:"
	if strings.HasPrefix(arg, xxPrefix) {
		rest := arg[len(xxPrefix):]

		// Boolean flag: the sign is the value, the flag name is the key.
		if rest != "" && (rest[0] == '+' || rest[0] == '-') {
			om.set(rest[1:], rest[:1])
			return
		}

		// Name=Value: split only at the first '=' so that values which
		// themselves contain '=' are preserved in full.
		if eq := strings.IndexByte(rest, '='); eq >= 0 {
			om.set(rest[:eq], rest[eq+1:])
			return
		}
		// Malformed -XX option: fall through and keep it as-is.
	}

	// Not in those formats, just keep the option as a key
	om.set(arg, "")
}

// Concatenate the key value pair to be a JVM option string.
190 | func GenerateJVMOption(k, v string) string { 191 | if v == "" { 192 | return k 193 | } 194 | 195 | if k == "-Xms" { 196 | return fmt.Sprintf("%v%v", k, v) 197 | } 198 | 199 | if v == "+" || v == "-" { 200 | return fmt.Sprintf("-XX:%v%v", v, k) 201 | } 202 | 203 | return fmt.Sprintf("-XX:%v=%v", k, v) 204 | } 205 | 206 | // This method will override the default JVM options with user provided custom options 207 | func OverrideDefaultJVMOptions(defaultOpts []string, customOpts []string) []string { 208 | 209 | // Nothing to be overriden, just return the default options 210 | if customOpts == nil { 211 | return defaultOpts 212 | } 213 | 214 | om := &OrderedMap{m: map[string]string{}, keys: []string{}} 215 | 216 | // Firstly, store the default options in an ordered map. The ordered map is a 217 | // map that has insertion order guarantee when iterating. 218 | for _, option := range defaultOpts { 219 | UpdateOneJVMOption(option, om) 220 | } 221 | 222 | // Secondly, update the ordered map with custom options. If the option has been 223 | // found in the map, its value will be updated by the custom options. If not, the 224 | // the map will just add a new key value pair. 225 | for _, option := range customOpts { 226 | UpdateOneJVMOption(option, om) 227 | } 228 | 229 | jvmOpts := []string{} 230 | // Iterate the ordered map in its insertion order. 231 | for _, key := range om.keys { 232 | jvmOpts = append(jvmOpts, GenerateJVMOption(key, om.m[key])) 233 | } 234 | 235 | return jvmOpts 236 | } 237 | 238 | func CompareConfigMap(cm1 *v1.ConfigMap, cm2 *v1.ConfigMap) bool { 239 | eq := reflect.DeepEqual(cm1.Data, cm2.Data) 240 | if eq { 241 | return true 242 | } 243 | return false 244 | } 245 | -------------------------------------------------------------------------------- /pkg/util/bookkeepercluster_test.go: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. 
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | */ 10 | package util 11 | 12 | import ( 13 | "testing" 14 | 15 | . "github.com/onsi/ginkgo" 16 | . "github.com/onsi/gomega" 17 | v1 "k8s.io/api/core/v1" 18 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 19 | ) 20 | 21 | func TestBookkeepercluster(t *testing.T) { 22 | RegisterFailHandler(Fail) 23 | RunSpecs(t, "Pravegacluster") 24 | } 25 | 26 | var _ = Describe("bookkeepercluster", func() { 27 | 28 | Context("PdbNameForBookie", func() { 29 | var str1 string 30 | BeforeEach(func() { 31 | str1 = PdbNameForBookie("bk") 32 | }) 33 | It("should return pdb name", func() { 34 | Ω(str1).To(Equal("bk-bookie")) 35 | }) 36 | 37 | }) 38 | Context("ConfigMapNameForBookie", func() { 39 | var str1 string 40 | BeforeEach(func() { 41 | str1 = ConfigMapNameForBookie("bk") 42 | }) 43 | It("should return configmap name", func() { 44 | Ω(str1).To(Equal("bk-bookie")) 45 | }) 46 | 47 | }) 48 | Context("StatefulSetNameForBookie", func() { 49 | var str1 string 50 | BeforeEach(func() { 51 | str1 = StatefulSetNameForBookie("bk") 52 | }) 53 | It("should return statefulset name", func() { 54 | Ω(str1).To(Equal("bk-bookie")) 55 | }) 56 | }) 57 | 58 | Context("IsOrphan", func() { 59 | var result1, result2, result3, result4 bool 60 | BeforeEach(func() { 61 | 62 | result1 = IsOrphan("bookie-4", 3) 63 | result2 = IsOrphan("bookie-2", 3) 64 | result3 = IsOrphan("bookie", 1) 65 | result4 = IsOrphan("bookie-1ab", 1) 66 | }) 67 | It("should return true for result2", func() { 68 | Ω(result1).To(Equal(true)) 69 | }) 70 | It("should return false for result1", func() { 71 | Ω(result2).To(Equal(false)) 72 | }) 73 | It("should return false for result3", func() { 74 | Ω(result3).To(Equal(false)) 75 | }) 76 | It("should return false for result4", func() { 77 | 
Ω(result4).To(Equal(false)) 78 | }) 79 | }) 80 | Context("HealthcheckCommand()", func() { 81 | 82 | out := HealthcheckCommand(1234) 83 | It("should not be nil", func() { 84 | Ω(len(out)).ShouldNot(Equal(0)) 85 | }) 86 | 87 | }) 88 | Context("Min()", func() { 89 | 90 | It("Min should be 10", func() { 91 | Ω(Min(10, 20)).Should(Equal(int32(10))) 92 | }) 93 | It("Min should be 20", func() { 94 | Ω(Min(30, 20)).Should(Equal(int32(20))) 95 | }) 96 | 97 | }) 98 | Context("ContainsStringWithPrefix", func() { 99 | var result, result1 bool 100 | BeforeEach(func() { 101 | opts := []string{ 102 | "-Xms512m", 103 | "-XX:+ExitOnOutOfMemoryError", 104 | } 105 | 106 | result = ContainsStringWithPrefix(opts, "-Xms") 107 | result1 = ContainsStringWithPrefix(opts, "-abc") 108 | }) 109 | It("should return true for result", func() { 110 | Ω(result).To(Equal(true)) 111 | 112 | }) 113 | It("should return false for result1", func() { 114 | Ω(result1).To(Equal(false)) 115 | }) 116 | 117 | }) 118 | Context("RemoveString", func() { 119 | var result bool 120 | BeforeEach(func() { 121 | opts := []string{ 122 | "abc-test", 123 | "test1", 124 | } 125 | opts = RemoveString(opts, "abc-test") 126 | result = ContainsStringWithPrefix(opts, "abc") 127 | 128 | }) 129 | It("should return false for result", func() { 130 | Ω(result).To(Equal(false)) 131 | 132 | }) 133 | 134 | }) 135 | Context("GetStringWithPrefix", func() { 136 | var out, out1 string 137 | BeforeEach(func() { 138 | opts := []string{ 139 | "abc-test", 140 | "test1", 141 | } 142 | out = GetStringWithPrefix(opts, "abc") 143 | out1 = GetStringWithPrefix(opts, "bk") 144 | 145 | }) 146 | It("should return string with prefix", func() { 147 | Ω(out).To(Equal("abc-test")) 148 | 149 | }) 150 | It("should return empty string", func() { 151 | Ω(out1).To(Equal("")) 152 | }) 153 | }) 154 | 155 | Context("ContainsVersion fn", func() { 156 | var result1, result2, result3 bool 157 | BeforeEach(func() { 158 | input := []string{"0.4.0", "0.5.0", "a.b.c"} 
159 | result1 = ContainsVersion(input, "0.4.0") 160 | result2 = ContainsVersion(input, "0.7.0") 161 | result3 = ContainsVersion(input, "") 162 | 163 | }) 164 | It("should return true for result", func() { 165 | Ω(result1).To(Equal(true)) 166 | }) 167 | It("should return false for result", func() { 168 | Ω(result2).To(Equal(false)) 169 | }) 170 | It("should return false for result", func() { 171 | Ω(result3).To(Equal(false)) 172 | }) 173 | }) 174 | Context("GetPodVersion", func() { 175 | var out string 176 | BeforeEach(func() { 177 | annotationsMap := map[string]string{ 178 | "bookkeeper.version": "0.7.0", 179 | } 180 | testpod := &v1.Pod{ObjectMeta: metav1.ObjectMeta{Namespace: "default", Annotations: annotationsMap}} 181 | out = GetPodVersion(testpod) 182 | }) 183 | It("should return correct version", func() { 184 | Ω(out).To(Equal("0.7.0")) 185 | }) 186 | }) 187 | Context("OverrideDefaultJVMOptions", func() { 188 | var result, result1 []string 189 | BeforeEach(func() { 190 | jvmOpts := []string{ 191 | "-Xms512m", 192 | "-XX:+ExitOnOutOfMemoryError", 193 | "-XX:+CrashOnOutOfMemoryError", 194 | "-XX:+HeapDumpOnOutOfMemoryError", 195 | "-XX:HeapDumpPath=/heap", 196 | } 197 | customOpts := []string{ 198 | "-Xms1024m", 199 | "-XX:+ExitOnOutOfMemoryError", 200 | "-XX:+CrashOnOutOfMemoryError", 201 | "-XX:+HeapDumpOnOutOfMemoryError", 202 | "-XX:HeapDumpPath=/heap", 203 | "-yy:mem", 204 | "", 205 | } 206 | 207 | result = OverrideDefaultJVMOptions(jvmOpts, customOpts) 208 | result1 = OverrideDefaultJVMOptions(jvmOpts, result1) 209 | 210 | }) 211 | It("should contain updated string", func() { 212 | Ω(len(result)).ShouldNot(Equal(0)) 213 | Ω(result[0]).To(Equal("-Xms1024m")) 214 | Ω(result1[0]).To(Equal("-Xms512m")) 215 | }) 216 | }) 217 | Context("CompareConfigMap", func() { 218 | var output1, output2 bool 219 | BeforeEach(func() { 220 | configData1 := map[string]string{ 221 | "TEST_DATA": "testdata", 222 | } 223 | configData2 := map[string]string{ 224 | "TEST_DATA": 
"testdata1", 225 | } 226 | configMap1 := &v1.ConfigMap{ 227 | TypeMeta: metav1.TypeMeta{ 228 | Kind: "ConfigMap", 229 | APIVersion: "v1", 230 | }, 231 | Data: configData1, 232 | } 233 | configMap2 := &v1.ConfigMap{ 234 | TypeMeta: metav1.TypeMeta{ 235 | Kind: "ConfigMap", 236 | APIVersion: "v1", 237 | }, 238 | Data: configData1, 239 | } 240 | configMap3 := &v1.ConfigMap{ 241 | TypeMeta: metav1.TypeMeta{ 242 | Kind: "ConfigMap", 243 | APIVersion: "v1", 244 | }, 245 | Data: configData2, 246 | } 247 | output1 = CompareConfigMap(configMap1, configMap2) 248 | output2 = CompareConfigMap(configMap1, configMap3) 249 | }) 250 | 251 | It("output1 should be true", func() { 252 | Ω(output1).To(Equal(true)) 253 | }) 254 | It("output2 should be false", func() { 255 | Ω(output2).To(Equal(false)) 256 | }) 257 | }) 258 | }) 259 | -------------------------------------------------------------------------------- /doc/rollback-cluster.md: -------------------------------------------------------------------------------- 1 | # Bookkeeper Cluster Rollback 2 | 3 | This document details how rollback can be triggered after a Bookkeeper cluster upgrade fails. 4 | Note that a rollback can be triggered only after an Upgrade Failure. 5 | 6 | ## Upgrade Failure 7 | 8 | An Upgrade can fail because of following reasons: 9 | 10 | 1. Incorrect configuration (wrong quota, permissions, limit ranges) 11 | 2. Network issues (ImagePullError) 12 | 3. K8s Cluster Issues. 13 | 4. Application issues (Application runtime misconfiguration or code bugs) 14 | 15 | An upgrade failure can manifest through a Pod staying in `Pending` state forever or continuously restarting or crashing (CrashLoopBackOff). 16 | A component deployment failure needs to be tracked and mapped to "Upgrade Failure" for Bookkeeper Cluster. 
17 | Here we try to fail-fast by explicitly checking for some common causes for deployment failure like image pull errors or CrashLoopBackOff State and failing the upgrade if any pod runs into this state during upgrade. 18 | 19 | The following Bookkeeper Cluster Status Condition indicates a Failed Upgrade: 20 | 21 | ``` 22 | ClusterConditionType: Error 23 | Status: True 24 | Reason: UpgradeFailed 25 | Message: <details of the exception or cause of the failure>
26 | ``` 27 | After an Upgrade Failure the output of `kubectl describe bk [CLUSTER_NAME]` would look like this: 28 | 29 | ``` 30 | $> kubectl describe bk bookkeeper 31 | . . . 32 | Spec: 33 | . . . 34 | Version: 0.6.0-2252.b6f6512 35 | . . . 36 | Status: 37 | . . . 38 | Conditions: 39 | Last Transition Time: 2019-09-06T09:00:13Z 40 | Last Update Time: 2019-09-06T09:00:13Z 41 | Status: False 42 | Type: Upgrading 43 | Last Transition Time: 2019-09-06T08:58:40Z 44 | Last Update Time: 2019-09-06T08:58:40Z 45 | Status: False 46 | Type: PodsReady 47 | Last Transition Time: 2019-09-06T09:00:13Z 48 | Last Update Time: 2019-09-06T09:00:13Z 49 | Message: pod bookkeeper-bookie-0 update failed because of ImagePullBackOff 50 | Reason: UpgradeFailed 51 | Status: True 52 | Type: Error 53 | . . . 54 | Current Version: 0.6.0-2239.6e24df7 55 | . . . 56 | Version History: 57 | 0.6.0-2239.6e24df7 58 | ``` 59 | where `0.6.0-2252.b6f6512` is the version we tried upgrading to and `0.6.0-2239.6e24df7` is the cluster version prior to triggering the upgrade. 60 | 61 | ## Manual Rollback Trigger 62 | 63 | A Rollback is triggered when a Bookkeeper Cluster is in `UpgradeFailed` Error State and a user manually updates version field in the BookkeeperCluster spec to point to the last stable cluster version. 64 | 65 | A Rollback involves moving all components in the cluster back to the last stable cluster version. As with upgrades, the operator rolls back one component at a time and one pod at a time to preserve high-availability. 66 | 67 | Note: 68 | 1. A Rollback to only the last stable cluster version is supported at this point. 69 | 2. Changing the cluster spec version to the previous cluster version, when cluster is not in `UpgradeFailed` state, will not trigger a rollback. 
70 | 71 | ## Rollback via Helm (Experimental) 72 | 73 | The following command prints the historical revisions of a particular helm release 74 | ``` 75 | $ helm history [BOOKKEEPER_RELEASE_NAME] 76 | ``` 77 | 78 | Rollback can be triggered via helm using the following command 79 | ``` 80 | $ helm rollback [BOOKKEEPER_RELEASE_NAME] [REVISION_NUMBER] --wait --timeout 600s 81 | ``` 82 | Rollback will be successfully triggered only if a [REVISION_NUMBER] corresponding to the last stable cluster version is provided. 83 | >Note: Helm rollbacks are still an experimental feature and are not encouraged. We strongly recommend using manual rollbacks. 84 | 85 | ## Rollback Implementation 86 | 87 | When Rollback is triggered the cluster moves into ClusterCondition `RollbackInProgress`. 88 | Once the Rollback completes, this condition is set to false. 89 | 90 | During a Rollback, the Cluster Status should look something like: 91 | ``` 92 | $> kubectl describe bk bookkeeper 93 | . . . 94 | Status: 95 | Conditions: 96 | Last Transition Time: 2019-09-20T10:41:10Z 97 | Last Update Time: 2019-09-20T10:41:10Z 98 | Status: False 99 | Type: Upgrading 100 | Last Transition Time: 2019-09-20T10:45:12Z 101 | Last Update Time: 2019-09-20T10:45:12Z 102 | Status: True 103 | Type: PodsReady 104 | Last Transition Time: 2019-09-20T10:41:10Z 105 | Last Update Time: 2019-09-20T10:41:10Z 106 | Message: pod bookkeeper-bookie-0 update failed because of ImagePullBackOff 107 | Reason: UpgradeFailed 108 | Status: True 109 | Type: Error 110 | Last Update Time: 2019-09-20T10:45:12Z 111 | Message: 1 112 | Reason: Updating Bookkeeper 113 | Status: True 114 | Type: RollbackInProgress 115 | . . . 116 | ``` 117 | Here the `RollbackInProgress` condition being `true` indicates that a Rollback is in Progress. 118 | Also `Reason` and `Message` fields of this condition indicate the component being rolled back and number of updated replicas respectively. 
119 | 120 | A `versionHistory` field in the BookkeeperClusterStatus maintains the history of upgrades. 121 | 122 | ## Rollback Outcome 123 | 124 | ### Success 125 | If the Rollback completes successfully, the cluster state goes back to condition `PodsReady`, which would mean the cluster is now in a stable state. All other conditions should be `false`. 126 | ``` 127 | Last Transition Time: 2019-09-20T09:49:26Z 128 | Last Update Time: 2019-09-20T09:49:26Z 129 | Status: True 130 | Type: PodsReady 131 | 132 | ``` 133 | 134 | Example: 135 | ``` 136 | Status: 137 | Conditions: 138 | Last Transition Time: 2019-09-20T10:12:04Z 139 | Last Update Time: 2019-09-20T10:12:04Z 140 | Status: False 141 | Type: Upgrading 142 | Last Transition Time: 2019-09-20T10:11:34Z 143 | Last Update Time: 2019-09-20T10:11:34Z 144 | Status: True 145 | Type: PodsReady 146 | Last Transition Time: 2019-09-20T10:07:19Z 147 | Last Update Time: 2019-09-20T10:07:19Z 148 | Status: False 149 | Type: Error 150 | Last Transition Time: 2019-09-20T09:50:57Z 151 | Last Update Time: 2019-09-20T09:50:57Z 152 | Status: False 153 | Type: RollbackInProgress 154 | ``` 155 | 156 | ### Failure 157 | If the Rollback fails, the cluster moves to the `Error` state, indicated by this cluster condition: 158 | ``` 159 | ClusterConditionType: Error 160 | Status: True 161 | Reason: RollbackFailed 162 | Message: <Details of exception/cause of failure>
163 | ``` 164 | 165 | Example: 166 | ``` 167 | Status: 168 | Conditions: 169 | Last Transition Time: 2019-09-20T09:46:24Z 170 | Last Update Time: 2019-09-20T09:46:24Z 171 | Status: False 172 | Type: Upgrading 173 | Last Transition Time: 2019-09-20T09:49:26Z 174 | Last Update Time: 2019-09-20T09:49:26Z 175 | Status: False 176 | Type: PodsReady 177 | Last Transition Time: 2019-09-20T09:46:24Z 178 | Last Update Time: 2019-09-20T09:50:57Z 179 | Message: pod bookkeeper-bookie-0 update failed because of ImagePullBackOff 180 | Reason: RollbackFailed 181 | Status: True 182 | Type: Error 183 | Last Transition Time: 2019-09-20T09:50:57Z 184 | Last Update Time: 2019-09-20T09:50:57Z 185 | Status: False 186 | Type: RollbackInProgress 187 | ``` 188 | 189 | When a rollback failure happens, the operator cannot recover the cluster from this failed state and manual intervention would be required to resolve this. 190 | -------------------------------------------------------------------------------- /api/v1alpha1/bookkeepercluster_webhook.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2022. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | package v1alpha1 18 | 19 | import ( 20 | "context" 21 | "fmt" 22 | "log" 23 | "strings" 24 | 25 | "github.com/pravega/bookkeeper-operator/pkg/util" 26 | corev1 "k8s.io/api/core/v1" 27 | "k8s.io/apimachinery/pkg/api/errors" 28 | "k8s.io/apimachinery/pkg/runtime" 29 | "k8s.io/apimachinery/pkg/types" 30 | ctrl "sigs.k8s.io/controller-runtime" 31 | logf "sigs.k8s.io/controller-runtime/pkg/log" 32 | "sigs.k8s.io/controller-runtime/pkg/manager" 33 | "sigs.k8s.io/controller-runtime/pkg/webhook" 34 | ) 35 | 36 | var Mgr manager.Manager 37 | 38 | // log is for logging in this package. 39 | var bookkeeperclusterlog = logf.Log.WithName("bookkeepercluster-resource") 40 | 41 | func (r *BookkeeperCluster) SetupWebhookWithManager(mgr ctrl.Manager) error { 42 | return ctrl.NewWebhookManagedBy(mgr). 43 | For(r). 44 | Complete() 45 | } 46 | 47 | //+kubebuilder:webhook:path=/mutate-bookkeeper-pravega-io-v1alpha1-bookkeepercluster,mutating=true,failurePolicy=fail,sideEffects=None,groups=bookkeeper.pravega.io,resources=bookkeeperclusters,verbs=create;update,versions=v1alpha1,name=mbookkeepercluster.kb.io,admissionReviewVersions=v1 48 | 49 | var _ webhook.Defaulter = &BookkeeperCluster{} 50 | 51 | // Default implements webhook.Defaulter so a webhook will be registered for the type 52 | func (bk *BookkeeperCluster) Default() { 53 | bookkeeperclusterlog.Info("default", "name", bk.Name) 54 | 55 | // TODO(user): fill in your defaulting logic. 56 | } 57 | 58 | // TODO(user): change verbs to "verbs=create;update;delete" if you want to enable deletion validation. 
59 | //+kubebuilder:webhook:path=/validate-bookkeeper-pravega-io-v1alpha1-bookkeepercluster,mutating=false,failurePolicy=fail,sideEffects=None,groups=bookkeeper.pravega.io,resources=bookkeeperclusters,verbs=create;update,versions=v1alpha1,name=vbookkeepercluster.kb.io,admissionReviewVersions=v1 60 | 61 | var _ webhook.Validator = &BookkeeperCluster{} 62 | 63 | // ValidateCreate implements webhook.Validator so a webhook will be registered for the type 64 | func (bk *BookkeeperCluster) ValidateCreate() error { 65 | bookkeeperclusterlog.Info("validate create", "name", bk.Name) 66 | 67 | err := bk.ValidateBookkeeperVersion() 68 | if err != nil { 69 | return err 70 | } 71 | err = bk.ValidateAbsolutePath([]string{"journalDirectories", "ledgerDirectories", "indexDirectories"}) 72 | if err != nil { 73 | return err 74 | } 75 | return nil 76 | } 77 | 78 | // ValidateUpdate implements webhook.Validator so a webhook will be registered for the type 79 | func (bk *BookkeeperCluster) ValidateUpdate(old runtime.Object) error { 80 | bookkeeperclusterlog.Info("validate update", "name", bk.Name) 81 | 82 | err := bk.ValidateBookkeeperVersion() 83 | if err != nil { 84 | return err 85 | } 86 | err = bk.validateConfigMap() 87 | if err != nil { 88 | return err 89 | } 90 | return nil 91 | } 92 | 93 | // ValidateDelete implements webhook.Validator so a webhook will be registered for the type 94 | func (bk *BookkeeperCluster) ValidateDelete() error { 95 | bookkeeperclusterlog.Info("validate delete", "name", bk.Name) 96 | 97 | // TODO(user): fill in your validation logic upon object deletion. 
98 | return nil 99 | } 100 | 101 | func (bk *BookkeeperCluster) ValidateBookkeeperVersion() error { 102 | 103 | if bk.Spec.Version == "" { 104 | bk.Spec.Version = DefaultBookkeeperVersion 105 | } 106 | requestVersion := bk.Spec.Version 107 | 108 | if bk.Status.IsClusterInUpgradingState() && requestVersion != bk.Status.TargetVersion { 109 | return fmt.Errorf("failed to process the request, cluster is upgrading") 110 | } 111 | 112 | if bk.Status.IsClusterInRollbackState() { 113 | if requestVersion != bk.Status.GetLastVersion() { 114 | return fmt.Errorf("failed to process the request, rollback in progress.") 115 | } 116 | } 117 | if bk.Status.IsClusterInUpgradeFailedState() { 118 | if requestVersion != bk.Status.GetLastVersion() { 119 | return fmt.Errorf("Rollback to version %s not supported. Only rollback to version %s is supported.", requestVersion, bk.Status.GetLastVersion()) 120 | } 121 | return nil 122 | } 123 | 124 | if bk.Status.IsClusterInErrorState() { 125 | return fmt.Errorf("failed to process the request, cluster is in error state.") 126 | } 127 | 128 | // Check if the request has a valid Bookkeeper version 129 | normRequestVersion, err := util.NormalizeVersion(requestVersion) 130 | log.Printf("validateBookkeeperVersion:: normRequestVersion %s", normRequestVersion) 131 | if err != nil { 132 | return fmt.Errorf("request version is not in valid format: %v", err) 133 | } 134 | 135 | if bk.Status.CurrentVersion == "" { 136 | // we're deploying for the very first time 137 | return nil 138 | } 139 | 140 | // This is not an upgrade if CurrentVersion == requestVersion 141 | if bk.Status.CurrentVersion == requestVersion { 142 | return nil 143 | } 144 | 145 | // This is an upgrade, check if requested version is in the upgrade path 146 | normFoundVersion, err := util.NormalizeVersion(bk.Status.CurrentVersion) 147 | if err != nil { 148 | // It should never happen 149 | return fmt.Errorf("found version is not in valid format, something bad happens: %v", err) 150 | } 151 
| if match, _ := util.CompareVersions(normRequestVersion, normFoundVersion, "<"); match { 152 | return fmt.Errorf("downgrading the cluster from version %s to %s is not supported", bk.Status.CurrentVersion, requestVersion) 153 | } 154 | log.Printf("validateBookkeeperVersion:: normFoundVersion %s", normFoundVersion) 155 | 156 | log.Print("validateBookkeeperVersion:: No error found...returning...") 157 | return nil 158 | } 159 | 160 | func (bk *BookkeeperCluster) ValidateAbsolutePath(dirs []string) error { 161 | for _, dir := range dirs { 162 | if val, ok := bk.Spec.Options[dir]; ok { 163 | paths := strings.Split(val, ",") 164 | for _, path := range paths { 165 | if !strings.HasPrefix(path, "/") { 166 | return fmt.Errorf("path (%s) of %s should start with /", path, dir) 167 | } 168 | } 169 | } 170 | } 171 | return nil 172 | } 173 | 174 | func (bk *BookkeeperCluster) validateConfigMap() error { 175 | configmap := &corev1.ConfigMap{} 176 | err := Mgr.GetClient().Get(context.TODO(), 177 | types.NamespacedName{Name: util.ConfigMapNameForBookie(bk.Name), Namespace: bk.Namespace}, configmap) 178 | if err != nil { 179 | if errors.IsNotFound(err) { 180 | return nil 181 | } else { 182 | return fmt.Errorf("failed to get configmap (%s): %v", configmap.Name, err) 183 | } 184 | } 185 | if val, ok := bk.Spec.Options["useHostNameAsBookieID"]; ok { 186 | eq := configmap.Data["BK_useHostNameAsBookieID"] == val 187 | if !eq { 188 | return fmt.Errorf("value of useHostNameAsBookieID should not be changed") 189 | } 190 | } 191 | if val, ok := bk.Spec.Options["journalDirectories"]; ok { 192 | eq := configmap.Data["BK_journalDirectories"] == val 193 | if !eq { 194 | return fmt.Errorf("path of journal directories should not be changed") 195 | } 196 | } 197 | if val, ok := bk.Spec.Options["ledgerDirectories"]; ok { 198 | eq := configmap.Data["BK_ledgerDirectories"] == val 199 | if !eq { 200 | return fmt.Errorf("path of ledger directories should not be changed") 201 | } 202 | } 203 | if val, 
ok := bk.Spec.Options["indexDirectories"]; ok { 204 | eq := configmap.Data["BK_indexDirectories"] == val 205 | if !eq { 206 | return fmt.Errorf("path of index directories should not be changed") 207 | } 208 | } 209 | log.Print("validateConfigMap:: No error found...returning...") 210 | return nil 211 | } 212 | -------------------------------------------------------------------------------- /api/v1alpha1/zz_generated.deepcopy_test.go: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | */ 10 | 11 | package v1alpha1_test 12 | 13 | import ( 14 | "strings" 15 | 16 | v1 "k8s.io/api/core/v1" 17 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 18 | 19 | . "github.com/onsi/ginkgo" 20 | . 
// DeepCopy suite: exercises the generated DeepCopy/DeepCopyInto/DeepCopyObject
// methods of the v1alpha1 types, both on populated values and on nil receivers.
var _ = Describe("DeepCopy", func() {
	Context("with defaults", func() {
		// str1..str4 capture the image repository and pull policy of bk1 and
		// of its copy bk2 right after the first DeepCopy, before bk1 is mutated.
		var str1, str2 string
		var str3, str4 v1.PullPolicy
		var bk1, bk2 *v1alpha1.BookkeeperCluster

		// The setup interleaves mutations of bk1 with deep copies into bk2, so
		// the order of the statements below matters for the assertions.
		BeforeEach(func() {
			bk1 = &v1alpha1.BookkeeperCluster{
				ObjectMeta: metav1.ObjectMeta{
					Name:      "example",
					Namespace: "default",
				},
			}
			bk1.WithDefaults()
			bk1.Status.Init()
			bk1.Status.VersionHistory = []string{"0.6.0", "0.7.0"}
			bk1.Spec.Options["key"] = "value"
			// First full copy of the cluster; bk2 points at that copy.
			temp := *bk1.DeepCopy()
			bk2 = &temp
			str1 = bk1.Spec.Image.Repository
			str2 = bk2.Spec.Image.Repository
			str3 = bk1.Spec.Image.PullPolicy
			str4 = bk2.Spec.Image.PullPolicy
			bk1.Spec.Image.PullPolicy = "PullIfNotPresent"
			bk1.Spec.Image.DeepCopyInto(bk2.Spec.Image)
			bk1.Spec.Image.Repository = "bk/exmple"
			bk1.Spec.Probes.ReadinessProbe.InitialDelaySeconds = 5
			bk1.Spec.Probes.LivenessProbe.FailureThreshold = 2
			bk2.Spec.Probes = bk1.Spec.Probes.DeepCopy()
			initContainer := []v1.Container{
				v1.Container{
					Name:    "testing",
					Image:   "dummy-image",
					Command: []string{"sh", "-c", "ls;pwd"},
				},
			}
			bk1.Spec.InitContainers = initContainer
			bk1.Spec.Tolerations = []v1.Toleration{
				{
					Key:      "bookie",
					Operator: "Equal",
					Value:    "val1",
					Effect:   "NoSchedule",
				},
			}
			bk1.Spec.JVMOptions.MemoryOpts = []string{"1g"}
			bk2.Spec.JVMOptions = bk1.Spec.JVMOptions.DeepCopy()
			bk2.Spec.Storage = bk1.Spec.Storage.DeepCopy()
			bk1.Spec.Options["ledgers"] = "l1"
			// Whole-spec and whole-status copies; these overwrite the
			// field-level copies made into bk2 above.
			bk2.Spec = *bk1.Spec.DeepCopy()
			bk2.Status = *bk1.Status.DeepCopy()
			bk1.Status.Members.Ready = []string{"bookie-0", "bookie-1"}
			bk1.Status.Members.Unready = []string{"bookie-3", "bookie-2"}
			bk2.Status.Members = *bk1.Status.Members.DeepCopy()
			bk1.Spec.Image.Repository = "bookie/exmple1"
			bk2.Spec.Image = bk1.Spec.Image.DeepCopy()
			bk2.Spec.Image.ImageSpec = *bk1.Spec.Image.ImageSpec.DeepCopy()
			bk1.Status.SetPodsReadyConditionTrue()
			bk2.Status.Conditions[0] = *bk1.Status.Conditions[0].DeepCopy()
		})
		It("value of str1 and str2 should be equal", func() {
			Ω(str2).To(Equal(str1))
		})
		It("value of str3 and str4 should be equal", func() {
			Ω(str3).To(Equal(str4))
		})
		It("checking value of bk2 probes", func() {
			Ω(bk2.Spec.Probes.ReadinessProbe.InitialDelaySeconds).To(Equal(int32(5)))
			Ω(bk2.Spec.Probes.LivenessProbe.FailureThreshold).To(Equal(int32(2)))
			// Change bk1 again and copy each probe individually, once with
			// DeepCopyInto and once with DeepCopy.
			bk1.Spec.Probes.ReadinessProbe.InitialDelaySeconds = 0
			bk1.Spec.Probes.LivenessProbe.FailureThreshold = 1
			bk1.Spec.Probes.ReadinessProbe.DeepCopyInto(bk2.Spec.Probes.ReadinessProbe)
			bk2.Spec.Probes.LivenessProbe = bk1.Spec.Probes.LivenessProbe.DeepCopy()
			Ω(bk2.Spec.Probes.ReadinessProbe.InitialDelaySeconds).To(Equal(int32(0)))
			Ω(bk2.Spec.Probes.LivenessProbe.FailureThreshold).To(Equal(int32(1)))
		})
		It("checking bk2 jvm option as 1g", func() {
			Ω(bk2.Spec.JVMOptions.MemoryOpts[0]).To(Equal("1g"))
		})
		It("checking bk2 options ledger field", func() {
			Ω(bk2.Spec.Options["ledgers"]).To(Equal("l1"))
		})
		It("checking init containers", func() {
			Ω(bk2.Spec.InitContainers[0].Name).To(Equal("testing"))
			Ω(bk2.Spec.InitContainers[0].Image).To(Equal("dummy-image"))
			Ω(strings.Contains(bk2.Spec.InitContainers[0].Command[2], "ls;pwd")).To(BeTrue())
		})
		It("checking bk2 ready members", func() {
			Ω(bk2.Status.Members.Ready[0]).To(Equal("bookie-0"))
		})
		It("checking bk2 unready members", func() {
			Ω(bk2.Status.Members.Unready[0]).To(Equal("bookie-3"))
		})
		It("checking bk2 spec image", func() {
			Ω(bk2.Spec.Image.Repository).To(Equal("bookie/exmple1"))
		})
		// NOTE(review): this spec reuses the description of the previous one;
		// it checks the Repository through the ImageSpec field instead.
		It("checking bk2 spec image", func() {
			Ω(bk2.Spec.Image.ImageSpec.Repository).To(Equal("bookie/exmple1"))
		})
		It("checking status conditions", func() {
			Ω(bk2.Status.Conditions[0].Reason).To(Equal(bk1.Status.Conditions[0].Reason))
		})
		It("checking bk2 spec storage", func() {
			Ω(bk2.Spec.Storage).To(Equal(bk1.Spec.Storage))
		})
		// The specs below call the copy methods on nil pointer receivers and
		// expect a nil result.
		It("checking for nil member status", func() {
			var memberstatus *v1alpha1.MembersStatus
			memberstatus2 := memberstatus.DeepCopy()
			Ω(memberstatus2).To(BeNil())
		})
		It("checking for nil cluster status", func() {
			var clusterstatus *v1alpha1.BookkeeperClusterStatus
			clusterstatus2 := clusterstatus.DeepCopy()
			Ω(clusterstatus2).To(BeNil())
		})
		It("checking for nil cluster spec", func() {
			var clusterspec *v1alpha1.BookkeeperClusterSpec
			clusterspec2 := clusterspec.DeepCopy()
			Ω(clusterspec2).To(BeNil())
		})
		It("checking for nil cluster condition", func() {
			var clustercond *v1alpha1.ClusterCondition
			clustercond2 := clustercond.DeepCopy()
			Ω(clustercond2).To(BeNil())
		})
		It("checking for nil bookkeeper cluster", func() {
			var cluster *v1alpha1.BookkeeperCluster
			cluster2 := cluster.DeepCopy()
			Ω(cluster2).To(BeNil())
		})
		It("checking for nil imagespec", func() {
			var imagespec *v1alpha1.ImageSpec
			imagespec2 := imagespec.DeepCopy()
			Ω(imagespec2).To(BeNil())
		})
		It("checking for nil clusterlist", func() {
			var clusterlist *v1alpha1.BookkeeperClusterList
			clusterlist2 := clusterlist.DeepCopy()
			Ω(clusterlist2).To(BeNil())
		})
		It("checking for nil bookkeeper cluster deepcopyobject", func() {
			var cluster *v1alpha1.BookkeeperCluster
			cluster2 := cluster.DeepCopyObject()
			Ω(cluster2).To(BeNil())
		})
		It("checking for nil bookkeeper clusterlist deepcopyobject", func() {
			var clusterlist *v1alpha1.BookkeeperClusterList
			clusterlist2 := clusterlist.DeepCopyObject()
			Ω(clusterlist2).To(BeNil())
		})
		// Same nil-receiver check, but through fields of bk1 that are reset
		// to nil inside the spec.
		It("checking for nil jvm options", func() {
			bk1.Spec.JVMOptions = nil
			Ω(bk1.Spec.JVMOptions.DeepCopy()).Should(BeNil())
		})
		It("checking for nil storage options", func() {
			bk1.Spec.Storage = nil
			Ω(bk1.Spec.Storage.DeepCopy()).Should(BeNil())
		})
		It("checking for nil bookkeeper image spec", func() {
			bk1.Spec.Image = nil
			Ω(bk1.Spec.Image.DeepCopy()).Should(BeNil())
		})
		It("checking for deepcopyobject for clusterlist", func() {
			var clusterlist v1alpha1.BookkeeperClusterList
			clusterlist.ResourceVersion = "v1alpha1"
			clusterlist2 := clusterlist.DeepCopyObject()
			Ω(clusterlist2).ShouldNot(BeNil())
		})
		It("checking for deepcopyobject for clusterlist with items", func() {
			var clusterlist v1alpha1.BookkeeperClusterList
			clusterlist.ResourceVersion = "v1alpha1"
			clusterlist.Items = []v1alpha1.BookkeeperCluster{
				{
					Spec: v1alpha1.BookkeeperClusterSpec{},
				},
			}
			clusterlist2 := clusterlist.DeepCopyObject()
			Ω(clusterlist2).ShouldNot(BeNil())
		})
		It("checking for deepcopy for clusterlist", func() {
			var clusterlist v1alpha1.BookkeeperClusterList
			clusterlist.ResourceVersion = "v1alpha1"
			clusterlist2 := clusterlist.DeepCopy()
			Ω(clusterlist2.ResourceVersion).To(Equal("v1alpha1"))
		})
		// Expects an empty Version on the copied object; the bk1 literal in
		// BeforeEach sets only ObjectMeta, no TypeMeta.
		It("checking for Deepcopy object", func() {
			bk := bk2.DeepCopyObject()
			Ω(bk.GetObjectKind().GroupVersionKind().Version).To(Equal(""))
		})
		It("checking pod tolerations", func() {
			Ω(bk2.Spec.Tolerations[0].Key).To(Equal("bookie"))
		})
	})
})
| on: 6 | push: 7 | branches: 8 | - master 9 | pull_request: 10 | branches: 11 | - master 12 | 13 | env: 14 | PACKET_TOKEN: ${{ secrets.PACKET_TOKEN }} 15 | 16 | # jobs to run 17 | jobs: 18 | check: 19 | runs-on: ubuntu-latest 20 | steps: 21 | - name: Set up Go 1.21 22 | uses: actions/setup-go@v2 23 | with: 24 | go-version: "1.21" 25 | id: go 26 | - name: Set up Go for root 27 | run: | 28 | sudo ln -sf `which go` `sudo which go` || true 29 | sudo go version 30 | - name: Get current date 31 | id: date 32 | run: echo "::set-output name=date::$(date +'%Y%m%d%H%M%S')" 33 | - name: Get output 34 | run: echo ${{ steps.date.outputs.date }} 35 | - name: Set env 36 | run: echo "CLUSTER_NAME=bookkeeper-operator-travis-${{ steps.date.outputs.date }}" >> $GITHUB_ENV 37 | - name: Test 38 | run: echo $CLUSTER_NAME 39 | - name: Check out code into the Go module directory 40 | uses: actions/checkout@v2 41 | - name: Gofmt and License checks 42 | run: make check 43 | - name: unit tests 44 | run: make test-unit 45 | - name: Codecov 46 | uses: codecov/codecov-action@v1.0.12 47 | - name: Installing Packet-cli 48 | run: curl -Lo packet-cli https://github.com/packethost/packet-cli/releases/download/0.0.7/packet-linux-amd64 && chmod +x packet-cli && sudo mv packet-cli /usr/local/bin/ 49 | - name: Installing ssh keys 50 | run: | 51 | ssh-keygen -f ~/.ssh/id_rsa -P "" 52 | pub_key=$(cat ~/.ssh/id_rsa.pub) 53 | echo "publc key is $pub_key" 54 | echo "packet-cli ssh-key create --key \"$(cat ~/.ssh/id_rsa.pub)\" --label \"pravega\"" 55 | packet-cli ssh-key create --key "$(cat ~/.ssh/id_rsa.pub)" --label "pravega-travis" 56 | - name: Creating nodes 57 | run: | 58 | cd .. 
&& tar -czvf bookkeeper-operator.tar.gz bookkeeper-operator 59 | packet-cli device create -H $CLUSTER_NAME"-master" -o "ubuntu_20_04" -P c3.medium.x86 -p 454b8b42-33d3-4e7e-8acf-1d1a5fec7e85 -f da11 60 | packet-cli device get -p 454b8b42-33d3-4e7e-8acf-1d1a5fec7e85 | grep $CLUSTER_NAME"-master" | awk '{print $2}' 61 | CLUSTER_ID=$(packet-cli device get -p 454b8b42-33d3-4e7e-8acf-1d1a5fec7e85 | grep $CLUSTER_NAME"-master" | awk '{print $2}' | tr -d ' ') 62 | echo "cluster id is $CLUSTER_ID" 63 | packet-cli device create -H $CLUSTER_NAME"-worker1" -o "ubuntu_20_04" -P c3.medium.x86 -p 454b8b42-33d3-4e7e-8acf-1d1a5fec7e85 -f da11 64 | packet-cli device create -H $CLUSTER_NAME"-worker2" -o "ubuntu_20_04" -P c3.medium.x86 -p 454b8b42-33d3-4e7e-8acf-1d1a5fec7e85 -f da11 65 | MASTER_STATE=$(packet-cli device get -p 454b8b42-33d3-4e7e-8acf-1d1a5fec7e85 | grep $CLUSTER_NAME"-master" | awk '{print $10}' | tr -d ' ') 66 | while [ "$MASTER_STATE" != "active" ]; do MASTER_STATE=`packet-cli device get -p 454b8b42-33d3-4e7e-8acf-1d1a5fec7e85 | grep $CLUSTER_NAME"-master" | awk '{print $10}' | tr -d ' '`;sleep 30;done 67 | CLUSTER_IP=$(packet-cli device get -i $CLUSTER_ID -y | grep "\- address:" | head -1 |awk '{print $3}' | tr -d ' ') 68 | echo "Cluster IP is $CLUSTER_IP" 69 | WORKER1_STATE=$(packet-cli device get -p 454b8b42-33d3-4e7e-8acf-1d1a5fec7e85 | grep $CLUSTER_NAME"-worker1" | awk '{print $10}' | tr -d ' ') 70 | while [ "$WORKER1_STATE" != "active" ]; do WORKER1_STATE=`packet-cli device get -p 454b8b42-33d3-4e7e-8acf-1d1a5fec7e85 | grep $CLUSTER_NAME"-worker1" | awk '{print $10}' | tr -d ' '`;sleep 30;done 71 | WORKER2_STATE=$(packet-cli device get -p 454b8b42-33d3-4e7e-8acf-1d1a5fec7e85 | grep $CLUSTER_NAME"-worker2" | awk '{print $10}' | tr -d ' ') 72 | while [ "$WORKER2_STATE" != "active" ]; do WORKER2_STATE=`packet-cli device get -p 454b8b42-33d3-4e7e-8acf-1d1a5fec7e85 | grep $CLUSTER_NAME"-worker2" | awk '{print $10}' | tr -d ' '`;sleep 30;done 73 | echo 
"CLUSTER_ID=$CLUSTER_ID" >> $GITHUB_ENV 74 | echo "CLUSTER_IP=$CLUSTER_IP" >> $GITHUB_ENV 75 | pwd;ls 76 | scp -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -r bookkeeper-operator.tar.gz root@$CLUSTER_IP:/root/ 77 | - name: Setup k8 78 | run: | 79 | pwd;ls 80 | ssh -o StrictHostKeyChecking=no root@$CLUSTER_IP "sudo apt-get update;tar -xzvf /root/bookkeeper-operator.tar.gz; ls /root/; ls /root/bookkeeper-operator/test/e2e/resources" 81 | ssh -o StrictHostKeyChecking=no root@$CLUSTER_IP sudo chmod +x /root/bookkeeper-operator/test/e2e/resources/kubernetes_master_install.sh 82 | ssh -o StrictHostKeyChecking=no root@$CLUSTER_IP /root/bookkeeper-operator/test/e2e/resources/kubernetes_master_install.sh 83 | CLUSTER_ID1=`packet-cli device get -p 454b8b42-33d3-4e7e-8acf-1d1a5fec7e85 | grep $CLUSTER_NAME"-worker1" | awk '{print $2}' | tr -d ' '` 84 | CLUSTER_IP1=`packet-cli device get -i $CLUSTER_ID1 -y | grep "\- address:" | head -1 |awk '{print $3}' | tr -d ' '` 85 | CLUSTER_ID2=`packet-cli device get -p 454b8b42-33d3-4e7e-8acf-1d1a5fec7e85 | grep $CLUSTER_NAME"-worker2" | awk '{print $2}' | tr -d ' '` 86 | CLUSTER_IP2=`packet-cli device get -i $CLUSTER_ID2 -y | grep "\- address:" | head -1 |awk '{print $3}' | tr -d ' '` 87 | scp -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -r ../bookkeeper-operator.tar.gz root@$CLUSTER_IP1:/root/ 88 | scp -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -r ../bookkeeper-operator.tar.gz root@$CLUSTER_IP2:/root/ 89 | ssh -o StrictHostKeyChecking=no root@$CLUSTER_IP1 "sudo apt-get update;tar -xzvf /root/bookkeeper-operator.tar.gz" 90 | ssh -o StrictHostKeyChecking=no root@$CLUSTER_IP2 "sudo apt-get update;tar -xzvf /root/bookkeeper-operator.tar.gz" 91 | ssh -o StrictHostKeyChecking=no root@$CLUSTER_IP1 sudo chmod +x /root/bookkeeper-operator/test/e2e/resources/kubernetes_slave_install.sh 92 | ssh -o StrictHostKeyChecking=no root@$CLUSTER_IP1 
/root/bookkeeper-operator/test/e2e/resources/kubernetes_slave_install.sh 93 | ssh -o StrictHostKeyChecking=no root@$CLUSTER_IP2 sudo chmod +x /root/bookkeeper-operator/test/e2e/resources/kubernetes_slave_install.sh 94 | ssh -o StrictHostKeyChecking=no root@$CLUSTER_IP2 /root/bookkeeper-operator/test/e2e/resources/kubernetes_slave_install.sh 95 | ssh -o StrictHostKeyChecking=no root@$CLUSTER_IP 'kubeadm token create --print-join-command | head -2' >JOIN 96 | ssh -o StrictHostKeyChecking=no root@$CLUSTER_IP1 $(cat JOIN) 97 | ssh -o StrictHostKeyChecking=no root@$CLUSTER_IP2 $(cat JOIN) 98 | ssh -o StrictHostKeyChecking=no root@$CLUSTER_IP "kubectl get nodes" 99 | echo "CLUSTER_ID1=$CLUSTER_ID1" >> $GITHUB_ENV 100 | echo "CLUSTER_ID2=$CLUSTER_ID2" >> $GITHUB_ENV 101 | - name: Make setup before running e2e 102 | run: | 103 | ssh -o StrictHostKeyChecking=no root@$CLUSTER_IP "mkdir /data;kubectl create -f /root/bookkeeper-operator/test/e2e/resources/local-storage.yaml" 104 | ssh -o StrictHostKeyChecking=no root@$CLUSTER_IP "curl -L https://raw.githubusercontent.com/helm/helm/master/scripts/get-helm-3 | bash" 105 | ssh -o StrictHostKeyChecking=no root@$CLUSTER_IP "kubectl create -f /root/bookkeeper-operator/test/e2e/resources/zookeeper_crd.yaml" 106 | ssh -o StrictHostKeyChecking=no root@$CLUSTER_IP "kubectl create -f /root/bookkeeper-operator/test/e2e/resources/zookeeper.yaml" 107 | ssh -o StrictHostKeyChecking=no root@$CLUSTER_IP "kubectl apply -f \"https://github.com/jetstack/cert-manager/releases/download/v1.7.0/cert-manager.crds.yaml\"" 108 | ssh -o StrictHostKeyChecking=no root@$CLUSTER_IP "helm repo add jetstack https://charts.jetstack.io" 109 | ssh -o StrictHostKeyChecking=no root@$CLUSTER_IP "helm repo update" 110 | ssh -o StrictHostKeyChecking=no root@$CLUSTER_IP "kubectl create namespace cert-manager;helm install cert-manager jetstack/cert-manager --namespace cert-manager --version v1.7.0 --wait" 111 | ssh -o StrictHostKeyChecking=no root@$CLUSTER_IP "kubectl 
-n default create secret docker-registry regcred --docker-server=https://index.docker.io/v1/ --docker-username=testbkop --docker-password=08d50da6-61bd-4953-a2ce-7d7a0e3835bc --docker-email=testbkop@gmail.com" 112 | ssh -o StrictHostKeyChecking=no root@$CLUSTER_IP "bash < <(curl -s -S -L https://raw.githubusercontent.com/moovweb/gvm/master/binscripts/gvm-installer);source /root/.gvm/scripts/gvm;gvm install go1.21.0 --binary;gvm use go1.21.0 --default;git config --global --add safe.directory /root/bookkeeper-operator" 113 | - name: Running e2e 114 | run: | 115 | ssh -o StrictHostKeyChecking=no root@$CLUSTER_IP "cd /root/bookkeeper-operator;source /root/.gvm/scripts/gvm;make test-e2e" 116 | - name: Deleting cluster 117 | if: ${{ always() }} 118 | run: | 119 | SSHKEY=`packet-cli ssh-key get | grep "pravega-travis" | awk '{print $2}' | tr -d ' '` 120 | echo y | packet-cli ssh-key delete -i $SSHKEY 121 | echo y | packet-cli device delete -i $CLUSTER_ID 122 | echo y | packet-cli device delete -i $CLUSTER_ID1 123 | echo y | packet-cli device delete -i $CLUSTER_ID2 124 | -------------------------------------------------------------------------------- /config/app/crd.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apiextensions.k8s.io/v1 2 | kind: CustomResourceDefinition 3 | metadata: 4 | generation: 1 5 | labels: 6 | controller-tools.k8s.io: "1.0" 7 | name: applications.app.k8s.io 8 | selfLink: /apis/apiextensions.k8s.io/v1/customresourcedefinitions/applications.app.k8s.io 9 | spec: 10 | conversion: 11 | strategy: None 12 | group: app.k8s.io 13 | names: 14 | kind: Application 15 | listKind: ApplicationList 16 | plural: applications 17 | singular: application 18 | preserveUnknownFields: true 19 | scope: Namespaced 20 | validation: 21 | openAPIV3Schema: 22 | properties: 23 | apiVersion: 24 | type: string 25 | kind: 26 | type: string 27 | metadata: 28 | type: object 29 | spec: 30 | properties: 31 | assemblyPhase: 32 | 
type: string 33 | componentKinds: 34 | items: 35 | type: object 36 | type: array 37 | descriptor: 38 | properties: 39 | description: 40 | type: string 41 | icons: 42 | items: 43 | properties: 44 | size: 45 | type: string 46 | src: 47 | type: string 48 | type: 49 | type: string 50 | required: 51 | - src 52 | type: object 53 | type: array 54 | keywords: 55 | items: 56 | type: string 57 | type: array 58 | links: 59 | items: 60 | properties: 61 | description: 62 | type: string 63 | url: 64 | type: string 65 | type: object 66 | type: array 67 | maintainers: 68 | items: 69 | properties: 70 | email: 71 | type: string 72 | name: 73 | type: string 74 | url: 75 | type: string 76 | type: object 77 | type: array 78 | notes: 79 | type: string 80 | owners: 81 | items: 82 | properties: 83 | email: 84 | type: string 85 | name: 86 | type: string 87 | url: 88 | type: string 89 | type: object 90 | type: array 91 | type: 92 | type: string 93 | version: 94 | type: string 95 | type: object 96 | info: 97 | items: 98 | properties: 99 | name: 100 | type: string 101 | type: 102 | type: string 103 | value: 104 | type: string 105 | valueFrom: 106 | properties: 107 | configMapKeyRef: 108 | properties: 109 | apiVersion: 110 | type: string 111 | fieldPath: 112 | type: string 113 | key: 114 | type: string 115 | kind: 116 | type: string 117 | name: 118 | type: string 119 | namespace: 120 | type: string 121 | resourceVersion: 122 | type: string 123 | uid: 124 | type: string 125 | type: object 126 | ingressRef: 127 | properties: 128 | apiVersion: 129 | type: string 130 | fieldPath: 131 | type: string 132 | host: 133 | type: string 134 | kind: 135 | type: string 136 | name: 137 | type: string 138 | namespace: 139 | type: string 140 | path: 141 | type: string 142 | resourceVersion: 143 | type: string 144 | uid: 145 | type: string 146 | type: object 147 | secretKeyRef: 148 | properties: 149 | apiVersion: 150 | type: string 151 | fieldPath: 152 | type: string 153 | key: 154 | type: string 155 | kind: 
156 | type: string 157 | name: 158 | type: string 159 | namespace: 160 | type: string 161 | resourceVersion: 162 | type: string 163 | uid: 164 | type: string 165 | type: object 166 | serviceRef: 167 | properties: 168 | apiVersion: 169 | type: string 170 | fieldPath: 171 | type: string 172 | kind: 173 | type: string 174 | name: 175 | type: string 176 | namespace: 177 | type: string 178 | path: 179 | type: string 180 | port: 181 | format: int32 182 | type: integer 183 | resourceVersion: 184 | type: string 185 | uid: 186 | type: string 187 | type: object 188 | type: 189 | type: string 190 | type: object 191 | type: object 192 | type: array 193 | selector: 194 | type: object 195 | type: object 196 | status: 197 | properties: 198 | components: 199 | items: 200 | properties: 201 | group: 202 | type: string 203 | kind: 204 | type: string 205 | link: 206 | type: string 207 | name: 208 | type: string 209 | status: 210 | type: string 211 | type: object 212 | type: array 213 | conditions: 214 | items: 215 | properties: 216 | lastTransitionTime: 217 | format: date-time 218 | type: string 219 | lastUpdateTime: 220 | format: date-time 221 | type: string 222 | message: 223 | type: string 224 | reason: 225 | type: string 226 | status: 227 | type: string 228 | type: 229 | type: string 230 | required: 231 | - type 232 | - status 233 | type: object 234 | type: array 235 | observedGeneration: 236 | format: int64 237 | type: integer 238 | type: object 239 | version: v1beta1 240 | versions: 241 | - name: v1beta1 242 | served: true 243 | storage: true 244 | status: 245 | acceptedNames: 246 | kind: Application 247 | listKind: ApplicationList 248 | plural: applications 249 | singular: application 250 | conditions: 251 | - lastTransitionTime: "2020-03-27T01:59:25Z" 252 | message: no conflicts found 253 | reason: NoConflicts 254 | status: "True" 255 | type: NamesAccepted 256 | - lastTransitionTime: null 257 | message: the initial names have been accepted 258 | reason: InitialNamesAccepted 
259 | status: "True" 260 | type: Established 261 | - lastTransitionTime: "2020-03-27T01:59:25Z" 262 | message: 'spec.validation.openAPIV3Schema.type: Required value: must not be empty 263 | at the root' 264 | reason: Violations 265 | status: "True" 266 | type: NonStructuralSchema 267 | storedVersions: 268 | - v1beta1 269 | -------------------------------------------------------------------------------- /doc/upgrade-cluster.md: -------------------------------------------------------------------------------- 1 | # Bookkeeper cluster upgrade 2 | 3 | This document shows how to upgrade a bookkeeper cluster managed by the bookkeeper operator to a desired version while preserving the cluster's state and data whenever possible. 4 | 5 | ## Overview 6 | 7 | The activity diagram below shows the overall upgrade process started by an end-user and performed by the operator. 8 | 9 | ![pravega k8 upgrade 1](https://user-images.githubusercontent.com/3786750/51993601-7908b000-24af-11e9-8149-82fd1b036630.png) 10 | 11 | 12 | ## Prerequisites 13 | 14 | Your Bookkeeper cluster should be in a healthy state. You can check your cluster health by listing it and checking that all members are ready. 15 | 16 | ``` 17 | $ kubectl get bk 18 | NAME VERSION DESIRED MEMBERS READY MEMBERS AGE 19 | bookkeeper 0.4.0 7 7 11m 20 | ``` 21 | 22 | ## Valid Upgrade Paths 23 | Upgrade of bookkeeper cluster to any version will be allowed as long as the user does not try to downgrade the cluster version. 24 | 25 | ## Trigger an upgrade 26 | 27 | ### Upgrading via Helm 28 | 29 | The upgrade of the bookkeeper cluster from a version **[OLD_VERSION]** to **[NEW_VERSION]** can be triggered via helm using the following command 30 | ``` 31 | $ helm upgrade [BOOKKEEPER_RELEASE_NAME] pravega/bookkeeper --version=[NEW_VERSION] --set version=[NEW_VERSION] --reuse-values --timeout 600s 32 | ``` 33 | **Note:** By specifying the `--reuse-values` option, the configuration of all parameters are retained across upgrades. 
However, if some values need to be modified during the upgrade, the `--set` flag can be used to specify the new configuration for these parameters. Also, by skipping the `--reuse-values` flag, the values of all parameters are reset to the default configuration that has been specified in the published charts for version [NEW_VERSION]. 34 | 35 | **Note:** If the operator version is 0.1.3 or below and we are upgrading bookkeeper version to 0.9.0 or above, we have to set JVM options as follows 36 | 37 | ``` 38 | $ helm upgrade [BOOKKEEPER_RELEASE_NAME] pravega/bookkeeper --version=[NEW_VERSION] --set version=[NEW_VERSION] --set 'jvmOptions.extraOpts={-XX:+UseContainerSupport,-XX:+IgnoreUnrecognizedVMOptions}' --reuse-values --timeout 600s 39 | ``` 40 | 41 | ### Upgrading manually 42 | 43 | To initiate the upgrade process manually, a user has to update the `spec.version` field on the `BookkeeperCluster` custom resource. This can be done in three different ways using the `kubectl` command. 44 | 1. `kubectl edit BookkeeperCluster [CLUSTER_NAME]`, modify the `version` value in the YAML resource, save, and exit. 45 | 2. If you have the custom resource defined in a local YAML file, e.g. `bookkeeper.yaml`, you can modify the `version` value, and reapply the resource with `kubectl apply -f bookkeeper.yaml`. 46 | 3. `kubectl patch BookkeeperCluster [CLUSTER_NAME] --type='json' -p='[{"op": "replace", "path": "/spec/version", "value": "X.Y.Z"}]'`. 47 | After the `version` field is updated, the operator will detect the version change and trigger the upgrade process.
48 | 49 | **Note:** If the operator version is 0.1.3 or below and we are upgrading bookkeeper version to 0.9.0 or above, we have to set JVM options as follows 50 | 51 | ``` 52 | jvmOptions: 53 | extraOpts: ["-XX:+UseContainerSupport","-XX:+IgnoreUnrecognizedVMOptions"] 54 | ``` 55 | 56 | ## Upgrade process 57 | 58 | Once an upgrade request has been received, the operator will apply the rolling upgrade to the Bookkeeper STS. 59 | 60 | The upgrade workflow is as follows: 61 | 62 | - The operator will change the `Upgrading` condition to `True` to indicate that the cluster resource has an upgrade in progress. 63 | - If any of the component pods has errors, the upgrade process will stop (`Upgrading` condition set to `False`) and the operator will set the `Error` condition to `True` and indicate the reason. 64 | - When all pods are upgraded, the `Upgrading` condition will be set to `False` and `status.currentVersion` will be updated to the desired version. 65 | 66 | 67 | ### Bookkeeper upgrade 68 | 69 | Bookkeeper cluster is deployed as a [StatefulSet](https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/) due to its requirements on: 70 | 71 | - Persistent storage: each bookie has three persistent volumes for ledgers, journals, and indices. If a pod is migrated or recreated (e.g. when it's upgraded), the data in those volumes will remain untouched. 72 | - Stable network names: the `StatefulSet` provides pods with a predictable name and a [Headless service](https://kubernetes.io/docs/concepts/services-networking/service/#headless-services) creates DNS records for pods to be reachable by clients. If a pod is recreated or migrated to a different node, clients will continue to be able to reach the pod despite changing its IP address. 73 | 74 | Statefulset [upgrade strategy](https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#update-strategies) is configured in the `updateStrategy` field. It supports two types of strategies. 75 | 76 | - `RollingUpdate`.
The statefulset will automatically apply a rolling upgrade to the pods. 77 | - `OnDelete`. The statefulset will not automatically upgrade pods. Pods will be updated when they are recreated after being deleted. 78 | 79 | In both cases, the upgrade is initiated when the Pod template is updated. 80 | 81 | For Bookkeeper, the operator uses an `OnDelete` strategy. With the `RollingUpdate` strategy, you can only check the upgrade status once all pods get upgraded. On the other hand, with `OnDelete` you can keep updating pods one by one and keep checking the application status to make sure the upgrade is working fine. This allows the operator to have control over the upgrade process and perform verifications and actions before and after a Bookkeeper pod is upgraded. For example, checking that there are no under-replicated ledgers before upgrading the next pod. Also, the operator might need to apply migrations when upgrading to a certain version. 82 | 83 | Bookkeeper upgrade process is as follows: 84 | 85 | 1. Statefulset Pod template is updated to the new image and tag according to the Pravega version. 86 | 2. Pick one outdated pod 87 | 3. Apply pre-upgrade actions and verifications 88 | 4. Delete the pod. The pod is recreated with an updated spec and version 89 | 5. Wait for the pod to become ready. If it fails to start or times out, the upgrade is cancelled. Check [Recovering from a failed upgrade](#recovering-from-a-failed-upgrade) 90 | 6. Apply post-upgrade actions and verifications 91 | 7. If all pods are updated, Bookkeeper upgrade is completed. Otherwise, go to 2. 92 | 93 | 94 | ### Monitor the upgrade process 95 | 96 | You can monitor the upgrade process by listing the Bookkeeper clusters. If a desired version is shown, it means that the operator is working on updating the version.
97 | 98 | ``` 99 | $ kubectl get bk 100 | NAME VERSION DESIRED VERSION DESIRED MEMBERS READY MEMBERS AGE 101 | bookkeeper 0.4.0 0.5.0 4 3 1h 102 | ``` 103 | 104 | When the upgrade process has finished, the version will be updated. 105 | 106 | ``` 107 | $ kubectl get bk 108 | NAME VERSION DESIRED MEMBERS READY MEMBERS AGE 109 | bookkeeper 0.5.0 4 4 1h 110 | ``` 111 | 112 | The command `kubectl describe` can be used to track progress of the upgrade. 113 | ``` 114 | $ kubectl describe bk bookkeeper 115 | ... 116 | Status: 117 | Conditions: 118 | Status: True 119 | Type: Upgrading 120 | Reason: Updating BookKeeper 121 | Message: 1 122 | Last Transition Time: 2019-04-01T19:42:37+02:00 123 | Last Update Time: 2019-04-01T19:42:37+02:00 124 | Status: False 125 | Type: PodsReady 126 | Last Transition Time: 2019-04-01T19:43:08+02:00 127 | Last Update Time: 2019-04-01T19:43:08+02:00 128 | Status: False 129 | Type: Error 130 | ... 131 | 132 | ``` 133 | The `Reason` field in Upgrading Condition shows the component currently being upgraded and `Message` field reflects number of successfully upgraded replicas in this component. 134 | 135 | If upgrade has failed, please check the `Status` section to understand the reason for failure. 136 | 137 | ``` 138 | $ kubectl describe bk bookkeeper 139 | ... 
140 | Status: 141 | Conditions: 142 | Status: False 143 | Type: Upgrading 144 | Last Transition Time: 2019-04-01T19:42:37+02:00 145 | Last Update Time: 2019-04-01T19:42:37+02:00 146 | Status: False 147 | Type: PodsReady 148 | Last Transition Time: 2019-04-01T19:43:08+02:00 149 | Last Update Time: 2019-04-01T19:43:08+02:00 150 | Message: pod bookkeeper-bookie-0 update failed because of ImagePullBackOff 151 | Reason: UpgradeFailed 152 | Status: True 153 | Type: Error 154 | Current Replicas: 8 155 | Current Version: 0.4.0 156 | Members: 157 | Ready: 158 | bookkeeper-bookie-1 159 | bookkeeper-bookie-2 160 | bookkeeper-bookie-3 161 | Unready: 162 | bookkeeper-bookie-0 163 | Ready Replicas: 3 164 | Replicas: 4 165 | ``` 166 | 167 | You can also find useful information at the operator logs. 168 | 169 | ``` 170 | ... 171 | INFO[5884] syncing cluster version from 0.4.0 to 0.5.0-1 172 | INFO[5885] Reconciling BookkeeperCluster default/bookkeeper 173 | INFO[5886] updating statefulset (bookkeeper-bookie) template image to 'pravega/bookkeeper:0.5.0-1' 174 | INFO[5896] Reconciling BookkeeperCluster default/bookkeeper 175 | INFO[5897] statefulset (bookkeeper-bookie) status: 0 updated, 3 ready, 3 target 176 | INFO[5897] updating pod: bookkeeper-bookie-0 177 | INFO[5899] Reconciling BookkeeperCluster default/bookkeeper 178 | INFO[5900] statefulset (bookkeeper-bookie) status: 0 updated, 2 ready, 3 target 179 | INFO[5929] Reconciling BookkeeperCluster default/bookkeeper 180 | INFO[5930] statefulset (bookkeeper-bookie) status: 0 updated, 2 ready, 3 target 181 | INFO[5930] error syncing cluster version, upgrade failed. pod bookkeeper-bookie-0 update failed because of ImagePullBackOff 182 | ... 
183 | ``` 184 | 185 | ### Recovering from a failed upgrade 186 | 187 | See [Rollback](rollback-cluster.md) 188 | -------------------------------------------------------------------------------- /api/v1alpha1/status.go: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | */ 10 | 11 | package v1alpha1 12 | 13 | import ( 14 | "log" 15 | "time" 16 | 17 | corev1 "k8s.io/api/core/v1" 18 | ) 19 | 20 | type ClusterConditionType string 21 | 22 | const ( 23 | ClusterConditionPodsReady ClusterConditionType = "PodsReady" 24 | ClusterConditionUpgrading = "Upgrading" 25 | ClusterConditionRollback = "RollbackInProgress" 26 | ClusterConditionError = "Error" 27 | 28 | // Reasons for cluster upgrading condition 29 | UpdatingBookkeeperReason = "Updating Bookkeeper" 30 | UpgradeErrorReason = "Upgrade Error" 31 | RollbackErrorReason = "Rollback Error" 32 | ) 33 | 34 | // BookkeeperClusterStatus defines the observed state of BookkeeperCluster 35 | type BookkeeperClusterStatus struct { 36 | // Conditions list all the applied conditions 37 | Conditions []ClusterCondition `json:"conditions,omitempty"` 38 | 39 | // CurrentVersion is the current cluster version 40 | CurrentVersion string `json:"currentVersion,omitempty"` 41 | 42 | // TargetVersion is the version the cluster upgrading to. 43 | // If the cluster is not upgrading, TargetVersion is empty. 
44 | TargetVersion string `json:"targetVersion,omitempty"` 45 | 46 | VersionHistory []string `json:"versionHistory,omitempty"` 47 | 48 | // Replicas is the number of desired replicas in the cluster 49 | // +optional 50 | Replicas int32 `json:"replicas"` 51 | 52 | // CurrentReplicas is the number of current replicas in the cluster 53 | // +optional 54 | CurrentReplicas int32 `json:"currentReplicas"` 55 | 56 | // ReadyReplicas is the number of ready replicas in the cluster 57 | // +optional 58 | ReadyReplicas int32 `json:"readyReplicas"` 59 | 60 | // Members is the Bookkeeper members in the cluster 61 | // +optional 62 | Members MembersStatus `json:"members"` 63 | } 64 | 65 | // MembersStatus is the status of the members of the cluster with both 66 | // ready and unready node membership lists 67 | type MembersStatus struct { 68 | // +optional 69 | // +nullable 70 | Ready []string `json:"ready"` 71 | // +optional 72 | // +nullable 73 | Unready []string `json:"unready"` 74 | } 75 | 76 | // ClusterCondition shows the current condition of a Bookkeeper cluster. 77 | // Comply with k8s API conventions 78 | type ClusterCondition struct { 79 | // Type of Bookkeeper cluster condition. 80 | // +optional 81 | Type ClusterConditionType `json:"type"` 82 | 83 | // Status of the condition, one of True, False, Unknown. 84 | // +optional 85 | Status corev1.ConditionStatus `json:"status"` 86 | 87 | // The reason for the condition's last transition. 88 | Reason string `json:"reason,omitempty"` 89 | 90 | // A human readable message indicating details about the transition. 91 | Message string `json:"message,omitempty"` 92 | 93 | // The last time this condition was updated. 94 | LastUpdateTime string `json:"lastUpdateTime,omitempty"` 95 | 96 | // Last time the condition transitioned from one status to another. 
97 | LastTransitionTime string `json:"lastTransitionTime,omitempty"` 98 | } 99 | 100 | func (ps *BookkeeperClusterStatus) Init() { 101 | // Initialise conditions 102 | conditionTypes := []ClusterConditionType{ 103 | ClusterConditionPodsReady, 104 | ClusterConditionUpgrading, 105 | ClusterConditionError, 106 | } 107 | for _, conditionType := range conditionTypes { 108 | if _, condition := ps.GetClusterCondition(conditionType); condition == nil { 109 | c := newClusterCondition(conditionType, corev1.ConditionFalse, "", "") 110 | ps.setClusterCondition(*c) 111 | } 112 | } 113 | 114 | // Set current cluster version in version history, 115 | // so if the first upgrade fails we can rollback to this version 116 | if ps.VersionHistory == nil && ps.CurrentVersion != "" { 117 | ps.VersionHistory = []string{ps.CurrentVersion} 118 | } 119 | } 120 | 121 | func (ps *BookkeeperClusterStatus) SetPodsReadyConditionTrue() { 122 | c := newClusterCondition(ClusterConditionPodsReady, corev1.ConditionTrue, "", "") 123 | ps.setClusterCondition(*c) 124 | } 125 | 126 | func (ps *BookkeeperClusterStatus) SetPodsReadyConditionFalse() { 127 | c := newClusterCondition(ClusterConditionPodsReady, corev1.ConditionFalse, "", "") 128 | ps.setClusterCondition(*c) 129 | } 130 | 131 | func (ps *BookkeeperClusterStatus) SetUpgradingConditionTrue(reason, message string) { 132 | c := newClusterCondition(ClusterConditionUpgrading, corev1.ConditionTrue, reason, message) 133 | ps.setClusterCondition(*c) 134 | } 135 | 136 | func (ps *BookkeeperClusterStatus) SetUpgradingConditionFalse() { 137 | c := newClusterCondition(ClusterConditionUpgrading, corev1.ConditionFalse, "", "") 138 | ps.setClusterCondition(*c) 139 | } 140 | 141 | func (ps *BookkeeperClusterStatus) SetErrorConditionTrue(reason, message string) { 142 | c := newClusterCondition(ClusterConditionError, corev1.ConditionTrue, reason, message) 143 | ps.setClusterCondition(*c) 144 | } 145 | 146 | func (ps *BookkeeperClusterStatus) 
SetErrorConditionFalse() { 147 | c := newClusterCondition(ClusterConditionError, corev1.ConditionFalse, "", "") 148 | ps.setClusterCondition(*c) 149 | } 150 | 151 | func (ps *BookkeeperClusterStatus) SetRollbackConditionTrue(reason, message string) { 152 | c := newClusterCondition(ClusterConditionRollback, corev1.ConditionTrue, reason, message) 153 | ps.setClusterCondition(*c) 154 | } 155 | func (ps *BookkeeperClusterStatus) SetRollbackConditionFalse() { 156 | c := newClusterCondition(ClusterConditionRollback, corev1.ConditionFalse, "", "") 157 | ps.setClusterCondition(*c) 158 | } 159 | 160 | func newClusterCondition(condType ClusterConditionType, status corev1.ConditionStatus, reason, message string) *ClusterCondition { 161 | return &ClusterCondition{ 162 | Type: condType, 163 | Status: status, 164 | Reason: reason, 165 | Message: message, 166 | LastUpdateTime: "", 167 | LastTransitionTime: "", 168 | } 169 | } 170 | 171 | func (ps *BookkeeperClusterStatus) GetClusterCondition(t ClusterConditionType) (int, *ClusterCondition) { 172 | for i, c := range ps.Conditions { 173 | if t == c.Type { 174 | return i, &c 175 | } 176 | } 177 | return -1, nil 178 | } 179 | 180 | func (ps *BookkeeperClusterStatus) setClusterCondition(newCondition ClusterCondition) { 181 | now := time.Now().Format(time.RFC3339) 182 | position, existingCondition := ps.GetClusterCondition(newCondition.Type) 183 | 184 | if existingCondition == nil { 185 | ps.Conditions = append(ps.Conditions, newCondition) 186 | return 187 | } 188 | 189 | if existingCondition.Status != newCondition.Status { 190 | existingCondition.Status = newCondition.Status 191 | existingCondition.LastTransitionTime = now 192 | existingCondition.LastUpdateTime = now 193 | } 194 | 195 | if existingCondition.Reason != newCondition.Reason || existingCondition.Message != newCondition.Message { 196 | existingCondition.Reason = newCondition.Reason 197 | existingCondition.Message = newCondition.Message 198 | existingCondition.LastUpdateTime 
= now 199 | } 200 | 201 | ps.Conditions[position] = *existingCondition 202 | } 203 | 204 | func (ps *BookkeeperClusterStatus) AddToVersionHistory(version string) { 205 | lastIndex := len(ps.VersionHistory) - 1 206 | if version != "" && ps.VersionHistory[lastIndex] != version { 207 | ps.VersionHistory = append(ps.VersionHistory, version) 208 | log.Printf("Updating version history adding version %v", version) 209 | } 210 | } 211 | 212 | func (ps *BookkeeperClusterStatus) GetLastVersion() (previousVersion string) { 213 | len := len(ps.VersionHistory) 214 | return ps.VersionHistory[len-1] 215 | } 216 | 217 | func (ps *BookkeeperClusterStatus) IsClusterInErrorState() bool { 218 | _, errorCondition := ps.GetClusterCondition(ClusterConditionError) 219 | if errorCondition != nil && errorCondition.Status == corev1.ConditionTrue { 220 | return true 221 | } 222 | return false 223 | } 224 | 225 | func (ps *BookkeeperClusterStatus) IsClusterInUpgradeFailedState() bool { 226 | _, errorCondition := ps.GetClusterCondition(ClusterConditionError) 227 | if errorCondition == nil { 228 | return false 229 | } 230 | if errorCondition.Status == corev1.ConditionTrue && errorCondition.Reason == "UpgradeFailed" { 231 | return true 232 | } 233 | return false 234 | } 235 | 236 | func (ps *BookkeeperClusterStatus) IsClusterInUpgradeFailedOrRollbackState() bool { 237 | if ps.IsClusterInUpgradeFailedState() || ps.IsClusterInRollbackState() { 238 | return true 239 | } 240 | return false 241 | } 242 | 243 | func (ps *BookkeeperClusterStatus) IsClusterInRollbackState() bool { 244 | _, rollbackCondition := ps.GetClusterCondition(ClusterConditionRollback) 245 | if rollbackCondition == nil { 246 | return false 247 | } 248 | if rollbackCondition.Status == corev1.ConditionTrue { 249 | return true 250 | } 251 | return false 252 | } 253 | 254 | func (ps *BookkeeperClusterStatus) IsClusterInUpgradingState() bool { 255 | _, upgradeCondition := ps.GetClusterCondition(ClusterConditionUpgrading) 256 | if 
upgradeCondition == nil { 257 | return false 258 | } 259 | if upgradeCondition.Status == corev1.ConditionTrue { 260 | return true 261 | } 262 | return false 263 | } 264 | 265 | func (ps *BookkeeperClusterStatus) IsClusterInRollbackFailedState() bool { 266 | _, errorCondition := ps.GetClusterCondition(ClusterConditionError) 267 | if errorCondition == nil { 268 | return false 269 | } 270 | if errorCondition.Status == corev1.ConditionTrue && errorCondition.Reason == "RollbackFailed" { 271 | return true 272 | } 273 | return false 274 | } 275 | 276 | func (ps *BookkeeperClusterStatus) IsClusterInReadyState() bool { 277 | _, readyCondition := ps.GetClusterCondition(ClusterConditionPodsReady) 278 | if readyCondition != nil && readyCondition.Status == corev1.ConditionTrue { 279 | return true 280 | } 281 | return false 282 | } 283 | 284 | func (ps *BookkeeperClusterStatus) UpdateProgress(reason, updatedReplicas string) { 285 | if ps.IsClusterInUpgradingState() { 286 | // Set the upgrade condition reason to be UpgradingBookkeeperReason, message to be 0 287 | ps.SetUpgradingConditionTrue(reason, updatedReplicas) 288 | } else { 289 | ps.SetRollbackConditionTrue(reason, updatedReplicas) 290 | } 291 | } 292 | 293 | func (ps *BookkeeperClusterStatus) GetLastCondition() (lastCondition *ClusterCondition) { 294 | if ps.IsClusterInUpgradingState() { 295 | _, lastCondition := ps.GetClusterCondition(ClusterConditionUpgrading) 296 | return lastCondition 297 | } else if ps.IsClusterInRollbackState() { 298 | _, lastCondition := ps.GetClusterCondition(ClusterConditionRollback) 299 | return lastCondition 300 | } 301 | // nothing to do if we are neither upgrading nor rolling back, 302 | return nil 303 | } 304 | --------------------------------------------------------------------------------