├── config ├── manager │ ├── kustomization.yaml │ └── manager.yaml ├── prometheus │ ├── kustomization.yaml │ └── monitor.yaml ├── certmanager │ ├── kustomization.yaml │ ├── kustomizeconfig.yaml │ └── certificate.yaml ├── webhook │ ├── kustomization.yaml │ ├── service.yaml │ ├── kustomizeconfig.yaml │ └── manifests.yaml ├── rbac │ ├── kustomization.yaml │ ├── role_binding.yaml │ ├── leader_election_role_binding.yaml │ ├── etcdadmcluster_viewer_role.yaml │ ├── etcdadmcluster_editor_role.yaml │ ├── leader_election_role.yaml │ └── role.yaml ├── samples │ ├── etcdcluster_v1alpha4_etcdcluster.yaml │ ├── etcdcluster_v1alpha3_etcdadmcluster.yaml │ └── etcdcluster_v1beta1_etcdadmcluster.yaml ├── default │ ├── kustomizeconfig.yaml │ ├── manager_webhook_patch.yaml │ ├── webhookcainjection_patch.yaml │ └── kustomization.yaml └── crd │ ├── patches │ ├── cainjection_in_etcdadmclusters.yaml │ └── webhook_in_etcdadmclusters.yaml │ ├── kustomizeconfig.yaml │ └── kustomization.yaml ├── .golangci.yml ├── Dockerfile ├── api ├── v1beta1 │ ├── conversion.go │ ├── doc.go │ ├── groupversion_info.go │ ├── condition_consts.go │ ├── zz_generated.deepcopy.go │ ├── etcdadmcluster_webhook_test.go │ ├── etcdadmcluster_types.go │ └── etcdadmcluster_webhook.go └── v1alpha3 │ ├── doc.go │ ├── groupversion_info.go │ ├── conversion.go │ ├── condition_consts.go │ ├── zz_generated.deepcopy.go │ ├── etcdadmcluster_types.go │ └── zz_generated.conversion.go ├── tilt-provider.json ├── PROJECT ├── CODEOWNERS ├── hack └── boilerplate.go.txt ├── .gitignore ├── .github └── workflows │ ├── golangci-lint.yml │ └── vulnerability.yml ├── README.md ├── controllers ├── mocks │ ├── roundtripper.go │ └── etcdclient.go ├── etcd_plane_test.go ├── helpers_test.go ├── upgrade_test.go ├── upgrade.go ├── healthcheck.go ├── status.go ├── machines.go ├── status_test.go ├── scale.go ├── certs.go ├── testutils.go ├── helpers.go ├── periodic_healthcheck.go ├── periodic_healthcheck_test.go └── etcd_plane.go ├── internal └── 
thirdparty │ └── api │ └── v1alpha3 │ ├── zz_generated.deepcopy.go │ └── condition_types.go ├── etcdcluster.yaml ├── Makefile ├── go.mod ├── main.go └── LICENSE /config/manager/kustomization.yaml: -------------------------------------------------------------------------------- 1 | resources: 2 | - manager.yaml 3 | -------------------------------------------------------------------------------- /config/prometheus/kustomization.yaml: -------------------------------------------------------------------------------- 1 | resources: 2 | - monitor.yaml 3 | -------------------------------------------------------------------------------- /.golangci.yml: -------------------------------------------------------------------------------- 1 | version: "2" 2 | 3 | run: 4 | timeout: 10m 5 | 6 | linters: 7 | disable: 8 | - staticcheck -------------------------------------------------------------------------------- /config/certmanager/kustomization.yaml: -------------------------------------------------------------------------------- 1 | resources: 2 | - certificate.yaml 3 | 4 | configurations: 5 | - kustomizeconfig.yaml 6 | -------------------------------------------------------------------------------- /config/webhook/kustomization.yaml: -------------------------------------------------------------------------------- 1 | resources: 2 | - manifests.yaml 3 | - service.yaml 4 | 5 | configurations: 6 | - kustomizeconfig.yaml 7 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM gcr.io/distroless/static:nonroot 2 | WORKDIR / 3 | COPY bin/manager . 
4 | USER nonroot:nonroot 5 | 6 | ENTRYPOINT ["/manager"] 7 | -------------------------------------------------------------------------------- /config/rbac/kustomization.yaml: -------------------------------------------------------------------------------- 1 | resources: 2 | - role.yaml 3 | - role_binding.yaml 4 | - leader_election_role.yaml 5 | - leader_election_role_binding.yaml 6 | -------------------------------------------------------------------------------- /config/samples/etcdcluster_v1alpha4_etcdcluster.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: etcdcluster.cluster.x-k8s.io/v1alpha4 2 | kind: EtcdCluster 3 | metadata: 4 | name: etcdcluster-sample 5 | spec: 6 | # Add fields here 7 | foo: bar 8 | -------------------------------------------------------------------------------- /config/default/kustomizeconfig.yaml: -------------------------------------------------------------------------------- 1 | # This configuration is for teaching kustomize how to update name ref and var substitution 2 | varReference: 3 | - kind: Deployment 4 | path: spec/template/spec/volumes/secret/secretName -------------------------------------------------------------------------------- /config/samples/etcdcluster_v1alpha3_etcdadmcluster.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: etcdcluster.cluster.x-k8s.io/v1alpha3 2 | kind: EtcdadmCluster 3 | metadata: 4 | name: etcdadmcluster-sample 5 | spec: 6 | # Add fields here 7 | foo: bar 8 | -------------------------------------------------------------------------------- /config/samples/etcdcluster_v1beta1_etcdadmcluster.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: etcdcluster.cluster.x-k8s.io/v1beta1 2 | kind: EtcdadmCluster 3 | metadata: 4 | name: etcdadmcluster-sample 5 | spec: 6 | # Add fields here 7 | foo: bar 8 | 
-------------------------------------------------------------------------------- /api/v1beta1/conversion.go: -------------------------------------------------------------------------------- 1 | package v1beta1 2 | 3 | // Hub marks EtcdadmCluster as a conversion hub. 4 | func (*EtcdadmCluster) Hub() {} 5 | 6 | // Hub marks EtcdadmClusterList as a conversion hub. 7 | func (*EtcdadmClusterList) Hub() {} 8 | -------------------------------------------------------------------------------- /config/webhook/service.yaml: -------------------------------------------------------------------------------- 1 | 2 | apiVersion: v1 3 | kind: Service 4 | metadata: 5 | name: webhook-service 6 | namespace: system 7 | spec: 8 | ports: 9 | - port: 443 10 | targetPort: 9443 11 | selector: 12 | control-plane: controller-manager 13 | -------------------------------------------------------------------------------- /tilt-provider.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "etcdadm-controller", 3 | "config": { 4 | "image": "aws/etcdadm-controller:latest", 5 | "live_reload_deps": [ 6 | "main.go", 7 | "go.mod", 8 | "go.sum", 9 | "api", 10 | "controllers", 11 | "pkg", 12 | "exp" 13 | ] 14 | } 15 | } -------------------------------------------------------------------------------- /PROJECT: -------------------------------------------------------------------------------- 1 | domain: cluster.x-k8s.io 2 | repo: github.com/aws/etcdadm-controller 3 | resources: 4 | - group: etcdcluster 5 | kind: EtcdCluster 6 | version: v1alpha4 7 | - group: etcdcluster 8 | kind: EtcdadmCluster 9 | version: v1alpha3 10 | - group: etcdcluster 11 | kind: EtcdadmCluster 12 | version: v1beta1 13 | version: "2" 14 | -------------------------------------------------------------------------------- /config/rbac/role_binding.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: 
ClusterRoleBinding 3 | metadata: 4 | name: manager-rolebinding 5 | roleRef: 6 | apiGroup: rbac.authorization.k8s.io 7 | kind: ClusterRole 8 | name: manager-role 9 | subjects: 10 | - kind: ServiceAccount 11 | name: manager 12 | namespace: system 13 | -------------------------------------------------------------------------------- /config/rbac/leader_election_role_binding.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: RoleBinding 3 | metadata: 4 | name: leader-election-rolebinding 5 | roleRef: 6 | apiGroup: rbac.authorization.k8s.io 7 | kind: Role 8 | name: leader-election-role 9 | subjects: 10 | - kind: ServiceAccount 11 | name: manager 12 | namespace: system 13 | -------------------------------------------------------------------------------- /config/crd/patches/cainjection_in_etcdadmclusters.yaml: -------------------------------------------------------------------------------- 1 | # The following patch adds a directive for certmanager to inject CA into the CRD 2 | # CRD conversion requires k8s 1.13 or later. 
3 | apiVersion: apiextensions.k8s.io/v1 4 | kind: CustomResourceDefinition 5 | metadata: 6 | annotations: 7 | cert-manager.io/inject-ca-from: $(CERTIFICATE_NAMESPACE)/$(CERTIFICATE_NAME) 8 | name: etcdadmclusters.etcdcluster.cluster.x-k8s.io 9 | -------------------------------------------------------------------------------- /config/prometheus/monitor.yaml: -------------------------------------------------------------------------------- 1 | 2 | # Prometheus Monitor Service (Metrics) 3 | apiVersion: monitoring.coreos.com/v1 4 | kind: ServiceMonitor 5 | metadata: 6 | labels: 7 | control-plane: controller-manager 8 | name: controller-manager-metrics-monitor 9 | namespace: system 10 | spec: 11 | endpoints: 12 | - path: /metrics 13 | port: https 14 | selector: 15 | matchLabels: 16 | control-plane: controller-manager 17 | -------------------------------------------------------------------------------- /config/certmanager/kustomizeconfig.yaml: -------------------------------------------------------------------------------- 1 | # This configuration is for teaching kustomize how to update name ref and var substitution 2 | nameReference: 3 | - kind: Issuer 4 | group: cert-manager.io 5 | fieldSpecs: 6 | - kind: Certificate 7 | group: cert-manager.io 8 | path: spec/issuerRef/name 9 | 10 | varReference: 11 | - kind: Certificate 12 | group: cert-manager.io 13 | path: spec/commonName 14 | - kind: Certificate 15 | group: cert-manager.io 16 | path: spec/dnsNames 17 | -------------------------------------------------------------------------------- /CODEOWNERS: -------------------------------------------------------------------------------- 1 | ############################################################## 2 | # 3 | # List of approvers/reviewers 4 | # 5 | ############################################################## 6 | 7 | # These owners will be the default owners for everything in the repo. 
8 | * @2ez4szliu @abhay-krishna @abhinavmpandey08 @ahreehong @cxbrowne1207 @drewvanstone @g-gaston @jacobweinstock @jaxesn @mitalipaygude @panktishah26 @pokearu @rahulbabu95 @sp1999 @tatlat @vignesh-goutham @vivek-koppuru @ndeksa @rajeshvenkata 9 | -------------------------------------------------------------------------------- /config/rbac/etcdadmcluster_viewer_role.yaml: -------------------------------------------------------------------------------- 1 | # permissions for end users to view etcdadmclusters. 2 | apiVersion: rbac.authorization.k8s.io/v1 3 | kind: ClusterRole 4 | metadata: 5 | name: etcdadmcluster-viewer-role 6 | rules: 7 | - apiGroups: 8 | - etcdcluster.cluster.x-k8s.io 9 | resources: 10 | - etcdadmclusters 11 | verbs: 12 | - get 13 | - list 14 | - watch 15 | - apiGroups: 16 | - etcdcluster.cluster.x-k8s.io 17 | resources: 18 | - etcdadmclusters/status 19 | verbs: 20 | - get 21 | -------------------------------------------------------------------------------- /config/crd/kustomizeconfig.yaml: -------------------------------------------------------------------------------- 1 | # This file is for teaching kustomize how to substitute name and namespace reference in CRD 2 | nameReference: 3 | - kind: Service 4 | version: v1 5 | fieldSpecs: 6 | - kind: CustomResourceDefinition 7 | group: apiextensions.k8s.io 8 | path: spec/conversion/webhook/clientConfig/service/name 9 | 10 | namespace: 11 | - kind: CustomResourceDefinition 12 | group: apiextensions.k8s.io 13 | path: spec/conversion/webhook/clientConfig/service/namespace 14 | create: false 15 | 16 | varReference: 17 | - path: metadata/annotations 18 | -------------------------------------------------------------------------------- /config/crd/patches/webhook_in_etcdadmclusters.yaml: -------------------------------------------------------------------------------- 1 | # The following patch enables conversion webhook for CRD 2 | # CRD conversion requires k8s 1.13 or later. 
3 | apiVersion: apiextensions.k8s.io/v1 4 | kind: CustomResourceDefinition 5 | metadata: 6 | name: etcdadmclusters.etcdcluster.cluster.x-k8s.io 7 | spec: 8 | conversion: 9 | strategy: Webhook 10 | webhook: 11 | conversionReviewVersions: ["v1", "v1beta1"] 12 | clientConfig: 13 | service: 14 | namespace: system 15 | name: webhook-service 16 | path: /convert 17 | -------------------------------------------------------------------------------- /config/rbac/etcdadmcluster_editor_role.yaml: -------------------------------------------------------------------------------- 1 | # permissions for end users to edit etcdadmclusters. 2 | apiVersion: rbac.authorization.k8s.io/v1 3 | kind: ClusterRole 4 | metadata: 5 | name: etcdadmcluster-editor-role 6 | rules: 7 | - apiGroups: 8 | - etcdcluster.cluster.x-k8s.io 9 | resources: 10 | - etcdadmclusters 11 | verbs: 12 | - create 13 | - delete 14 | - get 15 | - list 16 | - patch 17 | - update 18 | - watch 19 | - apiGroups: 20 | - etcdcluster.cluster.x-k8s.io 21 | resources: 22 | - etcdadmclusters/status 23 | verbs: 24 | - get 25 | -------------------------------------------------------------------------------- /hack/boilerplate.go.txt: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ -------------------------------------------------------------------------------- /config/default/manager_webhook_patch.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: controller-manager 5 | namespace: system 6 | spec: 7 | template: 8 | spec: 9 | containers: 10 | - name: manager 11 | ports: 12 | - containerPort: 9443 13 | name: webhook-server 14 | protocol: TCP 15 | volumeMounts: 16 | - mountPath: /tmp/k8s-webhook-server/serving-certs 17 | name: cert 18 | readOnly: true 19 | volumes: 20 | - name: cert 21 | secret: 22 | defaultMode: 420 23 | secretName: webhook-server-cert 24 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Binaries for programs and plugins 3 | *.exe 4 | *.exe~ 5 | *.dll 6 | *.so 7 | *.dylib 8 | bin 9 | 10 | # Test binary, build with `go test -c` 11 | *.test 12 | 13 | # Output of the go coverage tool, specifically when used with LiteIDE 14 | *.out 15 | 16 | # Kubernetes Generated files - skip generated files, except for vendored files 17 | 18 | !vendor/**/zz_generated.* 19 | 20 | # editor and IDE paraphernalia 21 | .idea 22 | .vscode 23 | *.swp 24 | *.swo 25 | *~ 26 | .DS_Store 27 | 28 | # User-supplied Tiltfile extensions, settings, and builds 29 | tilt.d 30 | tilt-settings.json 31 | .tiltbuild 32 | 33 | ./templates 34 | templates -------------------------------------------------------------------------------- /config/default/webhookcainjection_patch.yaml: -------------------------------------------------------------------------------- 1 | # This patch add annotation to admission webhook config and 2 | # the variables $(CERTIFICATE_NAMESPACE) and $(CERTIFICATE_NAME) will be substituted by kustomize. 
3 | apiVersion: admissionregistration.k8s.io/v1 4 | kind: MutatingWebhookConfiguration 5 | metadata: 6 | name: mutating-webhook-configuration 7 | annotations: 8 | cert-manager.io/inject-ca-from: $(CERTIFICATE_NAMESPACE)/$(CERTIFICATE_NAME) 9 | --- 10 | apiVersion: admissionregistration.k8s.io/v1 11 | kind: ValidatingWebhookConfiguration 12 | metadata: 13 | name: validating-webhook-configuration 14 | annotations: 15 | cert-manager.io/inject-ca-from: $(CERTIFICATE_NAMESPACE)/$(CERTIFICATE_NAME) 16 | -------------------------------------------------------------------------------- /.github/workflows/golangci-lint.yml: -------------------------------------------------------------------------------- 1 | name: golangci-lint 2 | on: 3 | push: 4 | tags: 5 | - v* 6 | branches: 7 | - main 8 | pull_request: 9 | jobs: 10 | golangci: 11 | name: lint 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v4 15 | - uses: actions/setup-go@v5 16 | with: 17 | go-version: '1.24' 18 | check-latest: true 19 | cache: true 20 | - name: golangci-lint 21 | uses: golangci/golangci-lint-action@v8 22 | with: 23 | version: v2.1.0 24 | # Disable package caching to avoid a double cache with setup-go. 25 | skip-pkg-cache: true 26 | args: --timeout 10m 27 | -------------------------------------------------------------------------------- /config/rbac/leader_election_role.yaml: -------------------------------------------------------------------------------- 1 | # permissions to do leader election. 
2 | apiVersion: rbac.authorization.k8s.io/v1 3 | kind: Role 4 | metadata: 5 | name: leader-election-role 6 | rules: 7 | - apiGroups: 8 | - "" 9 | resources: 10 | - configmaps 11 | verbs: 12 | - get 13 | - list 14 | - watch 15 | - create 16 | - update 17 | - patch 18 | - delete 19 | - apiGroups: 20 | - "" 21 | resources: 22 | - configmaps/status 23 | verbs: 24 | - get 25 | - update 26 | - patch 27 | - apiGroups: 28 | - "" 29 | resources: 30 | - events 31 | verbs: 32 | - create 33 | - apiGroups: 34 | - "coordination.k8s.io" 35 | resources: 36 | - leases 37 | verbs: 38 | - get 39 | - list 40 | - watch 41 | - create 42 | - update 43 | - patch 44 | - delete 45 | -------------------------------------------------------------------------------- /api/v1beta1/doc.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2021 The Kubernetes Authors. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | // Package v1beta1 contains API Schema definitions for the etcd cluster v1beta1 API group 18 | // +kubebuilder:object:generate=true 19 | // +groupName=etcdcluster.cluster.x-k8s.io 20 | package v1beta1 21 | -------------------------------------------------------------------------------- /config/webhook/kustomizeconfig.yaml: -------------------------------------------------------------------------------- 1 | # the following config is for teaching kustomize where to look at when substituting vars. 
2 | # It requires kustomize v2.1.0 or newer to work properly. 3 | nameReference: 4 | - kind: Service 5 | version: v1 6 | fieldSpecs: 7 | - kind: MutatingWebhookConfiguration 8 | group: admissionregistration.k8s.io 9 | path: webhooks/clientConfig/service/name 10 | - kind: ValidatingWebhookConfiguration 11 | group: admissionregistration.k8s.io 12 | path: webhooks/clientConfig/service/name 13 | 14 | namespace: 15 | - kind: MutatingWebhookConfiguration 16 | group: admissionregistration.k8s.io 17 | path: webhooks/clientConfig/service/namespace 18 | create: true 19 | - kind: ValidatingWebhookConfiguration 20 | group: admissionregistration.k8s.io 21 | path: webhooks/clientConfig/service/namespace 22 | create: true 23 | 24 | varReference: 25 | - path: metadata/annotations 26 | -------------------------------------------------------------------------------- /api/v1alpha3/doc.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2021 The Kubernetes Authors. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | // Package v1alpha3 contains API Schema definitions for the etcd cluster v1alpha3 API group 18 | // +kubebuilder:object:generate=true 19 | // +groupName=etcdcluster.cluster.x-k8s.io 20 | // +k8s:conversion-gen=github.com/aws/etcdadm-controller/api/v1beta1 21 | package v1alpha3 22 | -------------------------------------------------------------------------------- /config/certmanager/certificate.yaml: -------------------------------------------------------------------------------- 1 | # The following manifests contain a self-signed issuer CR and a certificate CR. 2 | # More document can be found at https://docs.cert-manager.io 3 | # WARNING: Targets CertManager 0.11 check https://docs.cert-manager.io/en/latest/tasks/upgrading/index.html for 4 | # breaking changes 5 | apiVersion: cert-manager.io/v1 6 | kind: Issuer 7 | metadata: 8 | name: selfsigned-issuer 9 | namespace: system 10 | spec: 11 | selfSigned: {} 12 | --- 13 | apiVersion: cert-manager.io/v1 14 | kind: Certificate 15 | metadata: 16 | name: serving-cert # this name should match the one appeared in kustomizeconfig.yaml 17 | namespace: system 18 | spec: 19 | # $(SERVICE_NAME) and $(SERVICE_NAMESPACE) will be substituted by kustomize 20 | dnsNames: 21 | - $(SERVICE_NAME).$(SERVICE_NAMESPACE).svc 22 | - $(SERVICE_NAME).$(SERVICE_NAMESPACE).svc.cluster.local 23 | issuerRef: 24 | kind: Issuer 25 | name: selfsigned-issuer 26 | secretName: webhook-server-cert # this secret will not be prefixed, since it's not managed by kustomize 27 | -------------------------------------------------------------------------------- /.github/workflows/vulnerability.yml: -------------------------------------------------------------------------------- 1 | name: "Vulnerability scan" 2 | on: 3 | push: 4 | tags: 5 | - v* 6 | branches: 7 | - main 8 | pull_request: 9 | workflow_dispatch: 10 | schedule: 11 | # every day at 7am UTC 12 | - cron: '0 7 * * *' 13 | permissions: 14 | contents: read 15 | jobs: 16 | 
dependency-review: 17 | runs-on: ubuntu-latest 18 | steps: 19 | - name: "Checkout Repository" 20 | uses: actions/checkout@v4 21 | with: 22 | show-progress: false 23 | - name: "Dependency Review" 24 | uses: actions/dependency-review-action@v3 25 | with: 26 | base-ref: ${{ github.event.pull_request.base.sha || github.event.before || github.sha }} 27 | head-ref: ${{ github.event.pull_request.head.sha || github.sha }} 28 | govulncheck: 29 | runs-on: ubuntu-latest 30 | steps: 31 | - name: "Checkout Repository" 32 | uses: actions/checkout@v4 33 | with: 34 | show-progress: false 35 | - name: govulncheck 36 | uses: golang/govulncheck-action@v1 37 | with: 38 | check-latest: true 39 | repo-checkout: false 40 | go-version-file: go.mod 41 | -------------------------------------------------------------------------------- /config/crd/kustomization.yaml: -------------------------------------------------------------------------------- 1 | commonLabels: 2 | cluster.x-k8s.io/v1alpha3: v1alpha3 3 | cluster.x-k8s.io/v1alpha4: v1alpha4 4 | cluster.x-k8s.io/v1beta1: v1alpha3_v1beta1 5 | 6 | 7 | # This kustomization.yaml is not intended to be run by itself, 8 | # since it depends on service name and namespace that are out of this kustomize package. 9 | # It should be run by config/default 10 | resources: 11 | - bases/etcdcluster.cluster.x-k8s.io_etcdadmclusters.yaml 12 | # +kubebuilder:scaffold:crdkustomizeresource 13 | 14 | patchesStrategicMerge: 15 | # [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix. 16 | # patches here are for enabling the conversion webhook for each CRD 17 | - patches/webhook_in_etcdadmclusters.yaml 18 | #- patches/webhook_in_etcdadmclusters.yaml 19 | # +kubebuilder:scaffold:crdkustomizewebhookpatch 20 | 21 | # [CERTMANAGER] To enable webhook, uncomment all the sections with [CERTMANAGER] prefix. 
22 | # patches here are for enabling the CA injection for each CRD 23 | - patches/cainjection_in_etcdadmclusters.yaml 24 | #- patches/cainjection_in_etcdadmclusters.yaml 25 | # +kubebuilder:scaffold:crdkustomizecainjectionpatch 26 | 27 | # the following config is for teaching kustomize how to do kustomization for CRDs. 28 | configurations: 29 | - kustomizeconfig.yaml 30 | -------------------------------------------------------------------------------- /config/manager/manager.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Namespace 3 | metadata: 4 | labels: 5 | control-plane: controller-manager 6 | name: system 7 | --- 8 | apiVersion: v1 9 | kind: ServiceAccount 10 | metadata: 11 | name: manager 12 | namespace: system 13 | --- 14 | apiVersion: apps/v1 15 | kind: Deployment 16 | metadata: 17 | name: controller-manager 18 | namespace: system 19 | labels: 20 | control-plane: controller-manager 21 | spec: 22 | selector: 23 | matchLabels: 24 | control-plane: controller-manager 25 | replicas: 1 26 | template: 27 | metadata: 28 | labels: 29 | control-plane: controller-manager 30 | spec: 31 | containers: 32 | - command: 33 | - /manager 34 | args: 35 | - --enable-leader-election 36 | - --diagnostics-address=:8443 37 | - --insecure-diagnostics=false 38 | image: ${ETCDADM_CONTROLLER_IMAGE} 39 | name: manager 40 | ports: 41 | - containerPort: 8443 42 | name: metrics 43 | protocol: TCP 44 | resources: 45 | limits: 46 | cpu: 200m 47 | memory: 256Mi 48 | requests: 49 | cpu: 100m 50 | memory: 64Mi 51 | terminationGracePeriodSeconds: 10 52 | serviceAccountName: manager 53 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## ETCD Admin Controller for Amazon EKS Anywhere 2 | 3 | The etcdadm-controller is responsible for managing external etcd clusters created by Amazon EKS Anywhere (EKS-A) 
4 | 5 | Amazon EKS Anywhere is a new deployment option for Amazon EKS that enables you to easily create and operate Kubernetes clusters on-premises with your own virtual machines. 6 | It brings a consistent AWS management experience to your data center, building on the strengths of [Amazon EKS Distro](https://github.com/aws/eks-distro), the same distribution of Kubernetes that powers EKS on AWS. 7 | Its goal is to include full lifecycle management of multiple Kubernetes clusters that are capable of operating completely independently of any AWS services. 8 | 9 | Here are the steps for [getting started](https://anywhere.eks.amazonaws.com/docs/getting-started/) with EKS Anywhere. 10 | Full documentation for releases can be found on [https://anywhere.eks.amazonaws.com](https://anywhere.eks.amazonaws.com/). 11 | 12 | ## Security 13 | 14 | If you discover a potential security issue in this project, or think you may 15 | have discovered a security issue, we ask that you notify AWS Security via our 16 | [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). 17 | Please do **not** create a public GitHub issue. 18 | 19 | ## License 20 | 21 | This project is licensed under the [Apache-2.0 License](LICENSE). -------------------------------------------------------------------------------- /api/v1beta1/groupversion_info.go: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | // Package v1beta1 contains API Schema definitions for the etcdcluster v1beta1 API group 18 | // +kubebuilder:object:generate=true 19 | // +groupName=etcdcluster.cluster.x-k8s.io 20 | package v1beta1 21 | 22 | import ( 23 | "k8s.io/apimachinery/pkg/runtime/schema" 24 | "sigs.k8s.io/controller-runtime/pkg/scheme" 25 | ) 26 | 27 | var ( 28 | // GroupVersion is group version used to register these objects 29 | GroupVersion = schema.GroupVersion{Group: "etcdcluster.cluster.x-k8s.io", Version: "v1beta1"} 30 | 31 | // SchemeBuilder is used to add go types to the GroupVersionKind scheme 32 | SchemeBuilder = &scheme.Builder{GroupVersion: GroupVersion} 33 | 34 | // AddToScheme adds the types in this group-version to the given scheme. 35 | AddToScheme = SchemeBuilder.AddToScheme 36 | ) 37 | -------------------------------------------------------------------------------- /config/webhook/manifests.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: admissionregistration.k8s.io/v1 3 | kind: MutatingWebhookConfiguration 4 | metadata: 5 | name: mutating-webhook-configuration 6 | webhooks: 7 | - admissionReviewVersions: 8 | - v1 9 | - v1beta1 10 | clientConfig: 11 | service: 12 | name: webhook-service 13 | namespace: system 14 | path: /mutate-etcdcluster-cluster-x-k8s-io-v1beta1-etcdadmcluster 15 | failurePolicy: Fail 16 | name: metcdadmcluster.kb.io 17 | rules: 18 | - apiGroups: 19 | - etcdcluster.cluster.x-k8s.io 20 | apiVersions: 21 | - v1beta1 22 | operations: 23 | - CREATE 24 | - UPDATE 25 | resources: 26 | - etcdadmclusters 27 | sideEffects: None 28 | --- 29 | apiVersion: admissionregistration.k8s.io/v1 30 | kind: ValidatingWebhookConfiguration 31 | metadata: 32 | name: validating-webhook-configuration 33 | webhooks: 34 | - admissionReviewVersions: 35 | - v1 36 | - v1beta1 37 | clientConfig: 
38 | service: 39 | name: webhook-service 40 | namespace: system 41 | path: /validate-etcdcluster-cluster-x-k8s-io-v1beta1-etcdadmcluster 42 | failurePolicy: Fail 43 | name: vetcdadmcluster.kb.io 44 | rules: 45 | - apiGroups: 46 | - etcdcluster.cluster.x-k8s.io 47 | apiVersions: 48 | - v1beta1 49 | operations: 50 | - CREATE 51 | - UPDATE 52 | resources: 53 | - etcdadmclusters 54 | sideEffects: None 55 | -------------------------------------------------------------------------------- /api/v1alpha3/groupversion_info.go: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | // Package v1alpha3 contains API Schema definitions for the etcdcluster v1alpha3 API group 18 | // +kubebuilder:object:generate=true 19 | // +groupName=etcdcluster.cluster.x-k8s.io 20 | package v1alpha3 21 | 22 | import ( 23 | "k8s.io/apimachinery/pkg/runtime/schema" 24 | "sigs.k8s.io/controller-runtime/pkg/scheme" 25 | ) 26 | 27 | var ( 28 | // GroupVersion is group version used to register these objects 29 | GroupVersion = schema.GroupVersion{Group: "etcdcluster.cluster.x-k8s.io", Version: "v1alpha3"} 30 | 31 | // SchemeBuilder is used to add go types to the GroupVersionKind scheme 32 | SchemeBuilder = &scheme.Builder{GroupVersion: GroupVersion} 33 | 34 | // AddToScheme adds the types in this group-version to the given scheme. 
35 | AddToScheme = SchemeBuilder.AddToScheme 36 | 37 | // localSchemeBuilder is used for type conversions. 38 | localSchemeBuilder = SchemeBuilder.SchemeBuilder 39 | ) 40 | -------------------------------------------------------------------------------- /api/v1alpha3/conversion.go: -------------------------------------------------------------------------------- 1 | package v1alpha3 2 | 3 | import ( 4 | etcdv1beta1 "github.com/aws/etcdadm-controller/api/v1beta1" 5 | "sigs.k8s.io/controller-runtime/pkg/conversion" 6 | ) 7 | 8 | // ConvertTo converts this EtcdadmCluster to the Hub version (v1beta1). 9 | func (src *EtcdadmCluster) ConvertTo(dstRaw conversion.Hub) error { // nolint 10 | dst := dstRaw.(*etcdv1beta1.EtcdadmCluster) 11 | if err := Convert_v1alpha3_EtcdadmCluster_To_v1beta1_EtcdadmCluster(src, dst, nil); err != nil { 12 | return err 13 | } 14 | return nil 15 | } 16 | 17 | // ConvertFrom converts from the Hub version (v1beta1) to this EtcdadmCluster. 18 | func (dst *EtcdadmCluster) ConvertFrom(srcRaw conversion.Hub) error { // nolint 19 | src := srcRaw.(*etcdv1beta1.EtcdadmCluster) 20 | return Convert_v1beta1_EtcdadmCluster_To_v1alpha3_EtcdadmCluster(src, dst, nil) 21 | } 22 | 23 | // ConvertTo converts this EtcdadmClusterList to the Hub version (v1beta1). 24 | func (src *EtcdadmClusterList) ConvertTo(dstRaw conversion.Hub) error { 25 | dst := dstRaw.(*etcdv1beta1.EtcdadmClusterList) 26 | if err := Convert_v1alpha3_EtcdadmClusterList_To_v1beta1_EtcdadmClusterList(src, dst, nil); err != nil { 27 | return err 28 | } 29 | return nil 30 | } 31 | 32 | // ConvertFrom converts from the Hub version (v1beta1) to this EtcdadmCluster. 
33 | func (dst *EtcdadmClusterList) ConvertFrom(srcRaw conversion.Hub) error { // nolint 34 | src := srcRaw.(*etcdv1beta1.EtcdadmClusterList) 35 | return Convert_v1beta1_EtcdadmClusterList_To_v1alpha3_EtcdadmClusterList(src, dst, nil) 36 | } 37 | -------------------------------------------------------------------------------- /controllers/mocks/roundtripper.go: -------------------------------------------------------------------------------- 1 | // Code generated by MockGen. DO NOT EDIT. 2 | // Source: net/http (interfaces: RoundTripper) 3 | 4 | // Package mocks is a generated GoMock package. 5 | package mocks 6 | 7 | import ( 8 | http "net/http" 9 | reflect "reflect" 10 | 11 | gomock "github.com/golang/mock/gomock" 12 | ) 13 | 14 | // MockRoundTripper is a mock of RoundTripper interface. 15 | type MockRoundTripper struct { 16 | ctrl *gomock.Controller 17 | recorder *MockRoundTripperMockRecorder 18 | } 19 | 20 | // MockRoundTripperMockRecorder is the mock recorder for MockRoundTripper. 21 | type MockRoundTripperMockRecorder struct { 22 | mock *MockRoundTripper 23 | } 24 | 25 | // NewMockRoundTripper creates a new mock instance. 26 | func NewMockRoundTripper(ctrl *gomock.Controller) *MockRoundTripper { 27 | mock := &MockRoundTripper{ctrl: ctrl} 28 | mock.recorder = &MockRoundTripperMockRecorder{mock} 29 | return mock 30 | } 31 | 32 | // EXPECT returns an object that allows the caller to indicate expected use. 33 | func (m *MockRoundTripper) EXPECT() *MockRoundTripperMockRecorder { 34 | return m.recorder 35 | } 36 | 37 | // RoundTrip mocks base method. 38 | func (m *MockRoundTripper) RoundTrip(arg0 *http.Request) (*http.Response, error) { 39 | m.ctrl.T.Helper() 40 | ret := m.ctrl.Call(m, "RoundTrip", arg0) 41 | ret0, _ := ret[0].(*http.Response) 42 | ret1, _ := ret[1].(error) 43 | return ret0, ret1 44 | } 45 | 46 | // RoundTrip indicates an expected call of RoundTrip. 
47 | func (mr *MockRoundTripperMockRecorder) RoundTrip(arg0 interface{}) *gomock.Call { 48 | mr.mock.ctrl.T.Helper() 49 | return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "RoundTrip", reflect.TypeOf((*MockRoundTripper)(nil).RoundTrip), arg0) 50 | } 51 | -------------------------------------------------------------------------------- /config/rbac/role.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: rbac.authorization.k8s.io/v1 3 | kind: ClusterRole 4 | metadata: 5 | name: manager-role 6 | rules: 7 | - apiGroups: 8 | - "" 9 | resources: 10 | - configmaps 11 | - events 12 | - secrets 13 | verbs: 14 | - create 15 | - delete 16 | - get 17 | - list 18 | - patch 19 | - update 20 | - watch 21 | - apiGroups: 22 | - authentication.k8s.io 23 | resources: 24 | - tokenreviews 25 | verbs: 26 | - create 27 | - apiGroups: 28 | - authorization.k8s.io 29 | resources: 30 | - subjectaccessreviews 31 | verbs: 32 | - create 33 | - apiGroups: 34 | - bootstrap.cluster.x-k8s.io 35 | resources: 36 | - etcdadmconfigs 37 | - etcdadmconfigs/status 38 | verbs: 39 | - create 40 | - delete 41 | - get 42 | - list 43 | - patch 44 | - update 45 | - watch 46 | - apiGroups: 47 | - cluster.x-k8s.io 48 | resources: 49 | - clusters 50 | - clusters/status 51 | verbs: 52 | - get 53 | - list 54 | - watch 55 | - apiGroups: 56 | - cluster.x-k8s.io 57 | resources: 58 | - machines 59 | - machines/status 60 | verbs: 61 | - create 62 | - delete 63 | - get 64 | - list 65 | - patch 66 | - update 67 | - watch 68 | - apiGroups: 69 | - etcdcluster.cluster.x-k8s.io 70 | resources: 71 | - etcdadmclusters 72 | verbs: 73 | - create 74 | - delete 75 | - get 76 | - list 77 | - patch 78 | - update 79 | - watch 80 | - apiGroups: 81 | - etcdcluster.cluster.x-k8s.io 82 | resources: 83 | - etcdadmclusters/status 84 | verbs: 85 | - get 86 | - patch 87 | - update 88 | - apiGroups: 89 | - infrastructure.cluster.x-k8s.io 90 | resources: 91 | - '*' 92 | verbs: 93 | - 
create 94 | - delete 95 | - get 96 | - list 97 | - patch 98 | - update 99 | - watch 100 | -------------------------------------------------------------------------------- /internal/thirdparty/api/v1alpha3/zz_generated.deepcopy.go: -------------------------------------------------------------------------------- 1 | //go:build !ignore_autogenerated 2 | // +build !ignore_autogenerated 3 | 4 | /* 5 | 6 | 7 | Licensed under the Apache License, Version 2.0 (the "License"); 8 | you may not use this file except in compliance with the License. 9 | You may obtain a copy of the License at 10 | 11 | http://www.apache.org/licenses/LICENSE-2.0 12 | 13 | Unless required by applicable law or agreed to in writing, software 14 | distributed under the License is distributed on an "AS IS" BASIS, 15 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | See the License for the specific language governing permissions and 17 | limitations under the License. 18 | */ 19 | 20 | // Code generated by controller-gen. DO NOT EDIT. 21 | 22 | package v1alpha3 23 | 24 | import () 25 | 26 | // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 27 | func (in *Condition) DeepCopyInto(out *Condition) { 28 | *out = *in 29 | in.LastTransitionTime.DeepCopyInto(&out.LastTransitionTime) 30 | } 31 | 32 | // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Condition. 33 | func (in *Condition) DeepCopy() *Condition { 34 | if in == nil { 35 | return nil 36 | } 37 | out := new(Condition) 38 | in.DeepCopyInto(out) 39 | return out 40 | } 41 | 42 | // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
43 | func (in Conditions) DeepCopyInto(out *Conditions) { 44 | { 45 | in := &in 46 | *out = make(Conditions, len(*in)) 47 | for i := range *in { 48 | (*in)[i].DeepCopyInto(&(*out)[i]) 49 | } 50 | } 51 | } 52 | 53 | // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Conditions. 54 | func (in Conditions) DeepCopy() Conditions { 55 | if in == nil { 56 | return nil 57 | } 58 | out := new(Conditions) 59 | in.DeepCopyInto(out) 60 | return *out 61 | } 62 | -------------------------------------------------------------------------------- /controllers/etcd_plane_test.go: -------------------------------------------------------------------------------- 1 | package controllers 2 | 3 | import ( 4 | "testing" 5 | 6 | etcdbootstrapv1 "github.com/aws/etcdadm-bootstrap-provider/api/v1beta1" 7 | . "github.com/onsi/gomega" 8 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 9 | clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta2" 10 | "sigs.k8s.io/controller-runtime/pkg/client" 11 | "sigs.k8s.io/controller-runtime/pkg/client/fake" 12 | ) 13 | 14 | func TestOutOfDateMachines(t *testing.T) { 15 | g := NewWithT(t) 16 | 17 | cluster := newClusterWithExternalEtcd() 18 | etcdadmCluster := newEtcdadmCluster(cluster) 19 | 20 | machine1 := newEtcdMachine(etcdadmCluster, cluster) 21 | 22 | objects := []client.Object{ 23 | cluster, 24 | etcdadmCluster, 25 | infraTemplate.DeepCopy(), 26 | machine1, 27 | } 28 | 29 | fakeClient := fake.NewClientBuilder().WithScheme(setupScheme()).WithObjects(objects...).Build() 30 | 31 | machines := map[string]*clusterv1.Machine{ 32 | machine1.Name: machine1, 33 | } 34 | 35 | etcdadmConfigs, err := getEtcdadmConfigs(ctx, fakeClient, machines) 36 | g.Expect(err).ToNot(HaveOccurred()) 37 | infraResources, err := getInfraResources(ctx, fakeClient, machines) 38 | g.Expect(err).ToNot(HaveOccurred()) 39 | 40 | // build EtcdPlane for test 41 | ep := &EtcdPlane{ 42 | EC: etcdadmCluster, 43 | Cluster: cluster, 44 | Machines: machines, 45 | 
etcdadmConfigs: etcdadmConfigs, 46 | infraResources: infraResources, 47 | } 48 | 49 | outdatedMachines := ep.OutOfDateMachines() 50 | g.Expect(len(outdatedMachines)).To(Equal(0)) 51 | 52 | // change etcdadmConfig for machine 53 | ep.etcdadmConfigs[machine1.Name] = &etcdbootstrapv1.EtcdadmConfig{ 54 | ObjectMeta: metav1.ObjectMeta{ 55 | Namespace: testNamespace, 56 | Name: testClusterName, 57 | }, 58 | Spec: etcdbootstrapv1.EtcdadmConfigSpec{ 59 | EtcdadmInstallCommands: []string{"etcdadmInstallCommands is not empty"}, 60 | CloudInitConfig: &etcdbootstrapv1.CloudInitConfig{ 61 | Version: "v3.4.9", 62 | }, 63 | }, 64 | } 65 | 66 | // check that machine is in outdated machines 67 | outdatedMachines = ep.OutOfDateMachines() 68 | g.Expect(len(outdatedMachines)).To(Equal(1)) 69 | } 70 | -------------------------------------------------------------------------------- /controllers/helpers_test.go: -------------------------------------------------------------------------------- 1 | package controllers 2 | 3 | import ( 4 | "testing" 5 | 6 | . 
"github.com/onsi/gomega" 7 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 8 | clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta2" 9 | ) 10 | 11 | func TestGetEtcdMachineAddress(t *testing.T) { 12 | g := NewWithT(t) 13 | type test struct { 14 | machine clusterv1.Machine 15 | AvailAddrTypes clusterv1.MachineAddresses 16 | wantAddr string 17 | } 18 | 19 | capiMachine := clusterv1.Machine{ 20 | ObjectMeta: metav1.ObjectMeta{ 21 | Name: "machine", 22 | }, 23 | Spec: clusterv1.MachineSpec{ 24 | ClusterName: "test-cluster", 25 | }, 26 | Status: clusterv1.MachineStatus{}, 27 | } 28 | tests := []test{ 29 | { 30 | machine: capiMachine, 31 | AvailAddrTypes: clusterv1.MachineAddresses{ 32 | clusterv1.MachineAddress{ 33 | Type: clusterv1.MachineInternalIP, 34 | Address: "1.1.1.1", 35 | }, clusterv1.MachineAddress{ 36 | Type: clusterv1.MachineExternalIP, 37 | Address: "2.2.2.2", 38 | }, 39 | }, 40 | wantAddr: "2.2.2.2", 41 | }, { 42 | machine: capiMachine, 43 | AvailAddrTypes: []clusterv1.MachineAddress{ 44 | { 45 | Type: clusterv1.MachineInternalDNS, 46 | Address: "1.1.1.1", 47 | }, { 48 | Type: clusterv1.MachineExternalIP, 49 | Address: "2.2.2.2", 50 | }, 51 | }, 52 | wantAddr: "2.2.2.2", 53 | }, { 54 | machine: capiMachine, 55 | AvailAddrTypes: []clusterv1.MachineAddress{ 56 | { 57 | Type: clusterv1.MachineInternalIP, 58 | Address: "1.1.1.1", 59 | }, { 60 | Type: clusterv1.MachineInternalDNS, 61 | Address: "2.2.2.2", 62 | }, 63 | }, 64 | wantAddr: "2.2.2.2", 65 | }, { 66 | machine: capiMachine, 67 | AvailAddrTypes: []clusterv1.MachineAddress{ 68 | { 69 | Type: clusterv1.MachineExternalDNS, 70 | Address: "1.1.1.1", 71 | }, { 72 | Type: clusterv1.MachineInternalDNS, 73 | Address: "2.2.2.2", 74 | }, 75 | }, 76 | wantAddr: "1.1.1.1", 77 | }, { 78 | machine: capiMachine, 79 | AvailAddrTypes: []clusterv1.MachineAddress{ 80 | { 81 | Type: clusterv1.MachineHostName, 82 | Address: "1.1.1.1", 83 | }, 84 | }, 85 | wantAddr: "", 86 | }, 87 | } 88 | for _, tc := range tests { 89 | 
capiMachine.Status.Addresses = tc.AvailAddrTypes 90 | g.Expect(getEtcdMachineAddress(&capiMachine)).To(Equal(tc.wantAddr)) 91 | } 92 | } 93 | -------------------------------------------------------------------------------- /api/v1alpha3/condition_consts.go: -------------------------------------------------------------------------------- 1 | package v1alpha3 2 | 3 | import clusterv1 "github.com/aws/etcdadm-controller/internal/thirdparty/api/v1alpha3" 4 | 5 | const ( 6 | // EtcdMachinesSpecUpToDateCondition documents that the spec of the machines controlled by the EtcdadmCluster 7 | // is up to date. When this condition is false, the EtcdadmCluster is executing a rolling upgrade. 8 | EtcdMachinesSpecUpToDateCondition clusterv1.ConditionType = "EtcdMachinesSpecUpToDate" 9 | 10 | // EtcdRollingUpdateInProgressReason (Severity=Warning) documents an EtcdadmCluster object executing a 11 | // rolling upgrade for aligning the machines spec to the desired state. 12 | EtcdRollingUpdateInProgressReason = "EtcdRollingUpdateInProgress" 13 | 14 | // EtcdCertificatesAvailableCondition indicates that the etcdadm controller has generated the etcd certs to be used by new members 15 | // joining the etcd cluster, and to be used by the controlplane 16 | EtcdCertificatesAvailableCondition clusterv1.ConditionType = "EtcdCertificatesAvailable" 17 | 18 | // EtcdClusterResizeCompleted indicates if cluster is finished with scale up/down or is being resized 19 | EtcdClusterResizeCompleted clusterv1.ConditionType = "EtcdClusterResizeCompleted" 20 | 21 | // EtcdScaleUpInProgressReason indicates scale up is in progress 22 | EtcdScaleUpInProgressReason = "ScalingUp" 23 | 24 | // EtcdScaleDownInProgressReason indicates scale down is in progress 25 | EtcdScaleDownInProgressReason = "ScalingDown" 26 | 27 | // InitializedCondition shows if etcd cluster has been initialized, which is when the first etcd member has been initialized 28 | InitializedCondition clusterv1.ConditionType = "Initialized" 29 | 
30 | // WaitingForEtcdadmInitReason shows that the first etcd member has not been created yet 31 | WaitingForEtcdadmInitReason = "WaitingForEtcdadmInit" 32 | 33 | // EtcdMachinesReadyCondition stores an aggregate status of all owned machines 34 | EtcdMachinesReadyCondition clusterv1.ConditionType = "EtcdMachinesReady" 35 | 36 | // EtcdClusterHasNoOutdatedMembersCondition indicates that all etcd members are up-to-date. NOTE: this includes even members present on Machines not owned by the 37 | // etcdadm cluster 38 | EtcdClusterHasNoOutdatedMembersCondition clusterv1.ConditionType = "EtcdClusterHasNoOutdatedMachines" 39 | 40 | // EtcdClusterHasOutdatedMembersReason shows that some of the etcd members are out-of-date 41 | EtcdClusterHasOutdatedMembersReason = "EtcdClusterHasOutdatedMachines" 42 | ) 43 | -------------------------------------------------------------------------------- /config/default/kustomization.yaml: -------------------------------------------------------------------------------- 1 | # Adds namespace to all resources. 2 | namespace: etcdadm-controller-system 3 | 4 | # Value of this field is prepended to the 5 | # names of all resources, e.g. a deployment named 6 | # "wordpress" becomes "alices-wordpress". 7 | # Note that it should also match with the prefix (text before '-') of the namespace 8 | # field above. 9 | namePrefix: etcdadm-controller- 10 | 11 | # Labels to add to all resources and selectors. 12 | commonLabels: 13 | cluster.x-k8s.io/provider: "bootstrap-etcdadm-controller" 14 | 15 | bases: 16 | - ../crd 17 | - ../rbac 18 | - ../manager 19 | # [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix including the one in 20 | # crd/kustomization.yaml 21 | - ../webhook 22 | # [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER'. 'WEBHOOK' components are required. 23 | - ../certmanager 24 | # [PROMETHEUS] To enable prometheus monitor, uncomment all sections with 'PROMETHEUS'. 
25 | #- ../prometheus 26 | 27 | patchesStrategicMerge: 28 | # [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix including the one in 29 | # crd/kustomization.yaml 30 | - manager_webhook_patch.yaml 31 | 32 | # [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER'. 33 | # Uncomment 'CERTMANAGER' sections in crd/kustomization.yaml to enable the CA injection in the admission webhooks. 34 | # 'CERTMANAGER' needs to be enabled to use ca injection 35 | - webhookcainjection_patch.yaml 36 | 37 | # the following config is for teaching kustomize how to do var substitution 38 | vars: 39 | # [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER' prefix. 40 | - name: CERTIFICATE_NAMESPACE # namespace of the certificate CR 41 | objref: 42 | kind: Certificate 43 | group: cert-manager.io 44 | version: v1 45 | name: serving-cert # this name should match the one in certificate.yaml 46 | fieldref: 47 | fieldpath: metadata.namespace 48 | - name: CERTIFICATE_NAME 49 | objref: 50 | kind: Certificate 51 | group: cert-manager.io 52 | version: v1 53 | name: serving-cert # this name should match the one in certificate.yaml 54 | - name: SERVICE_NAMESPACE # namespace of the service 55 | objref: 56 | kind: Service 57 | version: v1 58 | name: webhook-service 59 | fieldref: 60 | fieldpath: metadata.namespace 61 | - name: SERVICE_NAME 62 | objref: 63 | kind: Service 64 | version: v1 65 | name: webhook-service 66 | 67 | configurations: 68 | - kustomizeconfig.yaml -------------------------------------------------------------------------------- /controllers/upgrade_test.go: -------------------------------------------------------------------------------- 1 | package controllers 2 | 3 | import ( 4 | "context" 5 | "testing" 6 | 7 | . 
"github.com/onsi/gomega" 8 | "k8s.io/utils/ptr" 9 | clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta2" 10 | "sigs.k8s.io/cluster-api/util/collections" 11 | ctrl "sigs.k8s.io/controller-runtime" 12 | "sigs.k8s.io/controller-runtime/pkg/client" 13 | "sigs.k8s.io/controller-runtime/pkg/client/fake" 14 | "sigs.k8s.io/controller-runtime/pkg/log" 15 | ) 16 | 17 | func TestEtcdadmClusterReconciler_upgradeEtcdClusterM_MachineIsRemovedFromOwnedMachines(t *testing.T) { 18 | cluster := newClusterWithExternalEtcd() 19 | baseEtcdadCluster := newEtcdadmCluster(cluster) 20 | 21 | testCases := []struct { 22 | name string 23 | ownedMachines []*clusterv1.Machine 24 | desiredReplicas int32 25 | }{ 26 | { 27 | name: "owned machines same as replicas", 28 | desiredReplicas: 3, 29 | ownedMachines: []*clusterv1.Machine{ 30 | newEtcdMachine(baseEtcdadCluster, cluster), 31 | newEtcdMachine(baseEtcdadCluster, cluster), 32 | newEtcdMachine(baseEtcdadCluster, cluster), 33 | }, 34 | }, 35 | { 36 | name: "more owned machines than replicas", 37 | desiredReplicas: 3, 38 | ownedMachines: []*clusterv1.Machine{ 39 | newEtcdMachine(baseEtcdadCluster, cluster), 40 | newEtcdMachine(baseEtcdadCluster, cluster), 41 | newEtcdMachine(baseEtcdadCluster, cluster), 42 | newEtcdMachine(baseEtcdadCluster, cluster), 43 | }, 44 | }, 45 | } 46 | for _, tc := range testCases { 47 | t.Run(tc.name, func(t *testing.T) { 48 | g := NewWithT(t) 49 | ctx := context.Background() 50 | 51 | objs := []client.Object{} 52 | for _, m := range tc.ownedMachines { 53 | objs = append(objs, m) 54 | } 55 | 56 | fakeClient := fake.NewClientBuilder(). 57 | WithScheme(setupScheme()). 
58 | WithObjects( 59 | objs..., 60 | ).Build() 61 | 62 | r := &EtcdadmClusterReconciler{ 63 | Client: fakeClient, 64 | uncachedClient: fakeClient, 65 | Log: log.Log, 66 | } 67 | 68 | etcdCluster := baseEtcdadCluster.DeepCopy() 69 | etcdCluster.Spec.Replicas = ptr.To(int32(tc.desiredReplicas)) 70 | etcdPlane := &EtcdPlane{ 71 | Cluster: cluster, 72 | EC: etcdCluster, 73 | Machines: collections.FromMachines(tc.ownedMachines...), 74 | } 75 | machines := collections.FromMachines(tc.ownedMachines[0]) 76 | 77 | g.Expect( 78 | r.upgradeEtcdCluster(ctx, cluster, etcdCluster, etcdPlane, machines), 79 | ).To(Equal(ctrl.Result{})) 80 | }) 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /controllers/mocks/etcdclient.go: -------------------------------------------------------------------------------- 1 | // Code generated by MockGen. DO NOT EDIT. 2 | // Source: controllers/controller.go 3 | 4 | // Package mocks is a generated GoMock package. 5 | package mocks 6 | 7 | import ( 8 | context "context" 9 | reflect "reflect" 10 | 11 | gomock "github.com/golang/mock/gomock" 12 | clientv3 "go.etcd.io/etcd/client/v3" 13 | ) 14 | 15 | // MockEtcdClient is a mock of EtcdClient interface. 16 | type MockEtcdClient struct { 17 | ctrl *gomock.Controller 18 | recorder *MockEtcdClientMockRecorder 19 | } 20 | 21 | // MockEtcdClientMockRecorder is the mock recorder for MockEtcdClient. 22 | type MockEtcdClientMockRecorder struct { 23 | mock *MockEtcdClient 24 | } 25 | 26 | // NewMockEtcdClient creates a new mock instance. 27 | func NewMockEtcdClient(ctrl *gomock.Controller) *MockEtcdClient { 28 | mock := &MockEtcdClient{ctrl: ctrl} 29 | mock.recorder = &MockEtcdClientMockRecorder{mock} 30 | return mock 31 | } 32 | 33 | // EXPECT returns an object that allows the caller to indicate expected use. 34 | func (m *MockEtcdClient) EXPECT() *MockEtcdClientMockRecorder { 35 | return m.recorder 36 | } 37 | 38 | // Close mocks base method. 
39 | func (m *MockEtcdClient) Close() error { 40 | m.ctrl.T.Helper() 41 | ret := m.ctrl.Call(m, "Close") 42 | ret0, _ := ret[0].(error) 43 | return ret0 44 | } 45 | 46 | // Close indicates an expected call of Close. 47 | func (mr *MockEtcdClientMockRecorder) Close() *gomock.Call { 48 | mr.mock.ctrl.T.Helper() 49 | return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Close", reflect.TypeOf((*MockEtcdClient)(nil).Close)) 50 | } 51 | 52 | // MemberList mocks base method. 53 | func (m *MockEtcdClient) MemberList(ctx context.Context) (*clientv3.MemberListResponse, error) { 54 | m.ctrl.T.Helper() 55 | ret := m.ctrl.Call(m, "MemberList", ctx) 56 | ret0, _ := ret[0].(*clientv3.MemberListResponse) 57 | ret1, _ := ret[1].(error) 58 | return ret0, ret1 59 | } 60 | 61 | // MemberList indicates an expected call of MemberList. 62 | func (mr *MockEtcdClientMockRecorder) MemberList(ctx interface{}) *gomock.Call { 63 | mr.mock.ctrl.T.Helper() 64 | return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "MemberList", reflect.TypeOf((*MockEtcdClient)(nil).MemberList), ctx) 65 | } 66 | 67 | // MemberRemove mocks base method. 68 | func (m *MockEtcdClient) MemberRemove(ctx context.Context, id uint64) (*clientv3.MemberRemoveResponse, error) { 69 | m.ctrl.T.Helper() 70 | ret := m.ctrl.Call(m, "MemberRemove", ctx, id) 71 | ret0, _ := ret[0].(*clientv3.MemberRemoveResponse) 72 | ret1, _ := ret[1].(error) 73 | return ret0, ret1 74 | } 75 | 76 | // MemberRemove indicates an expected call of MemberRemove. 
77 | func (mr *MockEtcdClientMockRecorder) MemberRemove(ctx, id interface{}) *gomock.Call { 78 | mr.mock.ctrl.T.Helper() 79 | return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "MemberRemove", reflect.TypeOf((*MockEtcdClient)(nil).MemberRemove), ctx, id) 80 | } 81 | -------------------------------------------------------------------------------- /api/v1beta1/condition_consts.go: -------------------------------------------------------------------------------- 1 | package v1beta1 2 | 3 | import clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta1" 4 | 5 | const ( 6 | // EtcdMachinesSpecUpToDateCondition documents that the spec of the machines controlled by the EtcdadmCluster 7 | // is up to date. When this condition is false, the EtcdadmCluster is executing a rolling upgrade. 8 | EtcdMachinesSpecUpToDateCondition clusterv1.ConditionType = "EtcdMachinesSpecUpToDate" 9 | 10 | // EtcdRollingUpdateInProgressReason (Severity=Warning) documents an EtcdadmCluster object executing a 11 | // rolling upgrade for aligning the machines spec to the desired state. 12 | EtcdRollingUpdateInProgressReason = "EtcdRollingUpdateInProgress" 13 | 14 | // EtcdMaxNumberOfMachinesReached (Severity=Warning) indicatest that there are 2X replicas while executing a 15 | // rolling upgrade for aligning the machines spec to the desired state. 
16 | MaxNumberOfEtcdMachinesReachedReason = "MaxNumberOfEtcdMachinesReached" 17 | 18 | // EtcdCertificatesAvailableCondition indicates that the etcdadm controller has generated the etcd certs to be used by new members 19 | // joining the etcd cluster, and to be used by the controlplane 20 | EtcdCertificatesAvailableCondition clusterv1.ConditionType = "EtcdCertificatesAvailable" 21 | 22 | // EtcdClusterResizeCompleted indicates if cluster is finished with scale up/down or is being resized 23 | EtcdClusterResizeCompleted clusterv1.ConditionType = "EtcdClusterResizeCompleted" 24 | 25 | // EtcdScaleUpInProgressReason indicates scale up is in progress 26 | EtcdScaleUpInProgressReason = "ScalingUp" 27 | 28 | // EtcdScaleDownInProgressReason indicates scale down is in progress 29 | EtcdScaleDownInProgressReason = "ScalingDown" 30 | 31 | // InitializedCondition shows if etcd cluster has been initialized, which is when the first etcd member has been initialized 32 | InitializedCondition clusterv1.ConditionType = "Initialized" 33 | 34 | // WaitingForEtcdadmInitReason shows that the first etcd member has not been created yet 35 | WaitingForEtcdadmInitReason = "WaitingForEtcdadmInit" 36 | 37 | // EtcdMachinesReadyCondition stores an aggregate status of all owned machines 38 | EtcdMachinesReadyCondition clusterv1.ConditionType = "EtcdMachinesReady" 39 | 40 | // EtcdClusterHasNoOutdatedMembersCondition indicates that all etcd members are up-to-date. 
NOTE: this includes even members present on Machines not owned by the 41 | // etcdadm cluster 42 | EtcdClusterHasNoOutdatedMembersCondition clusterv1.ConditionType = "EtcdClusterHasNoOutdatedMachines" 43 | 44 | // EtcdClusterHasOutdatedMembersReason shows that some of the etcd members are out-of-date 45 | EtcdClusterHasOutdatedMembersReason = "EtcdClusterHasOutdatedMachines" 46 | 47 | // EtcdEndpointsAvailable shows that all endpoints of the etcd cluster passed healthcheck and are available 48 | EtcdEndpointsAvailable = "EtcdEndpointsAvailable" 49 | 50 | // WaitingForEtcdadmEndpointsToPassHealthcheckReason shows that some of the etcd members are not ready yet 51 | WaitingForEtcdadmEndpointsToPassHealthcheckReason = "WaitingForEtcdadmEndpointsToPassHealthcheck" 52 | ) 53 | -------------------------------------------------------------------------------- /controllers/upgrade.go: -------------------------------------------------------------------------------- 1 | package controllers 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | 7 | etcdv1 "github.com/aws/etcdadm-controller/api/v1beta1" 8 | "github.com/pkg/errors" 9 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 10 | clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta2" 11 | "sigs.k8s.io/cluster-api/util/collections" 12 | ctrl "sigs.k8s.io/controller-runtime" 13 | ) 14 | 15 | const minEtcdMemberReadySeconds = 60 16 | 17 | func (r *EtcdadmClusterReconciler) upgradeEtcdCluster(ctx context.Context, 18 | cluster *clusterv1.Cluster, 19 | ec *etcdv1.EtcdadmCluster, 20 | ep *EtcdPlane, 21 | machinesToUpgrade collections.Machines, 22 | ) (ctrl.Result, error) { 23 | /*In the absence of static DNS A records as etcd cluster endpoints, IP addresses of the etcd machines are used as etcd cluster endpoints. 24 | During cluster upgrade, etcd machines need to be upgraded first, since the controlplane machines need to know the updated etcd endpoints to pass in 25 | as etcd-servers flag value to the kube-apiserver. 
However, the older outdated controlplane machines will still try to connect to the older etcd members. 26 | Hence for now, scale down will not delete the machine & remove the etcd member. It will only remove the ownerRef of the EtcdadmCluster object from the Machine*/ 27 | log := r.Log 28 | if *ec.Spec.Replicas == 1 { 29 | // for single node etcd cluster, scale up first followed by a scale down 30 | if int32(ep.Machines.Len()) == *ec.Spec.Replicas { 31 | return r.scaleUpEtcdCluster(ctx, ec, cluster, ep) 32 | } 33 | // remove older etcd member's machine from being an ownedMachine 34 | return ctrl.Result{}, r.removeFromListOfOwnedMachines(ctx, ep, machinesToUpgrade) 35 | } 36 | 37 | // Under normal circumstances, ep.Machines, which are the etcd machines owned by the etcdadm 38 | // cluster should never be higher than the specified number of desired replicas, they should 39 | // be equal at most. However, it's possible that due to stale client caches or even manual 40 | // updates (where a user re-adds the owner reference to an old etcd machine), an etcdadm cluster 41 | // might own at this point more machines that the number of desired replicas. In that case, 42 | // regardless of the reason, we want to remove the owner reference before creating new replicas. 43 | // If not, the next reconciliation loop will still detect an owned machine out of spec and wil 44 | // create a new replica, again without removing ownership of the out of spec machine. This 45 | // causes a loop of new machines being created without a limit. 
46 | if int32(ep.Machines.Len()) >= *ec.Spec.Replicas { 47 | log.Info("Scaling down etcd cluster") 48 | return ctrl.Result{}, r.removeFromListOfOwnedMachines(ctx, ep, machinesToUpgrade) 49 | } 50 | log.Info("Scaling up etcd cluster") 51 | return r.scaleUpEtcdCluster(ctx, ec, cluster, ep) 52 | } 53 | 54 | func (r *EtcdadmClusterReconciler) removeFromListOfOwnedMachines(ctx context.Context, ep *EtcdPlane, 55 | machinesToUpgrade collections.Machines) error { 56 | machineToDelete, err := selectMachineForScaleDown(ep, machinesToUpgrade) 57 | if err != nil || machineToDelete == nil { 58 | return errors.Wrap(err, "failed to select machine for scale down") 59 | } 60 | r.Log.Info(fmt.Sprintf("Removing member %s from list of owned Etcd machines", machineToDelete.Name)) 61 | // remove the etcd cluster ownerRef so it's no longer considered a machine owned by the etcd cluster 62 | machineToDelete.OwnerReferences = []metav1.OwnerReference{} 63 | return r.Client.Update(ctx, machineToDelete) 64 | } 65 | -------------------------------------------------------------------------------- /internal/thirdparty/api/v1alpha3/condition_types.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2020 The Kubernetes Authors. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
*/

// This package vendors the Cluster API v1alpha3 condition types (see the
// internal/thirdparty directory path) for use by the deprecated v1alpha3
// EtcdadmCluster API.
// +kubebuilder:object:generate=true
package v1alpha3

import (
	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// ANCHOR: ConditionSeverity

// ConditionSeverity expresses the severity of a Condition Type failing.
type ConditionSeverity string

const (
	// ConditionSeverityError specifies that a condition with `Status=False` is an error.
	ConditionSeverityError ConditionSeverity = "Error"

	// ConditionSeverityWarning specifies that a condition with `Status=False` is a warning.
	ConditionSeverityWarning ConditionSeverity = "Warning"

	// ConditionSeverityInfo specifies that a condition with `Status=False` is informative.
	ConditionSeverityInfo ConditionSeverity = "Info"

	// ConditionSeverityNone should apply only to conditions with `Status=True`.
	ConditionSeverityNone ConditionSeverity = ""
)

// ANCHOR_END: ConditionSeverity

// ANCHOR: ConditionType

// ConditionType is a valid value for Condition.Type.
type ConditionType string

// ANCHOR_END: ConditionType

// ANCHOR: Condition

// Condition defines an observation of a Cluster API resource operational state.
type Condition struct {
	// Type of condition in CamelCase or in foo.example.com/CamelCase.
	// Many .condition.type values are consistent across resources like Available, but because arbitrary conditions
	// can be useful (see .node.status.conditions), the ability to deconflict is important.
	// +required
	Type ConditionType `json:"type"`

	// Status of the condition, one of True, False, Unknown.
	// +required
	Status corev1.ConditionStatus `json:"status"`

	// Severity provides an explicit classification of Reason code, so the users or machines can immediately
	// understand the current situation and act accordingly.
	// The Severity field MUST be set only when Status=False.
	// +optional
	Severity ConditionSeverity `json:"severity,omitempty"`

	// Last time the condition transitioned from one status to another.
	// This should be when the underlying condition changed. If that is not known, then using the time when
	// the API field changed is acceptable.
	// +required
	LastTransitionTime metav1.Time `json:"lastTransitionTime,omitempty"`

	// The reason for the condition's last transition in CamelCase.
	// The specific API may choose whether or not this field is considered a guaranteed API.
	// This field may not be empty.
	// +optional
	Reason string `json:"reason,omitempty"`

	// A human readable message indicating details about the transition.
	// This field may be empty.
	// +optional
	Message string `json:"message,omitempty"`
}

// ANCHOR_END: Condition

// ANCHOR: Conditions

// Conditions provide observations of the operational state of a Cluster API resource.
96 | type Conditions []Condition 97 | 98 | // ANCHOR_END: Conditions 99 | -------------------------------------------------------------------------------- /controllers/healthcheck.go: -------------------------------------------------------------------------------- 1 | package controllers 2 | 3 | import ( 4 | "context" 5 | "crypto/tls" 6 | "crypto/x509" 7 | "encoding/json" 8 | "fmt" 9 | "io" 10 | "net" 11 | "net/http" 12 | "net/url" 13 | "time" 14 | 15 | "github.com/pkg/errors" 16 | clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta2" 17 | ) 18 | 19 | const ( 20 | httpClientTimeout = 10 * time.Second 21 | portCheckTimeout = 2 * time.Second 22 | ) 23 | 24 | type etcdHealthCheckResponse struct { 25 | Health string `json:"health"` 26 | } 27 | 28 | type portNotOpenError struct{} 29 | 30 | func (h *portNotOpenError) Error() string { 31 | return "etcd endpoint port is not open" 32 | } 33 | 34 | var portNotOpenErr = &portNotOpenError{} 35 | 36 | func (r *EtcdadmClusterReconciler) performEndpointHealthCheck(ctx context.Context, cluster *clusterv1.Cluster, endpoint string, logLevelInfo bool) error { 37 | client, err := r.getEtcdHttpClient(ctx, cluster) 38 | if err != nil { 39 | return err 40 | } 41 | 42 | u, err := url.Parse(endpoint) 43 | if err != nil { 44 | return errors.Wrapf(err, "invalid etcd endpoint url") 45 | } 46 | if !r.isPortOpen(ctx, u.Host) { 47 | return portNotOpenErr 48 | } 49 | 50 | healthCheckURL := getMemberHealthCheckEndpoint(endpoint) 51 | if logLevelInfo { 52 | // logging non-failures only for non-periodic checks so as to not log too many events 53 | r.Log.Info("Performing healthcheck on", "endpoint", healthCheckURL) 54 | } 55 | 56 | req, err := http.NewRequest("GET", healthCheckURL, nil) 57 | if err != nil { 58 | return errors.Wrap(err, "error creating healthcheck request") 59 | } 60 | 61 | resp, err := client.Do(req) 62 | if err != nil { 63 | return errors.Wrap(err, "error checking etcd member health") 64 | } 65 | // reuse connection 66 | defer func() 
{ _ = resp.Body.Close() }() 67 | 68 | if resp.StatusCode != http.StatusOK { 69 | return errors.Wrap(err, "Etcd member not ready, retry") 70 | } 71 | 72 | body, err := io.ReadAll(resp.Body) 73 | if err != nil { 74 | return err 75 | } 76 | 77 | if err := parseEtcdHealthCheckOutput(body); err != nil { 78 | return errors.Wrap(err, fmt.Sprintf("etcd member %v failed healthcheck", endpoint)) 79 | } 80 | if logLevelInfo { 81 | r.Log.Info("Etcd member ready", "member", endpoint) 82 | } 83 | 84 | return nil 85 | } 86 | 87 | func parseEtcdHealthCheckOutput(data []byte) error { 88 | obj := etcdHealthCheckResponse{} 89 | if err := json.Unmarshal(data, &obj); err != nil { 90 | return err 91 | } 92 | if obj.Health == "true" { 93 | return nil 94 | } 95 | return fmt.Errorf("/health returned %q", obj.Health) 96 | } 97 | 98 | func (r *EtcdadmClusterReconciler) getEtcdHttpClient(ctx context.Context, cluster *clusterv1.Cluster) (*http.Client, error) { 99 | httpClientVal, httpClientExists := r.etcdHealthCheckConfig.clusterToHttpClient.Load(cluster.UID) 100 | if httpClientExists { 101 | httpClient, ok := httpClientVal.(*http.Client) 102 | if ok { 103 | return httpClient, nil 104 | } 105 | } 106 | 107 | caCertPool := x509.NewCertPool() 108 | caCert, err := r.getCACert(ctx, cluster) 109 | if err != nil { 110 | return nil, err 111 | } 112 | caCertPool.AppendCertsFromPEM(caCert) 113 | 114 | clientCert, err := r.getClientCerts(ctx, cluster) 115 | if err != nil { 116 | return nil, errors.Wrap(err, "Error getting client cert for healthcheck") 117 | } 118 | 119 | etcdHttpClient := &http.Client{ 120 | Timeout: httpClientTimeout, 121 | Transport: &http.Transport{ 122 | TLSClientConfig: &tls.Config{ 123 | RootCAs: caCertPool, 124 | Certificates: []tls.Certificate{clientCert}, 125 | }, 126 | }, 127 | } 128 | r.etcdHealthCheckConfig.clusterToHttpClient.Store(cluster.UID, etcdHttpClient) 129 | return etcdHttpClient, nil 130 | } 131 | 132 | func isPortOpen(ctx context.Context, endpoint string) bool { 
133 | conn, err := net.DialTimeout("tcp", endpoint, portCheckTimeout) 134 | if err != nil { 135 | return false 136 | } 137 | 138 | if conn != nil { 139 | _ = conn.Close() 140 | return true 141 | } 142 | 143 | return false 144 | } 145 | -------------------------------------------------------------------------------- /controllers/status.go: -------------------------------------------------------------------------------- 1 | package controllers 2 | 3 | import ( 4 | "context" 5 | "sort" 6 | "strings" 7 | 8 | etcdv1 "github.com/aws/etcdadm-controller/api/v1beta1" 9 | corev1 "k8s.io/api/core/v1" 10 | "k8s.io/klog/v2" 11 | clusterv1beta1 "sigs.k8s.io/cluster-api/api/core/v1beta1" 12 | clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta2" 13 | v1beta1conditions "sigs.k8s.io/cluster-api/util/deprecated/v1beta1/conditions" 14 | "sigs.k8s.io/controller-runtime/pkg/client" 15 | ) 16 | 17 | func (r *EtcdadmClusterReconciler) updateStatus(ctx context.Context, ec *etcdv1.EtcdadmCluster, cluster *clusterv1.Cluster, ownedMachines etcdMachines) error { 18 | log := r.Log.WithName(ec.Name) 19 | selector := EtcdMachinesSelectorForCluster(cluster.Name, ec.Name) 20 | // Copy label selector to its status counterpart in string format. 21 | // This is necessary for CRDs including scale subresources. 22 | ec.Status.Selector = selector.String() 23 | 24 | machines := make([]*clusterv1.Machine, 0, len(ownedMachines)) 25 | for _, machine := range ownedMachines { 26 | machines = append(machines, machine.Machine) 27 | } 28 | log.Info("Following machines owned by this etcd cluster", "machines", klog.KObjSlice(machines)) 29 | 30 | desiredReplicas := *ec.Spec.Replicas 31 | 32 | // Only consider a healthy machine as a ready replica 33 | // This will prevent an owned machine being deleted due to a catastrophic event from being considered ready. 
34 | readyReplicas := int32(0) 35 | for _, m := range ownedMachines { 36 | if m.healthy() { 37 | readyReplicas++ 38 | } 39 | } 40 | ec.Status.ReadyReplicas = readyReplicas 41 | 42 | if !ec.DeletionTimestamp.IsZero() { 43 | return nil 44 | } 45 | 46 | if readyReplicas < desiredReplicas { 47 | v1beta1conditions.MarkFalse(ec, etcdv1.EtcdClusterResizeCompleted, etcdv1.EtcdScaleUpInProgressReason, clusterv1beta1.ConditionSeverityWarning, "Scaling up etcd cluster to %d replicas (actual %d)", desiredReplicas, readyReplicas) 48 | ec.Status.Ready = false 49 | return nil 50 | } 51 | 52 | if readyReplicas > desiredReplicas { 53 | v1beta1conditions.MarkFalse(ec, etcdv1.EtcdClusterResizeCompleted, etcdv1.EtcdScaleDownInProgressReason, clusterv1beta1.ConditionSeverityWarning, "Scaling up etcd cluster to %d replicas (actual %d)", desiredReplicas, readyReplicas) 54 | ec.Status.Ready = false 55 | return nil 56 | } 57 | 58 | for _, m := range ownedMachines { 59 | if !m.healthy() { 60 | if m.listening { 61 | // The machine is listening but not ready/unhealthy 62 | ec.Status.Ready = false 63 | return m.healthError 64 | } else { 65 | // The machine is not listening, probably transient while etcd starts 66 | return nil 67 | } 68 | } 69 | } 70 | 71 | v1beta1conditions.MarkTrue(ec, etcdv1.EtcdClusterResizeCompleted) 72 | 73 | // etcd ready when all machines have address set 74 | ec.Status.Ready = true 75 | v1beta1conditions.MarkTrue(ec, etcdv1.EtcdEndpointsAvailable) 76 | 77 | endpoints := ownedMachines.endpoints() 78 | sort.Strings(endpoints) 79 | currEndpoints := strings.Join(endpoints, ",") 80 | 81 | log.Info("Comparing current and previous endpoints", "current endpoints", currEndpoints, "previous endpoints", ec.Status.Endpoints) 82 | // Checking if endpoints have changed. 
This avoids unnecessary client calls
	// to get and update the Secret containing the endpoints
	if ec.Status.Endpoints != currEndpoints {
		log.Info("Updating endpoints annotation, and the Secret containing etcdadm join address")
		ec.Status.Endpoints = currEndpoints
		// NOTE(review): the Secret name is taken from Status.InitMachineAddress —
		// presumably the init secret is named after the init machine's address;
		// confirm against the code that creates this Secret.
		secretNameNs := client.ObjectKey{Name: ec.Status.InitMachineAddress, Namespace: cluster.Namespace}
		secretInitAddress := &corev1.Secret{}
		if err := r.Client.Get(ctx, secretNameNs, secretInitAddress); err != nil {
			return err
		}
		// "address" holds a single join address (derived from the first sorted
		// endpoint); cleared when no endpoints exist.
		if len(endpoints) > 0 {
			secretInitAddress.Data["address"] = []byte(getEtcdMachineAddressFromClientURL(endpoints[0]))
		} else {
			secretInitAddress.Data["address"] = []byte("")
		}
		// "clientUrls" mirrors the full comma-joined endpoint list.
		secretInitAddress.Data["clientUrls"] = []byte(ec.Status.Endpoints)
		r.Log.Info("Updating init secret with endpoints")
		if err := r.Client.Update(ctx, secretInitAddress); err != nil {
			return err
		}
	}

	// set creationComplete to true, this is only set once after the first set of endpoints are ready and never unset, to indicate that the cluster has been created
	ec.Status.CreationComplete = true

	return nil
}
--------------------------------------------------------------------------------
/api/v1beta1/zz_generated.deepcopy.go:
--------------------------------------------------------------------------------
//go:build !ignore_autogenerated
// +build !ignore_autogenerated

/*


Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 | See the License for the specific language governing permissions and 17 | limitations under the License. 18 | */ 19 | 20 | // Code generated by controller-gen. DO NOT EDIT. 21 | 22 | package v1beta1 23 | 24 | import ( 25 | "k8s.io/apimachinery/pkg/runtime" 26 | apiv1beta1 "sigs.k8s.io/cluster-api/api/core/v1beta1" 27 | ) 28 | 29 | // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 30 | func (in *EtcdadmCluster) DeepCopyInto(out *EtcdadmCluster) { 31 | *out = *in 32 | out.TypeMeta = in.TypeMeta 33 | in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) 34 | in.Spec.DeepCopyInto(&out.Spec) 35 | in.Status.DeepCopyInto(&out.Status) 36 | } 37 | 38 | // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EtcdadmCluster. 39 | func (in *EtcdadmCluster) DeepCopy() *EtcdadmCluster { 40 | if in == nil { 41 | return nil 42 | } 43 | out := new(EtcdadmCluster) 44 | in.DeepCopyInto(out) 45 | return out 46 | } 47 | 48 | // DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. 49 | func (in *EtcdadmCluster) DeepCopyObject() runtime.Object { 50 | if c := in.DeepCopy(); c != nil { 51 | return c 52 | } 53 | return nil 54 | } 55 | 56 | // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 57 | func (in *EtcdadmClusterList) DeepCopyInto(out *EtcdadmClusterList) { 58 | *out = *in 59 | out.TypeMeta = in.TypeMeta 60 | in.ListMeta.DeepCopyInto(&out.ListMeta) 61 | if in.Items != nil { 62 | in, out := &in.Items, &out.Items 63 | *out = make([]EtcdadmCluster, len(*in)) 64 | for i := range *in { 65 | (*in)[i].DeepCopyInto(&(*out)[i]) 66 | } 67 | } 68 | } 69 | 70 | // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EtcdadmClusterList. 
71 | func (in *EtcdadmClusterList) DeepCopy() *EtcdadmClusterList { 72 | if in == nil { 73 | return nil 74 | } 75 | out := new(EtcdadmClusterList) 76 | in.DeepCopyInto(out) 77 | return out 78 | } 79 | 80 | // DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. 81 | func (in *EtcdadmClusterList) DeepCopyObject() runtime.Object { 82 | if c := in.DeepCopy(); c != nil { 83 | return c 84 | } 85 | return nil 86 | } 87 | 88 | // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 89 | func (in *EtcdadmClusterSpec) DeepCopyInto(out *EtcdadmClusterSpec) { 90 | *out = *in 91 | if in.Replicas != nil { 92 | in, out := &in.Replicas, &out.Replicas 93 | *out = new(int32) 94 | **out = **in 95 | } 96 | out.InfrastructureTemplate = in.InfrastructureTemplate 97 | in.EtcdadmConfigSpec.DeepCopyInto(&out.EtcdadmConfigSpec) 98 | } 99 | 100 | // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EtcdadmClusterSpec. 101 | func (in *EtcdadmClusterSpec) DeepCopy() *EtcdadmClusterSpec { 102 | if in == nil { 103 | return nil 104 | } 105 | out := new(EtcdadmClusterSpec) 106 | in.DeepCopyInto(out) 107 | return out 108 | } 109 | 110 | // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 111 | func (in *EtcdadmClusterStatus) DeepCopyInto(out *EtcdadmClusterStatus) { 112 | *out = *in 113 | if in.Conditions != nil { 114 | in, out := &in.Conditions, &out.Conditions 115 | *out = make(apiv1beta1.Conditions, len(*in)) 116 | for i := range *in { 117 | (*in)[i].DeepCopyInto(&(*out)[i]) 118 | } 119 | } 120 | } 121 | 122 | // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EtcdadmClusterStatus. 
123 | func (in *EtcdadmClusterStatus) DeepCopy() *EtcdadmClusterStatus { 124 | if in == nil { 125 | return nil 126 | } 127 | out := new(EtcdadmClusterStatus) 128 | in.DeepCopyInto(out) 129 | return out 130 | } 131 | -------------------------------------------------------------------------------- /api/v1alpha3/zz_generated.deepcopy.go: -------------------------------------------------------------------------------- 1 | //go:build !ignore_autogenerated 2 | // +build !ignore_autogenerated 3 | 4 | /* 5 | 6 | 7 | Licensed under the Apache License, Version 2.0 (the "License"); 8 | you may not use this file except in compliance with the License. 9 | You may obtain a copy of the License at 10 | 11 | http://www.apache.org/licenses/LICENSE-2.0 12 | 13 | Unless required by applicable law or agreed to in writing, software 14 | distributed under the License is distributed on an "AS IS" BASIS, 15 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | See the License for the specific language governing permissions and 17 | limitations under the License. 18 | */ 19 | 20 | // Code generated by controller-gen. DO NOT EDIT. 21 | 22 | package v1alpha3 23 | 24 | import ( 25 | apiv1alpha3 "github.com/aws/etcdadm-controller/internal/thirdparty/api/v1alpha3" 26 | "k8s.io/apimachinery/pkg/runtime" 27 | ) 28 | 29 | // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 30 | func (in *EtcdadmCluster) DeepCopyInto(out *EtcdadmCluster) { 31 | *out = *in 32 | out.TypeMeta = in.TypeMeta 33 | in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) 34 | in.Spec.DeepCopyInto(&out.Spec) 35 | in.Status.DeepCopyInto(&out.Status) 36 | } 37 | 38 | // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EtcdadmCluster. 
39 | func (in *EtcdadmCluster) DeepCopy() *EtcdadmCluster { 40 | if in == nil { 41 | return nil 42 | } 43 | out := new(EtcdadmCluster) 44 | in.DeepCopyInto(out) 45 | return out 46 | } 47 | 48 | // DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. 49 | func (in *EtcdadmCluster) DeepCopyObject() runtime.Object { 50 | if c := in.DeepCopy(); c != nil { 51 | return c 52 | } 53 | return nil 54 | } 55 | 56 | // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 57 | func (in *EtcdadmClusterList) DeepCopyInto(out *EtcdadmClusterList) { 58 | *out = *in 59 | out.TypeMeta = in.TypeMeta 60 | in.ListMeta.DeepCopyInto(&out.ListMeta) 61 | if in.Items != nil { 62 | in, out := &in.Items, &out.Items 63 | *out = make([]EtcdadmCluster, len(*in)) 64 | for i := range *in { 65 | (*in)[i].DeepCopyInto(&(*out)[i]) 66 | } 67 | } 68 | } 69 | 70 | // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EtcdadmClusterList. 71 | func (in *EtcdadmClusterList) DeepCopy() *EtcdadmClusterList { 72 | if in == nil { 73 | return nil 74 | } 75 | out := new(EtcdadmClusterList) 76 | in.DeepCopyInto(out) 77 | return out 78 | } 79 | 80 | // DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. 81 | func (in *EtcdadmClusterList) DeepCopyObject() runtime.Object { 82 | if c := in.DeepCopy(); c != nil { 83 | return c 84 | } 85 | return nil 86 | } 87 | 88 | // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
89 | func (in *EtcdadmClusterSpec) DeepCopyInto(out *EtcdadmClusterSpec) { 90 | *out = *in 91 | if in.Replicas != nil { 92 | in, out := &in.Replicas, &out.Replicas 93 | *out = new(int32) 94 | **out = **in 95 | } 96 | out.InfrastructureTemplate = in.InfrastructureTemplate 97 | in.EtcdadmConfigSpec.DeepCopyInto(&out.EtcdadmConfigSpec) 98 | } 99 | 100 | // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EtcdadmClusterSpec. 101 | func (in *EtcdadmClusterSpec) DeepCopy() *EtcdadmClusterSpec { 102 | if in == nil { 103 | return nil 104 | } 105 | out := new(EtcdadmClusterSpec) 106 | in.DeepCopyInto(out) 107 | return out 108 | } 109 | 110 | // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 111 | func (in *EtcdadmClusterStatus) DeepCopyInto(out *EtcdadmClusterStatus) { 112 | *out = *in 113 | if in.Conditions != nil { 114 | in, out := &in.Conditions, &out.Conditions 115 | *out = make(apiv1alpha3.Conditions, len(*in)) 116 | for i := range *in { 117 | (*in)[i].DeepCopyInto(&(*out)[i]) 118 | } 119 | } 120 | } 121 | 122 | // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EtcdadmClusterStatus. 123 | func (in *EtcdadmClusterStatus) DeepCopy() *EtcdadmClusterStatus { 124 | if in == nil { 125 | return nil 126 | } 127 | out := new(EtcdadmClusterStatus) 128 | in.DeepCopyInto(out) 129 | return out 130 | } 131 | -------------------------------------------------------------------------------- /api/v1beta1/etcdadmcluster_webhook_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package v1beta1 18 | 19 | import ( 20 | "context" 21 | "testing" 22 | 23 | . "github.com/onsi/gomega" 24 | corev1 "k8s.io/api/core/v1" 25 | "k8s.io/utils/ptr" 26 | ) 27 | 28 | func TestValidateCreate(t *testing.T) { 29 | cases := map[string]struct { 30 | in *EtcdadmCluster 31 | expectErr string 32 | }{ 33 | "valid etcdadm cluster": { 34 | in: &EtcdadmCluster{ 35 | Spec: EtcdadmClusterSpec{ 36 | Replicas: ptr.To(int32(3)), 37 | }, 38 | Status: EtcdadmClusterStatus{}, 39 | }, 40 | expectErr: "", 41 | }, 42 | "no replicas field": { 43 | in: &EtcdadmCluster{ 44 | Spec: EtcdadmClusterSpec{}, 45 | Status: EtcdadmClusterStatus{}, 46 | }, 47 | expectErr: "spec.replicas: Required value: is required", 48 | }, 49 | "zero replicas": { 50 | in: &EtcdadmCluster{ 51 | Spec: EtcdadmClusterSpec{ 52 | Replicas: ptr.To(int32(0)), 53 | }, 54 | Status: EtcdadmClusterStatus{}, 55 | }, 56 | expectErr: "cannot be less than or equal to 0", 57 | }, 58 | "even replicas": { 59 | in: &EtcdadmCluster{ 60 | Spec: EtcdadmClusterSpec{ 61 | Replicas: ptr.To(int32(2)), 62 | }, 63 | Status: EtcdadmClusterStatus{}, 64 | }, 65 | expectErr: "Forbidden: etcd cluster cannot have an even number of nodes", 66 | }, 67 | "mismatched namespace": { 68 | in: &EtcdadmCluster{ 69 | Spec: EtcdadmClusterSpec{ 70 | Replicas: ptr.To(int32(3)), 71 | InfrastructureTemplate: corev1.ObjectReference{ 72 | Namespace: "fail", 73 | }, 74 | }, 75 | Status: EtcdadmClusterStatus{}, 76 | }, 77 | expectErr: "Invalid value: \"fail\": must match metadata.namespace", 78 | }, 79 | } 80 | for 
name, tt := range cases { 81 | t.Run(name, func(t *testing.T) { 82 | g := NewWithT(t) 83 | webhook := &EtcdadmCluster{} 84 | _, err := webhook.ValidateCreate(context.Background(), tt.in) 85 | if tt.expectErr == "" { 86 | g.Expect(err).To(BeNil()) 87 | } else { 88 | g.Expect(err).To(MatchError(ContainSubstring(tt.expectErr))) 89 | } 90 | }) 91 | } 92 | } 93 | func TestValidateUpdate(t *testing.T) { 94 | cases := map[string]struct { 95 | oldConf *EtcdadmCluster 96 | newConf *EtcdadmCluster 97 | expectErr string 98 | }{ 99 | "valid scale up": { 100 | oldConf: &EtcdadmCluster{ 101 | Spec: EtcdadmClusterSpec{ 102 | Replicas: ptr.To(int32(3)), 103 | }, 104 | Status: EtcdadmClusterStatus{}, 105 | }, 106 | newConf: &EtcdadmCluster{ 107 | Spec: EtcdadmClusterSpec{ 108 | Replicas: ptr.To(int32(5)), 109 | }, 110 | Status: EtcdadmClusterStatus{}, 111 | }, 112 | expectErr: "", 113 | }, 114 | "valid scale down": { 115 | oldConf: &EtcdadmCluster{ 116 | Spec: EtcdadmClusterSpec{ 117 | Replicas: ptr.To(int32(3)), 118 | }, 119 | Status: EtcdadmClusterStatus{}, 120 | }, 121 | newConf: &EtcdadmCluster{ 122 | Spec: EtcdadmClusterSpec{ 123 | Replicas: ptr.To(int32(1)), 124 | }, 125 | Status: EtcdadmClusterStatus{}, 126 | }, 127 | expectErr: "", 128 | }, 129 | "zero replicas": { 130 | oldConf: &EtcdadmCluster{ 131 | Spec: EtcdadmClusterSpec{ 132 | Replicas: ptr.To(int32(3)), 133 | }, 134 | Status: EtcdadmClusterStatus{}, 135 | }, 136 | newConf: &EtcdadmCluster{ 137 | Spec: EtcdadmClusterSpec{ 138 | Replicas: ptr.To(int32(0)), 139 | }, 140 | Status: EtcdadmClusterStatus{}, 141 | }, 142 | expectErr: "cannot be less than or equal to 0", 143 | }, 144 | } 145 | for name, tt := range cases { 146 | t.Run(name, func(t *testing.T) { 147 | g := NewWithT(t) 148 | webhook := &EtcdadmCluster{} 149 | _, err := webhook.ValidateUpdate(context.Background(), tt.oldConf, tt.newConf) 150 | if tt.expectErr != "" { 151 | g.Expect(err).To(MatchError(ContainSubstring(tt.expectErr))) 152 | } else { 153 | 
g.Expect(err).To(BeNil()) 154 | } 155 | }) 156 | } 157 | } 158 | -------------------------------------------------------------------------------- /etcdcluster.yaml: -------------------------------------------------------------------------------- 1 | kind: EtcdCluster 2 | apiVersion: etcdcluster.cluster.x-k8s.io/v1alpha4 3 | metadata: 4 | name: "etcd-cluster" 5 | namespace: default 6 | spec: 7 | # etcdadmConfigSpec: 8 | replicas: 3 9 | infrastructureTemplate: 10 | kind: DockerMachineTemplate 11 | apiVersion: infrastructure.cluster.x-k8s.io/v1alpha4 12 | name: "etcd-plane" 13 | namespace: "default" 14 | --- 15 | apiVersion: infrastructure.cluster.x-k8s.io/v1alpha4 16 | kind: DockerMachineTemplate 17 | metadata: 18 | name: "etcd-plane" 19 | namespace: "default" 20 | spec: 21 | template: 22 | spec: 23 | extraMounts: 24 | - containerPath: "/var/run/docker.sock" 25 | hostPath: "/var/run/docker.sock" 26 | --- 27 | apiVersion: cluster.x-k8s.io/v1alpha4 28 | kind: Cluster 29 | metadata: 30 | name: "abcd" 31 | namespace: "default" 32 | spec: 33 | clusterNetwork: 34 | services: 35 | cidrBlocks: ["10.128.0.0/12"] 36 | pods: 37 | cidrBlocks: ["192.168.0.0/16"] 38 | serviceDomain: "cluster.local" 39 | infrastructureRef: 40 | apiVersion: infrastructure.cluster.x-k8s.io/v1alpha4 41 | kind: DockerCluster 42 | name: "abcd" 43 | namespace: "default" 44 | controlPlaneRef: 45 | kind: KubeadmControlPlane 46 | apiVersion: controlplane.cluster.x-k8s.io/v1alpha4 47 | name: "abcd-control-plane" 48 | namespace: "default" 49 | managedExternalEtcdRef: 50 | kind: EtcdCluster 51 | apiVersion: etcdcluster.cluster.x-k8s.io/v1alpha4 52 | name: "etcd-cluster" 53 | namespace: "default" 54 | --- 55 | apiVersion: infrastructure.cluster.x-k8s.io/v1alpha4 56 | kind: DockerCluster 57 | metadata: 58 | name: "abcd" 59 | namespace: "default" 60 | --- 61 | kind: KubeadmControlPlane 62 | apiVersion: controlplane.cluster.x-k8s.io/v1alpha4 63 | metadata: 64 | name: "abcd-control-plane" 65 | namespace: "default" 
66 | spec: 67 | replicas: 1 68 | infrastructureTemplate: 69 | kind: DockerMachineTemplate 70 | apiVersion: infrastructure.cluster.x-k8s.io/v1alpha4 71 | name: "abcd-control-plane" 72 | namespace: "default" 73 | kubeadmConfigSpec: 74 | clusterConfiguration: 75 | controllerManager: 76 | extraArgs: {enable-hostpath-provisioner: 'true'} 77 | apiServer: 78 | certSANs: [localhost, 127.0.0.1] 79 | etcd: 80 | external: 81 | endpoints: [] 82 | caFile: "/etc/kubernetes/pki/etcd/ca.crt" 83 | certFile: "/etc/kubernetes/pki/apiserver-etcd-client.crt" 84 | keyFile: "/etc/kubernetes/pki/apiserver-etcd-client.key" 85 | initConfiguration: 86 | nodeRegistration: 87 | criSocket: /var/run/containerd/containerd.sock 88 | kubeletExtraArgs: {eviction-hard: 'nodefs.available<0%,nodefs.inodesFree<0%,imagefs.available<0%'} 89 | joinConfiguration: 90 | nodeRegistration: 91 | criSocket: /var/run/containerd/containerd.sock 92 | kubeletExtraArgs: {eviction-hard: 'nodefs.available<0%,nodefs.inodesFree<0%,imagefs.available<0%'} 93 | version: "v1.20.2" 94 | --- 95 | apiVersion: infrastructure.cluster.x-k8s.io/v1alpha4 96 | kind: DockerMachineTemplate 97 | metadata: 98 | name: "abcd-control-plane" 99 | namespace: "default" 100 | spec: 101 | template: 102 | spec: 103 | extraMounts: 104 | - containerPath: "/var/run/docker.sock" 105 | hostPath: "/var/run/docker.sock" 106 | --- 107 | apiVersion: bootstrap.cluster.x-k8s.io/v1alpha4 108 | kind: KubeadmConfigTemplate 109 | metadata: 110 | name: "abcd-md-0" 111 | namespace: "default" 112 | spec: 113 | template: 114 | spec: 115 | joinConfiguration: 116 | nodeRegistration: 117 | kubeletExtraArgs: {eviction-hard: 'nodefs.available<0%,nodefs.inodesFree<0%,imagefs.available<0%'} 118 | --- 119 | apiVersion: infrastructure.cluster.x-k8s.io/v1alpha4 120 | kind: DockerMachineTemplate 121 | metadata: 122 | name: "abcd-md-0" 123 | namespace: "default" 124 | spec: 125 | template: 126 | spec: {} 127 | --- 128 | apiVersion: cluster.x-k8s.io/v1alpha4 129 | kind: 
MachineDeployment 130 | metadata: 131 | name: "abcd-md-0" 132 | spec: 133 | clusterName: "abcd" 134 | replicas: 1 135 | selector: 136 | matchLabels: 137 | template: 138 | spec: 139 | clusterName: "abcd" 140 | version: "v1.20.2" 141 | bootstrap: 142 | configRef: 143 | name: "abcd-md-0" 144 | namespace: "default" 145 | apiVersion: bootstrap.cluster.x-k8s.io/v1alpha4 146 | kind: KubeadmConfigTemplate 147 | infrastructureRef: 148 | name: "abcd-md-0" 149 | namespace: "default" 150 | apiVersion: infrastructure.cluster.x-k8s.io/v1alpha4 151 | kind: DockerMachineTemplate -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | 2 | # Image URL to use all building/pushing image targets 3 | IMG ?= controller:latest 4 | # Produce CRDs that work back to Kubernetes 1.11 (no version conversion) 5 | CRD_OPTIONS ?= "crd:crdVersions=v1" 6 | 7 | # Get the currently used golang install path (in GOPATH/bin, unless GOBIN is set) 8 | ifeq (,$(shell go env GOBIN)) 9 | GOBIN=$(shell go env GOPATH)/bin 10 | else 11 | GOBIN=$(shell go env GOBIN) 12 | endif 13 | 14 | ROOT_DIR:=$(shell dirname $(realpath $(firstword $(MAKEFILE_LIST)))) 15 | TOOLS_DIR := hack/tools 16 | BIN_DIR := bin 17 | TOOLS_BIN_DIR := $(TOOLS_DIR)/$(BIN_DIR) 18 | ABS_TOOLS_BIN_DIR := $(abspath $(TOOLS_BIN_DIR)) 19 | CONTROLLER_GEN := $(ABS_TOOLS_BIN_DIR)/controller-gen 20 | CONVERSION_GEN := $(ABS_TOOLS_BIN_DIR)/conversion-gen 21 | 22 | export PATH := $(abspath $(TOOLS_BIN_DIR)):$(PATH) 23 | 24 | # Set --output-base for conversion-gen if we are not within GOPATH 25 | ifneq ($(abspath $(ROOT_DIR)),$(shell go env GOPATH)/src/github.com/aws/etcdadm-controller) 26 | CONVERSION_GEN_OUTPUT_BASE := --output-base=$(ROOT_DIR) 27 | else 28 | export GOPATH := $(shell go env GOPATH) 29 | endif 30 | 31 | all: manager 32 | 33 | # Run tests 34 | test: generate fmt vet manifests 35 | go test ./... 
-coverprofile cover.out 36 | 37 | # Build manager binary 38 | manager: fmt vet 39 | CGO_ENABLED=0 go build -ldflags='-s -w -extldflags="-static" -buildid=""' -trimpath -o bin/manager main.go 40 | 41 | # Run against the configured Kubernetes cluster in ~/.kube/config 42 | run: generate fmt vet manifests 43 | go run ./main.go 44 | 45 | # Install CRDs into a cluster 46 | install: manifests 47 | kustomize build config/crd | kubectl apply -f - 48 | 49 | # Uninstall CRDs from a cluster 50 | uninstall: manifests 51 | kustomize build config/crd | kubectl delete -f - 52 | 53 | # Deploy controller in the configured Kubernetes cluster in ~/.kube/config 54 | deploy: manifests 55 | cd config/manager && kustomize edit set image controller=${IMG} 56 | kustomize build config/default | kubectl apply -f - 57 | 58 | # Generate manifests e.g. CRD, RBAC etc. 59 | manifests: $(CONTROLLER_GEN) 60 | $(CONTROLLER_GEN) $(CRD_OPTIONS) rbac:roleName=manager-role webhook \ 61 | paths="./..." \ 62 | output:rbac:dir=./config/rbac \ 63 | output:crd:artifacts:config=config/crd/bases 64 | 65 | # Run go fmt against code 66 | fmt: 67 | go fmt ./... 68 | 69 | # Run go vet against code 70 | vet: 71 | go vet ./... 72 | 73 | # Generate code 74 | generate: $(CONTROLLER_GEN) 75 | $(CONTROLLER_GEN) object:headerFile="hack/boilerplate.go.txt" paths="./..." 76 | 77 | generate-conversion: $(CONVERSION_GEN) 78 | $(CONVERSION_GEN) \ 79 | --input-dirs=./api/v1alpha3 \ 80 | --build-tag=ignore_autogenerated_etcd_cluster \ 81 | --extra-peer-dirs=github.com/aws/etcdadm-bootstrap-provider/api/v1alpha3 \ 82 | --output-file-base=zz_generated.conversion $(CONVERSION_GEN_OUTPUT_BASE) \ 83 | --go-header-file=hack/boilerplate.go.txt \ 84 | --alsologtostderr 85 | 86 | build: docker-build 87 | 88 | # Build the docker image 89 | docker-build: manager 90 | docker build . 
-t ${IMG}

# Push the docker image
docker-push:
	docker push ${IMG}

.PHONY: lint
lint: bin/golangci-lint ## Run golangci-lint
	bin/golangci-lint run

bin/golangci-lint: ## Download golangci-lint
bin/golangci-lint: GOLANGCI_LINT_VERSION?=$(shell cat .github/workflows/golangci-lint.yml | yq e '.jobs.golangci.steps[] | select(.name == "golangci-lint") .with.version' -)
bin/golangci-lint:
	curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s $(GOLANGCI_LINT_VERSION)

.PHONY: clean
clean:
	rm -Rf ./bin

.PHONY: mocks
mocks: export GOPATH := $(shell go env GOPATH)
mocks: MOCKGEN := ${GOPATH}/bin/mockgen --build_flags=--mod=mod
mocks: ## Generate mocks
	go install github.com/golang/mock/mockgen@v1.6.0
	${MOCKGEN} -destination controllers/mocks/roundtripper.go -package=mocks net/http RoundTripper
	${MOCKGEN} -destination controllers/mocks/etcdclient.go -package=mocks -source "controllers/controller.go" EtcdClient

# BUG FIX: the recipe previously used `if [[ ... ]]`, a bashism. Make runs
# recipes with /bin/sh, which on Debian/Ubuntu is dash and does not support
# `[[`; the POSIX `[ ... ]` test works under any /bin/sh.
.PHONY: verify-mocks
verify-mocks: mocks ## Verify if mocks need to be updated
	$(eval DIFF=$(shell git diff --raw -- '*.go' | wc -c))
	if [ $(DIFF) -ne 0 ]; then \
		echo "Detected out of date mocks"; \
		exit 1;\
	fi

$(CONTROLLER_GEN): $(TOOLS_BIN_DIR) # Build controller-gen from tools folder.
126 | GOBIN=$(ABS_TOOLS_BIN_DIR) go install sigs.k8s.io/controller-tools/cmd/controller-gen@v0.16.5 127 | 128 | $(CONVERSION_GEN): $(TOOLS_BIN_DIR) 129 | GOBIN=$(ABS_TOOLS_BIN_DIR) go install k8s.io/code-generator/cmd/conversion-gen@v0.26.0 130 | 131 | $(TOOLS_BIN_DIR): 132 | mkdir -p $(TOOLS_BIN_DIR) 133 | -------------------------------------------------------------------------------- /api/v1alpha3/etcdadmcluster_types.go: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package v1alpha3 18 | 19 | import ( 20 | etcdbp "github.com/aws/etcdadm-bootstrap-provider/api/v1alpha3" 21 | clusterv1 "github.com/aws/etcdadm-controller/internal/thirdparty/api/v1alpha3" 22 | corev1 "k8s.io/api/core/v1" 23 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 24 | ) 25 | 26 | const ( 27 | UpgradeInProgressAnnotation = "etcdcluster.cluster.x-k8s.io/upgrading" 28 | ) 29 | 30 | // EDIT THIS FILE! THIS IS SCAFFOLDING FOR YOU TO OWN! 31 | // NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized. 

// EtcdadmClusterSpec defines the desired state of EtcdadmCluster
type EtcdadmClusterSpec struct {
	// INSERT ADDITIONAL SPEC FIELDS - desired state of cluster
	// Important: Run "make" to regenerate code after modifying this file

	// Replicas is the desired number of etcd member machines.
	Replicas *int32 `json:"replicas,omitempty"`

	// InfrastructureTemplate is a required reference to a custom resource
	// offered by an infrastructure provider.
	InfrastructureTemplate corev1.ObjectReference `json:"infrastructureTemplate"`

	// EtcdadmConfigSpec is the etcdadm bootstrap-provider configuration applied to each member.
	// +optional
	EtcdadmConfigSpec etcdbp.EtcdadmConfigSpec `json:"etcdadmConfigSpec"`
}

// EtcdadmClusterStatus defines the observed state of EtcdadmCluster
type EtcdadmClusterStatus struct {
	// INSERT ADDITIONAL STATUS FIELD - define observed state of cluster
	// Important: Run "make" to regenerate code after modifying this file

	// Total number of non-terminated machines targeted by this etcd cluster
	// (their labels match the selector).
	// NOTE(review): the json tag is "replicas", not "readyReplicas"; changing it
	// would be a breaking API change, so it stays as is.
	// +optional
	ReadyReplicas int32 `json:"replicas,omitempty"`

	// InitMachineAddress is the address of the machine used to initialize the cluster.
	// +optional
	InitMachineAddress string `json:"initMachineAddress"`

	// Initialized indicates whether the etcd cluster has been initialized.
	// +optional
	Initialized bool `json:"initialized"`

	// Ready reflects the state of the etcd cluster, whether all of its members have passed healthcheck and are ready to serve requests or not.
	// +optional
	Ready bool `json:"ready"`

	// CreationComplete gets set to true once the etcd cluster is created. Its value never changes after that.
	// It is used as a way to indicate that the periodic healthcheck loop can be run for the particular etcd cluster.
	// +optional
	CreationComplete bool `json:"creationComplete"`

	// Endpoints holds the client endpoints of the etcd members as a single
	// comma-separated string (not a slice).
	// +optional
	Endpoints string `json:"endpoints"`

	// Selector is the label selector in string format to avoid introspection
	// by clients, and is used to provide the CRD-based integration for the
	// scale subresource and additional integrations for things like kubectl
	// describe. The string will be in the same format as the query-param syntax.
	// More info about label selectors: http://kubernetes.io/docs/user-guide/labels#label-selectors
	// +optional
	Selector string `json:"selector,omitempty"`

	// ObservedGeneration is the latest generation observed by the controller.
	// +optional
	ObservedGeneration int64 `json:"observedGeneration,omitempty"`

	// Conditions defines current service state of the EtcdadmCluster.
	// +optional
	Conditions clusterv1.Conditions `json:"conditions,omitempty"`
}

// +kubebuilder:object:root=true
// +kubebuilder:subresource:status
// EtcdadmCluster is the Schema for the etcdadmclusters API
type EtcdadmCluster struct {
	metav1.TypeMeta   `json:",inline"`
	metav1.ObjectMeta `json:"metadata,omitempty"`

	Spec   EtcdadmClusterSpec   `json:"spec,omitempty"`
	Status EtcdadmClusterStatus `json:"status,omitempty"`
}

// GetConditions returns the conditions recorded in the cluster status.
func (in *EtcdadmCluster) GetConditions() clusterv1.Conditions {
	return in.Status.Conditions
}

// SetConditions replaces the conditions recorded in the cluster status.
func (in *EtcdadmCluster) SetConditions(conditions clusterv1.Conditions) {
	in.Status.Conditions = conditions
}

// +kubebuilder:object:root=true

// EtcdadmClusterList contains a list of EtcdadmCluster
type EtcdadmClusterList struct {
	metav1.TypeMeta `json:",inline"`
	metav1.ListMeta `json:"metadata,omitempty"`
	Items           []EtcdadmCluster `json:"items"`
}

// init registers both API types with the scheme builder.
func init() {
	SchemeBuilder.Register(&EtcdadmCluster{}, &EtcdadmClusterList{})
}
-------------------------------------------------------------------------------- /controllers/machines.go: --------------------------------------------------------------------------------
package controllers

import (
	"context"

	etcdv1 "github.com/aws/etcdadm-controller/api/v1beta1"
	"github.com/go-logr/logr"
	"github.com/pkg/errors"
	"k8s.io/klog/v2"
	clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta2"
	"sigs.k8s.io/cluster-api/util/collections"
	v1beta1conditions "sigs.k8s.io/cluster-api/util/deprecated/v1beta1/conditions"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

// TODO(g-gaston): remove this once we have a stable CAPI repo that contains this.
// MachineEtcdReadyLabelName is the label set on machines that have successfully joined the etcd cluster.
const MachineEtcdReadyLabelName = "cluster.x-k8s.io/etcd-ready"

// etcdMachines indexes etcdMachine entries, keyed the same way as the
// collections.Machines it is built from.
type etcdMachines map[string]etcdMachine

// endpoints returns all the API endpoints for the machines that have one available.
func (e etcdMachines) endpoints() []string {
	endpoints := make([]string, 0, len(e))
	for _, m := range e {
		if m.endpoint != "" {
			endpoints = append(endpoints, m.endpoint)
		}
	}

	return endpoints
}

// etcdMachine represents a Machine that should be a member of an etcd cluster.
type etcdMachine struct {
	*clusterv1.Machine
	// endpoint is the member's client URL; empty if the machine has no usable address yet.
	endpoint string
	// listening is true when the member's endpoint was reachable during the last healthcheck.
	listening bool
	// healthError holds the result of the last healthcheck; nil when healthy.
	healthError error
}

// healthy reports whether the member is listening and its last healthcheck passed.
func (e etcdMachine) healthy() bool {
	return e.listening && e.healthError == nil
}

// updateMachinesEtcdReadyLabel adds the etcd-ready label to the machines that have joined the etcd cluster.
47 | func (r *EtcdadmClusterReconciler) updateMachinesEtcdReadyLabel(ctx context.Context, log logr.Logger, machines etcdMachines) error { 48 | for _, m := range machines { 49 | if _, ok := m.Labels[MachineEtcdReadyLabelName]; ok { 50 | continue 51 | } 52 | 53 | if !m.healthy() { 54 | log.Info("Machine not healthy yet", "machine", klog.KObj(m.Machine), "listening", m.listening, "healthError", m.healthError, "endpoint", m.endpoint) 55 | continue 56 | } 57 | 58 | m.Labels[MachineEtcdReadyLabelName] = "true" 59 | if err := r.Client.Update(ctx, m.Machine); err != nil { 60 | return errors.Wrapf(err, "adding etcd ready label to machine %s", m.Name) 61 | } 62 | } 63 | 64 | return nil 65 | } 66 | 67 | // checkOwnedMachines verifies the health of all etcd members. 68 | func (r *EtcdadmClusterReconciler) checkOwnedMachines(ctx context.Context, log logr.Logger, etcdadmCluster *etcdv1.EtcdadmCluster, cluster *clusterv1.Cluster) (etcdMachines, error) { 69 | ownedMachines, err := r.getCurrentOwnedMachines(ctx, etcdadmCluster, cluster) 70 | if err != nil { 71 | return nil, err 72 | } 73 | 74 | machines := make(etcdMachines, len(ownedMachines)) 75 | for k, machine := range ownedMachines { 76 | m := etcdMachine{Machine: machine} 77 | endpoint := getMachineEtcdEndpoint(machine) 78 | if endpoint == "" { 79 | machines[k] = m 80 | continue 81 | } 82 | 83 | err := r.performEndpointHealthCheck(ctx, cluster, endpoint, true) 84 | // This is not ideal, performEndpointHealthCheck uses an error to signal both a not ready/unhealthy member 85 | // and also transient errors when performing such check. 
86 | // Ideally we would separate these 2 so we can abort on error and mark as unhealthy separetly 87 | m.healthError = err 88 | if errors.Is(err, portNotOpenErr) { 89 | log.Info("Machine is not listening yet, this is probably transient, while etcd starts", "endpoint", endpoint) 90 | } else { 91 | m.endpoint = endpoint 92 | m.listening = true 93 | } 94 | 95 | machines[k] = m 96 | } 97 | 98 | return machines, nil 99 | } 100 | 101 | // getCurrentOwnedMachines lists all the owned machines by the etcdadm cluster. 102 | func (r *EtcdadmClusterReconciler) getCurrentOwnedMachines(ctx context.Context, etcdadmCluster *etcdv1.EtcdadmCluster, cluster *clusterv1.Cluster) (collections.Machines, error) { 103 | var client client.Reader 104 | if v1beta1conditions.IsFalse(etcdadmCluster, etcdv1.EtcdMachinesSpecUpToDateCondition) { 105 | // During upgrade with current logic, outdated machines don't get deleted right away. 106 | // the controller removes their etcdadmCluster ownerRef and updates the Machine. So using uncachedClient here will fetch those changes 107 | client = r.uncachedClient 108 | } else { 109 | client = r.Client 110 | } 111 | etcdMachines, err := collections.GetFilteredMachinesForCluster(ctx, client, cluster, EtcdClusterMachines(cluster.Name, etcdadmCluster.Name)) 112 | if err != nil { 113 | return nil, errors.Wrap(err, "reading machines for etcd cluster") 114 | } 115 | ownedMachines := etcdMachines.Filter(collections.OwnedMachines(etcdadmCluster)) 116 | 117 | return ownedMachines, nil 118 | } 119 | 120 | // getMachineEtcdEndpoint constructs the full API url for an etcd member Machine. 121 | // If the Machine doesn't have yet the right address, it returns empty string. 
func getMachineEtcdEndpoint(machine *clusterv1.Machine) string {
	address := getEtcdMachineAddress(machine)
	if address == "" {
		// No address available yet; caller treats "" as "not ready".
		return ""
	}

	return getMemberClientURL(address)
}
-------------------------------------------------------------------------------- /controllers/status_test.go: --------------------------------------------------------------------------------
package controllers

import (
	"testing"

	etcdv1 "github.com/aws/etcdadm-controller/api/v1beta1"
	v1beta1conditions "sigs.k8s.io/cluster-api/util/deprecated/v1beta1/conditions"
	"sigs.k8s.io/controller-runtime/pkg/client"
	"sigs.k8s.io/controller-runtime/pkg/client/fake"
	"sigs.k8s.io/controller-runtime/pkg/log"

	. "github.com/onsi/gomega"
)

// TestUpdateStatusResizeIncomplete exercises updateStatus with two healthy
// owned machines; the resize-completed condition must not be true.
func TestUpdateStatusResizeIncomplete(t *testing.T) {
	g := NewWithT(t)

	cluster := newClusterWithExternalEtcd()
	etcdadmCluster := newEtcdadmCluster(cluster)

	machine1 := newEtcdMachine(etcdadmCluster, cluster)
	machine2 := newEtcdMachine(etcdadmCluster, cluster)

	etcdMachine1 := etcdMachine{
		Machine:     machine1,
		endpoint:    "1.1.1.1",
		listening:   true,
		healthError: nil,
	}
	etcdMachine2 := etcdMachine{
		Machine:     machine2,
		endpoint:    "1.1.1.1",
		listening:   true,
		healthError: nil,
	}

	ownedMachines := map[string]etcdMachine{
		"machine1": etcdMachine1,
		"machine2": etcdMachine2,
	}

	objects := []client.Object{
		cluster,
		etcdadmCluster,
		infraTemplate.DeepCopy(),
		machine1,
		machine2,
	}

	fakeClient := fake.NewClientBuilder().WithScheme(setupScheme()).WithObjects(objects...).Build()

	r := &EtcdadmClusterReconciler{
		Client:         fakeClient,
		uncachedClient: fakeClient,
		Log:            log.Log,
	}

	err := r.updateStatus(ctx, etcdadmCluster, cluster, ownedMachines)
	g.Expect(err).NotTo(HaveOccurred())
	g.Expect(v1beta1conditions.IsTrue(etcdadmCluster, etcdv1.EtcdClusterResizeCompleted)).To(BeFalse())
}

// TestUpdateStatusMachineUnhealthy exercises updateStatus with one machine not
// listening: only the healthy machine counts towards ReadyReplicas.
func TestUpdateStatusMachineUnhealthy(t *testing.T) {
	g := NewWithT(t)

	cluster := newClusterWithExternalEtcd()
	etcdadmCluster := newEtcdadmCluster(cluster)

	machine1 := newEtcdMachine(etcdadmCluster, cluster)
	machine2 := newEtcdMachine(etcdadmCluster, cluster)

	etcdMachine1 := etcdMachine{
		Machine:     machine1,
		endpoint:    "1.1.1.1",
		listening:   true,
		healthError: nil,
	}
	etcdMachine2 := etcdMachine{
		Machine:     machine2,
		endpoint:    "1.1.1.1",
		listening:   false,
		healthError: nil,
	}

	ownedMachines := map[string]etcdMachine{
		"machine1": etcdMachine1,
		"machine2": etcdMachine2,
	}

	objects := []client.Object{
		cluster,
		etcdadmCluster,
		infraTemplate.DeepCopy(),
		machine1,
		machine2,
	}

	fakeClient := fake.NewClientBuilder().WithScheme(setupScheme()).WithObjects(objects...).Build()

	r := &EtcdadmClusterReconciler{
		Client:         fakeClient,
		uncachedClient: fakeClient,
		Log:            log.Log,
	}

	err := r.updateStatus(ctx, etcdadmCluster, cluster, ownedMachines)
	g.Expect(etcdadmCluster.Status.ReadyReplicas).To(Equal(int32(1)))
	g.Expect(err).NotTo(HaveOccurred())
	g.Expect(v1beta1conditions.IsTrue(etcdadmCluster, etcdv1.EtcdClusterResizeCompleted)).To(BeFalse())
}

// TestUpdateStatusResizeComplete exercises updateStatus with three healthy
// machines, which matches the desired size used by the test fixtures.
func TestUpdateStatusResizeComplete(t *testing.T) {
	g := NewWithT(t)

	cluster := newClusterWithExternalEtcd()
	etcdadmCluster := newEtcdadmCluster(cluster)

	machine1 := newEtcdMachine(etcdadmCluster, cluster)
	machine2 := newEtcdMachine(etcdadmCluster, cluster)
	machine3 := newEtcdMachine(etcdadmCluster, cluster)

	etcdMachine1 := etcdMachine{
		Machine:     machine1,
		endpoint:    "1.1.1.1",
		listening:   true,
		healthError: nil,
	}
	etcdMachine2 := etcdMachine{
		Machine:     machine2,
		endpoint:    "1.1.1.1",
		listening:   true,
		healthError: nil,
	}
	etcdMachine3 := etcdMachine{
		Machine:     machine3,
		endpoint:    "1.1.1.1",
		listening:   true,
		healthError: nil,
	}

	ownedMachines := map[string]etcdMachine{
		"machine1": etcdMachine1,
		"machine2": etcdMachine2,
		"machine3": etcdMachine3,
	}

	objects := []client.Object{
		cluster,
		etcdadmCluster,
		infraTemplate.DeepCopy(),
		machine1,
		machine2,
		machine3,
	}

	fakeClient := fake.NewClientBuilder().WithScheme(setupScheme()).WithObjects(objects...).Build()

	r := &EtcdadmClusterReconciler{
		Client:         fakeClient,
		uncachedClient: fakeClient,
		Log:            log.Log,
	}

	err := r.updateStatus(ctx, etcdadmCluster, cluster, ownedMachines)
	// Init secret not defined, so error will occur. This test checks that the resizeComplete condition is properly set
	// which happens before the updating init secret stage of updateStatus.
	g.Expect(err).To(HaveOccurred())
	g.Expect(v1beta1conditions.IsTrue(etcdadmCluster, etcdv1.EtcdClusterResizeCompleted)).To(BeTrue())
}
-------------------------------------------------------------------------------- /api/v1beta1/etcdadmcluster_types.go: --------------------------------------------------------------------------------
/*


Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package v1beta1

import (
	etcdbp "github.com/aws/etcdadm-bootstrap-provider/api/v1beta1"
	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta1"
)

const (
	// UpgradeInProgressAnnotation marks an EtcdadmCluster that is in the middle of an upgrade.
	UpgradeInProgressAnnotation = "etcdcluster.cluster.x-k8s.io/upgrading"

	// HealthCheckRetriesAnnotation allows users to configure healthcheck retries. When set to 0, it disables healthchecks.
	HealthCheckRetriesAnnotation = "etcdcluster.cluster.x-k8s.io/healthcheck-retries"

	// EtcdadmClusterFinalizer is the finalizer applied to EtcdadmCluster resources
	// by its managing controller.
	EtcdadmClusterFinalizer = "etcdcluster.cluster.x-k8s.io"
)

// EDIT THIS FILE! THIS IS SCAFFOLDING FOR YOU TO OWN!
// NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized.

// EtcdadmClusterSpec defines the desired state of EtcdadmCluster
type EtcdadmClusterSpec struct {
	// INSERT ADDITIONAL SPEC FIELDS - desired state of cluster
	// Important: Run "make" to regenerate code after modifying this file

	// Replicas is the desired number of etcd member machines.
	Replicas *int32 `json:"replicas,omitempty"`

	// InfrastructureTemplate is a required reference to a custom resource
	// offered by an infrastructure provider.
	InfrastructureTemplate corev1.ObjectReference `json:"infrastructureTemplate"`

	// EtcdadmConfigSpec is the etcdadm bootstrap-provider configuration applied to each member.
	// +optional
	EtcdadmConfigSpec etcdbp.EtcdadmConfigSpec `json:"etcdadmConfigSpec"`
}

// EtcdadmClusterStatus defines the observed state of EtcdadmCluster
type EtcdadmClusterStatus struct {
	// INSERT ADDITIONAL STATUS FIELD - define observed state of cluster
	// Important: Run "make" to regenerate code after modifying this file

	// Total number of non-terminated machines targeted by this etcd cluster
	// (their labels match the selector).
	// NOTE(review): the json tag is "replicas", not "readyReplicas"; changing it
	// would be a breaking API change, so it stays as is.
	// +optional
	ReadyReplicas int32 `json:"replicas,omitempty"`

	// InitMachineAddress is the address of the machine used to initialize the cluster.
	// +optional
	InitMachineAddress string `json:"initMachineAddress"`

	// Initialized indicates whether the etcd cluster has been initialized.
	// +optional
	Initialized bool `json:"initialized"`

	// Ready reflects the state of the etcd cluster, whether all of its members have passed healthcheck and are ready to serve requests or not.
	// +optional
	Ready bool `json:"ready"`

	// CreationComplete gets set to true once the etcd cluster is created. Its value never changes after that.
	// It is used as a way to indicate that the periodic healthcheck loop can be run for the particular etcd cluster.
	// +optional
	CreationComplete bool `json:"creationComplete"`

	// Endpoints holds the client endpoints of the etcd members as a single
	// comma-separated string (not a slice).
	// +optional
	Endpoints string `json:"endpoints"`

	// Selector is the label selector in string format to avoid introspection
	// by clients, and is used to provide the CRD-based integration for the
	// scale subresource and additional integrations for things like kubectl
	// describe. The string will be in the same format as the query-param syntax.
	// More info about label selectors: http://kubernetes.io/docs/user-guide/labels#label-selectors
	// +optional
	Selector string `json:"selector,omitempty"`

	// ObservedGeneration is the latest generation observed by the controller.
	// +optional
	ObservedGeneration int64 `json:"observedGeneration,omitempty"`

	// Conditions defines current service state of the EtcdadmCluster.
	// +optional
	Conditions clusterv1.Conditions `json:"conditions,omitempty"`
}

// +kubebuilder:object:root=true
// +kubebuilder:subresource:status
// +kubebuilder:storageversion

// EtcdadmCluster is the Schema for the etcdadmclusters API
type EtcdadmCluster struct {
	metav1.TypeMeta   `json:",inline"`
	metav1.ObjectMeta `json:"metadata,omitempty"`

	Spec   EtcdadmClusterSpec   `json:"spec,omitempty"`
	Status EtcdadmClusterStatus `json:"status,omitempty"`
}

// GetConditions returns the conditions recorded in the cluster status.
func (in *EtcdadmCluster) GetConditions() clusterv1.Conditions {
	return in.Status.Conditions
}

// SetConditions replaces the conditions recorded in the cluster status.
func (in *EtcdadmCluster) SetConditions(conditions clusterv1.Conditions) {
	in.Status.Conditions = conditions
}

// +kubebuilder:object:root=true

// EtcdadmClusterList contains a list of EtcdadmCluster
type EtcdadmClusterList struct {
	metav1.TypeMeta `json:",inline"`
	metav1.ListMeta `json:"metadata,omitempty"`
	Items           []EtcdadmCluster `json:"items"`
}

// init registers both API types with the scheme builder.
func init() {
	SchemeBuilder.Register(&EtcdadmCluster{}, &EtcdadmClusterList{})
}
-------------------------------------------------------------------------------- /controllers/scale.go: --------------------------------------------------------------------------------
package controllers

import (
	"context"
	"crypto/tls"
	"crypto/x509"
	"fmt"
	"strings"
	"time"

	etcdv1 "github.com/aws/etcdadm-controller/api/v1beta1"
	"github.com/pkg/errors"
	clientv3 "go.etcd.io/etcd/client/v3"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta2"
	"sigs.k8s.io/cluster-api/util/collections"
	v1beta1conditions
"sigs.k8s.io/cluster-api/util/deprecated/v1beta1/conditions"
	ctrl "sigs.k8s.io/controller-runtime"
	"sigs.k8s.io/etcdadm/constants"
)

// etcdClientTimeout bounds the dial when creating an etcd client.
const etcdClientTimeout = 5 * time.Second

// intializeEtcdCluster generates the etcd CA and client cert secrets and then
// creates the first etcd member Machine.
// NOTE(review): name is missing an "i" (initializeEtcdCluster), but renaming
// would break callers, so it is left as is.
func (r *EtcdadmClusterReconciler) intializeEtcdCluster(ctx context.Context, ec *etcdv1.EtcdadmCluster, cluster *clusterv1.Cluster, ep *EtcdPlane) (ctrl.Result, error) {
	if err := r.generateCAandClientCertSecrets(ctx, cluster, ec); err != nil {
		r.Log.Error(err, "error generating etcd CA certs")
		return ctrl.Result{}, err
	}
	v1beta1conditions.MarkTrue(ec, etcdv1.EtcdCertificatesAvailableCondition)
	fd := ep.NextFailureDomainForScaleUp()
	return r.cloneConfigsAndGenerateMachine(ctx, ec, cluster, fd)
}

// scaleUpEtcdCluster adds one etcd member Machine in the next failure domain.
func (r *EtcdadmClusterReconciler) scaleUpEtcdCluster(ctx context.Context, ec *etcdv1.EtcdadmCluster, cluster *clusterv1.Cluster, ep *EtcdPlane) (ctrl.Result, error) {
	fd := ep.NextFailureDomainForScaleUp()
	return r.cloneConfigsAndGenerateMachine(ctx, ec, cluster, fd)
}

// scaleDownEtcdCluster removes one etcd member and deletes its Machine.
func (r *EtcdadmClusterReconciler) scaleDownEtcdCluster(ctx context.Context, ec *etcdv1.EtcdadmCluster, cluster *clusterv1.Cluster, ep *EtcdPlane, outdatedMachines collections.Machines) (ctrl.Result, error) {
	// Pick the Machine that we should scale down.
	machineToDelete, err := selectMachineForScaleDown(ep, outdatedMachines)
	if err != nil {
		return ctrl.Result{}, errors.Wrap(err, "failed to select machine for scale down")
	}
	// Fix: previously a nil machine with a nil error went through
	// errors.Wrap(nil, ...), which returns nil, so the reconciler silently
	// reported success without actually scaling down.
	if machineToDelete == nil {
		return ctrl.Result{}, errors.New("failed to select machine for scale down: no machine selected")
	}
	machineAddress := getEtcdMachineAddress(machineToDelete)
	return ctrl.Result{}, r.removeEtcdMachine(ctx, ec, cluster, machineToDelete, machineAddress)
}

// removeEtcdMachine removes the etcd member running on the given Machine from
// the cluster and deletes the Machine.
func (r *EtcdadmClusterReconciler) removeEtcdMachine(ctx context.Context, ec *etcdv1.EtcdadmCluster, cluster *clusterv1.Cluster, machineToDelete *clusterv1.Machine, machineAddress string) error {
	peerURL := fmt.Sprintf("https://%s:2380", machineAddress)
	etcdClient, err := r.GetEtcdClient(ctx, cluster, ec.Status.Endpoints)
	if err != nil {
		return fmt.Errorf("error creating etcd client, err: %v", err)
	}
	if etcdClient == nil {
		return fmt.Errorf("could not create etcd client")
	}
	// Best-effort close; nothing to do with a close error here.
	defer func() { _ = etcdClient.Close() }()

	return r.removeEtcdMemberAndDeleteMachine(ctx, etcdClient, peerURL, machineToDelete)
}

// generateEtcdClient builds a TLS etcd client for the given comma-separated
// endpoints using the cluster's CA and client certificates.
func (r *EtcdadmClusterReconciler) generateEtcdClient(ctx context.Context, cluster *clusterv1.Cluster, endpoints string) (EtcdClient, error) {
	caCertPool := x509.NewCertPool()
	caCert, err := r.getCACert(ctx, cluster)
	if err != nil {
		return nil, err
	}
	caCertPool.AppendCertsFromPEM(caCert)

	clientCert, err := r.getClientCerts(ctx, cluster)
	if err != nil {
		return nil, errors.Wrap(err, "error getting client cert for healthcheck")
	}

	etcdClient, err := clientv3.New(clientv3.Config{
		Endpoints:   strings.Split(endpoints, ","),
		DialTimeout: etcdClientTimeout,
		TLS: &tls.Config{
			RootCAs:      caCertPool,
			Certificates: []tls.Certificate{clientCert},
		},
	})

	return etcdClient, err
}

// removeEtcdMemberAndDeleteMachine removes the member identified by peerURL
// from the etcd cluster (unless it is the last member) and deletes its Machine.
func (r *EtcdadmClusterReconciler) removeEtcdMemberAndDeleteMachine(ctx context.Context, etcdClient EtcdClient, peerURL string, machineToDelete *clusterv1.Machine) error {
	log := r.Log
	// Etcdadm has a "reset" command to remove an etcd member. But we can't run that command on the CAPI machine object after it's provisioned.
	// so the following logic is based on how etcdadm performs "reset" https://github.com/kubernetes-sigs/etcdadm/blob/master/cmd/reset.go#L65
	etcdCtx, cancel := context.WithTimeout(ctx, constants.DefaultEtcdRequestTimeout)
	mresp, err := etcdClient.MemberList(etcdCtx)
	cancel()
	if err != nil {
		return fmt.Errorf("error listing members: %v", err)
	}
	localMember, ok := memberForPeerURLs(mresp, []string{peerURL})
	if ok {
		if len(mresp.Members) > 1 {
			log.Info("Removing", "member", localMember.Name)
			etcdCtx, cancel = context.WithTimeout(ctx, constants.DefaultEtcdRequestTimeout)
			_, err = etcdClient.MemberRemove(etcdCtx, localMember.ID)
			cancel()
			if err != nil {
				return fmt.Errorf("failed to remove etcd member %s with error %v", localMember.Name, err)
			}
			if machineToDelete != nil {
				if err := r.Client.Delete(ctx, machineToDelete); err != nil && !apierrors.IsNotFound(err) && !apierrors.IsGone(err) {
					return fmt.Errorf("failed to delete etcd machine %s with error %v", machineToDelete.Name, err)
				}
			}
		} else {
			log.Info("Not removing last member in the cluster", "member", localMember.Name)
		}
	} else {
		log.Info("Member was removed")
		if machineToDelete != nil {
			// this could happen if the etcd member was removed through etcdctl calls, ensure that the machine gets deleted too
			if err := r.Client.Delete(ctx, machineToDelete); err != nil && !apierrors.IsNotFound(err) && !apierrors.IsGone(err) {
				return fmt.Errorf("failed to delete etcd machine %s with error %v", machineToDelete.Name, err)
			}
		}
	}
	return nil
}

-------------------------------------------------------------------------------- /api/v1beta1/etcdadmcluster_webhook.go: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package v1beta1 18 | 19 | import ( 20 | "context" 21 | 22 | apierrors "k8s.io/apimachinery/pkg/api/errors" 23 | "k8s.io/apimachinery/pkg/runtime" 24 | "k8s.io/apimachinery/pkg/util/validation/field" 25 | ctrl "sigs.k8s.io/controller-runtime" 26 | logf "sigs.k8s.io/controller-runtime/pkg/log" 27 | "sigs.k8s.io/controller-runtime/pkg/webhook" 28 | "sigs.k8s.io/controller-runtime/pkg/webhook/admission" 29 | ) 30 | 31 | // log is for logging in this package. 32 | var etcdadmclusterlog = logf.Log.WithName("etcdadmcluster-resource") 33 | 34 | func (r *EtcdadmCluster) SetupWebhookWithManager(mgr ctrl.Manager) error { 35 | return ctrl.NewWebhookManagedBy(mgr). 36 | For(r). 37 | WithDefaulter(r). 38 | WithValidator(r). 
39 | Complete() 40 | } 41 | 42 | // +kubebuilder:webhook:verbs=create;update,path=/mutate-etcdcluster-cluster-x-k8s-io-v1beta1-etcdadmcluster,mutating=true,failurePolicy=fail,groups=etcdcluster.cluster.x-k8s.io,resources=etcdadmclusters,versions=v1beta1,name=metcdadmcluster.kb.io,sideEffects=None,admissionReviewVersions=v1;v1beta1 43 | 44 | var _ webhook.CustomDefaulter = &EtcdadmCluster{} 45 | 46 | // +kubebuilder:webhook:verbs=create;update,path=/validate-etcdcluster-cluster-x-k8s-io-v1beta1-etcdadmcluster,mutating=false,failurePolicy=fail,groups=etcdcluster.cluster.x-k8s.io,resources=etcdadmclusters,versions=v1beta1,name=vetcdadmcluster.kb.io,sideEffects=None,admissionReviewVersions=v1;v1beta1 47 | 48 | var _ webhook.CustomValidator = &EtcdadmCluster{} 49 | 50 | // Default implements webhook.CustomDefaulter so a webhook will be registered for the type 51 | func (r *EtcdadmCluster) Default(ctx context.Context, obj runtime.Object) error { 52 | cluster, ok := obj.(*EtcdadmCluster) 53 | if !ok { 54 | return apierrors.NewBadRequest("expected an EtcdadmCluster but got a different type") 55 | } 56 | 57 | etcdadmclusterlog.Info("default", "name", cluster.Name) 58 | 59 | if cluster.Spec.Replicas == nil { 60 | replicas := int32(1) 61 | cluster.Spec.Replicas = &replicas 62 | } 63 | 64 | if cluster.Spec.InfrastructureTemplate.Namespace == "" { 65 | cluster.Spec.InfrastructureTemplate.Namespace = cluster.Namespace 66 | } 67 | 68 | return nil 69 | } 70 | 71 | // ValidateCreate implements webhook.CustomValidator so a webhook will be registered for the type 72 | func (r *EtcdadmCluster) ValidateCreate(ctx context.Context, obj runtime.Object) (admission.Warnings, error) { 73 | cluster, ok := obj.(*EtcdadmCluster) 74 | if !ok { 75 | return nil, apierrors.NewBadRequest("expected an EtcdadmCluster but got a different type") 76 | } 77 | 78 | etcdadmclusterlog.Info("validate create", "name", cluster.Name) 79 | 80 | allErrs := cluster.validateCommon() 81 | if len(allErrs) > 0 { 82 | 
return nil, apierrors.NewInvalid(GroupVersion.WithKind("EtcdadmCluster").GroupKind(), cluster.Name, allErrs) 83 | } 84 | return nil, nil 85 | } 86 | 87 | // ValidateUpdate implements webhook.CustomValidator so a webhook will be registered for the type 88 | func (r *EtcdadmCluster) ValidateUpdate(ctx context.Context, oldObj, newObj runtime.Object) (admission.Warnings, error) { 89 | cluster, ok := newObj.(*EtcdadmCluster) 90 | if !ok { 91 | return nil, apierrors.NewBadRequest("expected an EtcdadmCluster but got a different type") 92 | } 93 | 94 | etcdadmclusterlog.Info("validate update", "name", cluster.Name) 95 | 96 | allErrs := cluster.validateCommon() 97 | if len(allErrs) > 0 { 98 | return nil, apierrors.NewInvalid(GroupVersion.WithKind("EtcdadmCluster").GroupKind(), cluster.Name, allErrs) 99 | } 100 | return nil, nil 101 | } 102 | 103 | // ValidateDelete implements webhook.CustomValidator so a webhook will be registered for the type 104 | func (r *EtcdadmCluster) ValidateDelete(ctx context.Context, obj runtime.Object) (admission.Warnings, error) { 105 | cluster, ok := obj.(*EtcdadmCluster) 106 | if !ok { 107 | return nil, apierrors.NewBadRequest("expected an EtcdadmCluster but got a different type") 108 | } 109 | 110 | etcdadmclusterlog.Info("validate delete", "name", cluster.Name) 111 | 112 | // TODO(user): fill in your validation logic upon object deletion. 
113 | return nil, nil 114 | } 115 | 116 | func (r *EtcdadmCluster) validateCommon() (allErrs field.ErrorList) { 117 | if r.Spec.Replicas == nil { 118 | allErrs = append( 119 | allErrs, 120 | field.Required( 121 | field.NewPath("spec", "replicas"), 122 | "is required", 123 | ), 124 | ) 125 | } else if *r.Spec.Replicas <= 0 { 126 | allErrs = append( 127 | allErrs, 128 | field.Forbidden( 129 | field.NewPath("spec", "replicas"), 130 | "cannot be less than or equal to 0", 131 | ), 132 | ) 133 | } else if r.Spec.Replicas != nil && *r.Spec.Replicas%2 == 0 { 134 | allErrs = append( 135 | allErrs, 136 | field.Forbidden( 137 | field.NewPath("spec", "replicas"), 138 | "etcd cluster cannot have an even number of nodes", 139 | ), 140 | ) 141 | } 142 | 143 | if r.Spec.InfrastructureTemplate.Namespace != r.Namespace { 144 | allErrs = append( 145 | allErrs, 146 | field.Invalid( 147 | field.NewPath("spec", "infrastructureTemplate", "namespace"), 148 | r.Spec.InfrastructureTemplate.Namespace, 149 | "must match metadata.namespace", 150 | ), 151 | ) 152 | } 153 | 154 | return allErrs 155 | } 156 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/aws/etcdadm-controller 2 | 3 | go 1.24.0 4 | 5 | toolchain go1.24.7 6 | 7 | require ( 8 | github.com/aws/etcdadm-bootstrap-provider v1.0.12 9 | github.com/go-logr/logr v1.4.3 10 | github.com/golang/mock v1.4.4 11 | github.com/google/uuid v1.6.0 12 | github.com/hashicorp/go-multierror v1.1.1 13 | github.com/onsi/ginkgo/v2 v2.23.4 14 | github.com/onsi/gomega v1.38.0 15 | github.com/pkg/errors v0.9.1 16 | github.com/spf13/pflag v1.0.7 17 | go.etcd.io/etcd/api/v3 v3.5.22 18 | go.etcd.io/etcd/client/v3 v3.5.22 19 | k8s.io/api v0.33.3 20 | k8s.io/apimachinery v0.33.3 21 | k8s.io/apiserver v0.33.3 22 | k8s.io/client-go v0.33.3 23 | k8s.io/klog/v2 v2.130.1 24 | k8s.io/utils v0.0.0-20241104100929-3ea5e8cea738 
25 | sigs.k8s.io/cluster-api v1.11.1 26 | sigs.k8s.io/controller-runtime v0.21.0 27 | sigs.k8s.io/etcdadm v0.1.5 28 | ) 29 | 30 | require ( 31 | cel.dev/expr v0.19.1 // indirect 32 | github.com/NYTimes/gziphandler v1.1.1 // indirect 33 | github.com/antlr4-go/antlr/v4 v4.13.0 // indirect 34 | github.com/beorn7/perks v1.0.1 // indirect 35 | github.com/blang/semver/v4 v4.0.0 // indirect 36 | github.com/cenkalti/backoff/v4 v4.3.0 // indirect 37 | github.com/cespare/xxhash/v2 v2.3.0 // indirect 38 | github.com/coreos/go-semver v0.3.1 // indirect 39 | github.com/coreos/go-systemd/v22 v22.5.0 // indirect 40 | github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect 41 | github.com/emicklei/go-restful/v3 v3.12.2 // indirect 42 | github.com/evanphx/json-patch/v5 v5.9.11 // indirect 43 | github.com/felixge/httpsnoop v1.0.4 // indirect 44 | github.com/fsnotify/fsnotify v1.8.0 // indirect 45 | github.com/fxamacker/cbor/v2 v2.7.0 // indirect 46 | github.com/go-logr/stdr v1.2.2 // indirect 47 | github.com/go-logr/zapr v1.3.0 // indirect 48 | github.com/go-openapi/jsonpointer v0.21.0 // indirect 49 | github.com/go-openapi/jsonreference v0.20.2 // indirect 50 | github.com/go-openapi/swag v0.23.0 // indirect 51 | github.com/go-task/slim-sprig/v3 v3.0.0 // indirect 52 | github.com/gobuffalo/flect v1.0.3 // indirect 53 | github.com/gogo/protobuf v1.3.2 // indirect 54 | github.com/golang/protobuf v1.5.4 // indirect 55 | github.com/google/btree v1.1.3 // indirect 56 | github.com/google/cel-go v0.23.2 // indirect 57 | github.com/google/gnostic-models v0.6.9 // indirect 58 | github.com/google/go-cmp v0.7.0 // indirect 59 | github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 // indirect 60 | github.com/grpc-ecosystem/grpc-gateway/v2 v2.24.0 // indirect 61 | github.com/hashicorp/errwrap v1.0.0 // indirect 62 | github.com/inconshreveable/mousetrap v1.1.0 // indirect 63 | github.com/josharian/intern v1.0.0 // indirect 64 | github.com/json-iterator/go v1.1.12 // 
indirect 65 | github.com/kylelemons/godebug v1.1.0 // indirect 66 | github.com/mailru/easyjson v0.7.7 // indirect 67 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect 68 | github.com/modern-go/reflect2 v1.0.2 // indirect 69 | github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect 70 | github.com/prometheus/client_golang v1.22.0 // indirect 71 | github.com/prometheus/client_model v0.6.1 // indirect 72 | github.com/prometheus/common v0.62.0 // indirect 73 | github.com/prometheus/procfs v0.15.1 // indirect 74 | github.com/satori/go.uuid v1.2.0 // indirect 75 | github.com/spf13/cobra v1.9.1 // indirect 76 | github.com/stoewer/go-strcase v1.3.0 // indirect 77 | github.com/x448/float16 v0.8.4 // indirect 78 | go.etcd.io/etcd/client/pkg/v3 v3.5.22 // indirect 79 | go.opentelemetry.io/auto/sdk v1.1.0 // indirect 80 | go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.58.0 // indirect 81 | go.opentelemetry.io/otel v1.34.0 // indirect 82 | go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.33.0 // indirect 83 | go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.33.0 // indirect 84 | go.opentelemetry.io/otel/metric v1.34.0 // indirect 85 | go.opentelemetry.io/otel/sdk v1.34.0 // indirect 86 | go.opentelemetry.io/otel/trace v1.34.0 // indirect 87 | go.opentelemetry.io/proto/otlp v1.4.0 // indirect 88 | go.uber.org/automaxprocs v1.6.0 // indirect 89 | go.uber.org/multierr v1.11.0 // indirect 90 | go.uber.org/zap v1.27.0 // indirect 91 | go.yaml.in/yaml/v2 v2.4.2 // indirect 92 | golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 // indirect 93 | golang.org/x/net v0.42.0 // indirect 94 | golang.org/x/oauth2 v0.30.0 // indirect 95 | golang.org/x/sync v0.16.0 // indirect 96 | golang.org/x/sys v0.34.0 // indirect 97 | golang.org/x/term v0.33.0 // indirect 98 | golang.org/x/text v0.27.0 // indirect 99 | golang.org/x/time v0.9.0 // indirect 100 | golang.org/x/tools v0.34.0 // indirect 101 | 
gomodules.xyz/jsonpatch/v2 v2.5.0 // indirect 102 | google.golang.org/genproto/googleapis/api v0.0.0-20250106144421-5f5ef82da422 // indirect 103 | google.golang.org/genproto/googleapis/rpc v0.0.0-20250115164207-1a7da9e5054f // indirect 104 | google.golang.org/grpc v1.71.3 // indirect 105 | google.golang.org/protobuf v1.36.6 // indirect 106 | gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect 107 | gopkg.in/inf.v0 v0.9.1 // indirect 108 | gopkg.in/yaml.v3 v3.0.1 // indirect 109 | k8s.io/apiextensions-apiserver v0.33.3 // indirect 110 | k8s.io/cluster-bootstrap v0.33.3 // indirect 111 | k8s.io/component-base v0.33.3 // indirect 112 | k8s.io/kube-openapi v0.0.0-20250318190949-c8a335a9a2ff // indirect 113 | sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.31.2 // indirect 114 | sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3 // indirect 115 | sigs.k8s.io/randfill v1.0.0 // indirect 116 | sigs.k8s.io/structured-merge-diff/v4 v4.6.0 // indirect 117 | sigs.k8s.io/yaml v1.6.0 // indirect 118 | ) 119 | 120 | replace ( 121 | github.com/aws/etcdadm-bootstrap-provider => github.com/panktishah26/etcdadm-bootstrap-provider v0.0.0 122 | sigs.k8s.io/cluster-api => github.com/panktishah26/cluster-api v0.1.0 123 | ) 124 | -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | package main 18 | 19 | import ( 20 | "context" 21 | "flag" 22 | "os" 23 | "os/signal" 24 | "syscall" 25 | "time" 26 | 27 | etcdbp "github.com/aws/etcdadm-bootstrap-provider/api/v1beta1" 28 | "github.com/spf13/pflag" 29 | "k8s.io/apimachinery/pkg/runtime" 30 | clientgoscheme "k8s.io/client-go/kubernetes/scheme" 31 | _ "k8s.io/client-go/plugin/pkg/client/auth/gcp" 32 | clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta2" 33 | capiflags "sigs.k8s.io/cluster-api/util/flags" 34 | ctrl "sigs.k8s.io/controller-runtime" 35 | "sigs.k8s.io/controller-runtime/pkg/cache" 36 | "sigs.k8s.io/controller-runtime/pkg/log/zap" 37 | 38 | etcdclusterv1alpha3 "github.com/aws/etcdadm-controller/api/v1alpha3" 39 | etcdclusterv1beta1 "github.com/aws/etcdadm-controller/api/v1beta1" 40 | "github.com/aws/etcdadm-controller/controllers" 41 | // +kubebuilder:scaffold:imports 42 | ) 43 | 44 | var ( 45 | scheme = runtime.NewScheme() 46 | setupLog = ctrl.Log.WithName("setup") 47 | watchNamespace string 48 | managerOptions capiflags.ManagerOptions 49 | enableLeaderElection bool 50 | ) 51 | 52 | func init() { 53 | _ = clientgoscheme.AddToScheme(scheme) 54 | 55 | _ = clusterv1.AddToScheme(scheme) 56 | _ = etcdbp.AddToScheme(scheme) 57 | _ = etcdclusterv1alpha3.AddToScheme(scheme) 58 | _ = etcdclusterv1beta1.AddToScheme(scheme) 59 | // +kubebuilder:scaffold:scheme 60 | } 61 | 62 | // +kubebuilder:rbac:groups=authentication.k8s.io,resources=tokenreviews,verbs=create 63 | // +kubebuilder:rbac:groups=authorization.k8s.io,resources=subjectaccessreviews,verbs=create 64 | func main() { 65 | var maxConcurrentReconciles int 66 | var healthcheckInterval int 67 | 68 | pflag.CommandLine.AddGoFlagSet(flag.CommandLine) 69 | capiflags.AddManagerOptions(pflag.CommandLine, &managerOptions) 70 | 71 | pflag.BoolVar(&enableLeaderElection, "enable-leader-election", false, 72 | "Enable leader election for controller manager. 
"+ 73 | "Enabling this will ensure there is only one active controller manager.") 74 | pflag.StringVar(&watchNamespace, "namespace", "", 75 | "Namespace that the controller watches to reconcile etcdadmCluster objects. If unspecified, the controller watches for objects across all namespaces.") 76 | pflag.IntVar(&maxConcurrentReconciles, "max-concurrent-reconciles", 10, "The maximum number of concurrent etcdadm-controller reconciles.") 77 | pflag.IntVar(&healthcheckInterval, "healthcheck-interval", 30, "The time interval between each healthcheck loop in seconds.") 78 | pflag.Parse() 79 | 80 | ctrl.SetLogger(zap.New(zap.UseDevMode(true))) 81 | 82 | _, metricsServerOpts, err := capiflags.GetManagerOptions(managerOptions) 83 | if err != nil { 84 | setupLog.Error(err, "Unable to start manager: invalid metrics server flags") 85 | os.Exit(1) 86 | } 87 | 88 | opts := ctrl.Options{ 89 | Scheme: scheme, 90 | Metrics: *metricsServerOpts, 91 | LeaderElection: enableLeaderElection, 92 | LeaderElectionID: "cc88008e.cluster.x-k8s.io", 93 | } 94 | 95 | if watchNamespace != "" { 96 | opts.Cache = cache.Options{ 97 | DefaultNamespaces: map[string]cache.Config{watchNamespace: {}}, 98 | } 99 | } 100 | 101 | mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), opts) 102 | if err != nil { 103 | setupLog.Error(err, "unable to start manager") 104 | os.Exit(1) 105 | } 106 | 107 | // Setup the context that's going to be used in controllers and for the manager. 
108 | ctx, stopCh := setupSignalHandler() 109 | etcdadmReconciler := &controllers.EtcdadmClusterReconciler{ 110 | Client: mgr.GetClient(), 111 | Log: ctrl.Log.WithName("controllers").WithName("EtcdadmCluster"), 112 | Scheme: mgr.GetScheme(), 113 | MaxConcurrentReconciles: maxConcurrentReconciles, 114 | HealthCheckInterval: time.Second * time.Duration(healthcheckInterval), 115 | } 116 | if err = (etcdadmReconciler).SetupWithManager(ctx, mgr, stopCh); err != nil { 117 | setupLog.Error(err, "unable to create controller", "controller", "EtcdadmCluster") 118 | os.Exit(1) 119 | } 120 | if err = (&etcdclusterv1beta1.EtcdadmCluster{}).SetupWebhookWithManager(mgr); err != nil { 121 | setupLog.Error(err, "unable to create webhook", "webhook", "EtcdadmCluster") 122 | os.Exit(1) 123 | } 124 | // +kubebuilder:scaffold:builder 125 | 126 | setupLog.Info("starting manager") 127 | if err := mgr.Start(ctx); err != nil { 128 | setupLog.Error(err, "problem running manager") 129 | os.Exit(1) 130 | } 131 | } 132 | 133 | var onlyOneSignalHandler = make(chan struct{}) 134 | var shutdownSignals = []os.Signal{os.Interrupt, syscall.SIGTERM} 135 | 136 | /* 137 | setupSignalHandler is a modified version of controller-runtime's SetupSignalHandler that returns both a stop channel and a context, where the context is cancelled (and the channel closed) when this controller receives the first shutdown signal; a second signal exits immediately. 138 | 139 | Historically this dual return existed because cluster-api v0.3.x pinned controller-runtime 0.5.4, whose SetupSignalHandler returned a stop channel, while 0.7.0 onwards returns a context that can be passed down to SetupWithManager and reconcilers. 140 | NOTE(review): go.mod now pins controller-runtime v0.21.0 and cluster-api v1.11.1, so the separate stop channel may no longer be necessary — confirm against SetupWithManager's current signature before removing. 141 | 142 | */ 143 | func setupSignalHandler() (context.Context, <-chan struct{}) { 144 | close(onlyOneSignalHandler) // panics when called twice 145 | 146 | ctx, cancel := context.WithCancel(context.Background()) 147 | stop := make(chan struct{}) 148 | c := make(chan os.Signal, 2) 149 | signal.Notify(c, shutdownSignals...)
150 | go func() { 151 | <-c 152 | cancel() 153 | close(stop) 154 | <-c 155 | os.Exit(1) // second signal. Exit directly. 156 | }() 157 | 158 | return ctx, stop 159 | } 160 | -------------------------------------------------------------------------------- /controllers/certs.go: -------------------------------------------------------------------------------- 1 | package controllers 2 | 3 | import ( 4 | "context" 5 | "crypto/tls" 6 | "crypto/x509" 7 | "encoding/pem" 8 | "fmt" 9 | 10 | "path/filepath" 11 | 12 | etcdv1 "github.com/aws/etcdadm-controller/api/v1beta1" 13 | "github.com/pkg/errors" 14 | corev1 "k8s.io/api/core/v1" 15 | apierrors "k8s.io/apimachinery/pkg/api/errors" 16 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 17 | certutil "k8s.io/client-go/util/cert" 18 | clusterv1beta1 "sigs.k8s.io/cluster-api/api/core/v1beta1" 19 | clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta2" 20 | "sigs.k8s.io/cluster-api/util" 21 | "sigs.k8s.io/cluster-api/util/certs" 22 | v1beta1conditions "sigs.k8s.io/cluster-api/util/deprecated/v1beta1/conditions" 23 | "sigs.k8s.io/cluster-api/util/secret" 24 | "sigs.k8s.io/controller-runtime/pkg/client" 25 | "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" 26 | "sigs.k8s.io/etcdadm/certs/pkiutil" 27 | "sigs.k8s.io/etcdadm/constants" 28 | ) 29 | 30 | // etcdadm provisioning works as follows: 31 | // machine one runs etcdadm init, generates CA and client certs 32 | // CA certs are copied over to remaining nodes to run etcdadm join 33 | // This provider is going to generate CA cert-key for etcd, and create two Secrets to store CA cert + client cert-key to be used by kube-apiserver 34 | func (r *EtcdadmClusterReconciler) generateCAandClientCertSecrets(ctx context.Context, cluster *clusterv1.Cluster, etcdCluster *etcdv1.EtcdadmCluster) error { 35 | log := r.Log 36 | // Generate external etcd CA cert + key pair 37 | CACertKeyPair := etcdCACertKeyPair() 38 | err := CACertKeyPair.LookupOrGenerate( 39 | ctx, 40 | r.Client, 41 | 
util.ObjectKey(cluster), 42 | *metav1.NewControllerRef(etcdCluster, etcdv1.GroupVersion.WithKind("EtcdadmCluster")), 43 | ) 44 | if err != nil { 45 | log.Error(err, "Failed to look up or generate CA cert key pair") 46 | return err 47 | } 48 | 49 | caCertKey := CACertKeyPair.GetByPurpose(secret.ManagedExternalEtcdCA) 50 | if caCertKey == nil { 51 | return fmt.Errorf("nil returned from getting etcd CA certificate by purpose %s", secret.ManagedExternalEtcdCA) 52 | } 53 | 54 | // Use the generated CA cert+key pair to generate and sign etcd client cert+key pair 55 | caCertDecoded, _ := pem.Decode(caCertKey.KeyPair.Cert) 56 | caCert, err := x509.ParseCertificate(caCertDecoded.Bytes) 57 | if err != nil { 58 | log.Error(err, "Failed to parse etcd CA cert") 59 | return err 60 | } 61 | caKeyDecoded, _ := pem.Decode(caCertKey.KeyPair.Key) 62 | caKey, err := x509.ParsePKCS1PrivateKey(caKeyDecoded.Bytes) 63 | if err != nil { 64 | log.Error(err, "Failed to parse etcd CA key") 65 | return err 66 | } 67 | 68 | commonName := fmt.Sprintf("%s-kube-apiserver-etcd-client", cluster.Name) 69 | // This certConfig is what etcdadm uses to generate client certs https://github.com/kubernetes-sigs/etcdadm/blob/master/certs/certs.go#L233 70 | certConfig := certutil.Config{ 71 | CommonName: commonName, 72 | Organization: []string{constants.MastersGroup}, 73 | Usages: []x509.ExtKeyUsage{x509.ExtKeyUsageClientAuth}, 74 | } 75 | apiClientCert, apiClientKey, err := pkiutil.NewCertAndKey(caCert, caKey, certConfig) 76 | if err != nil { 77 | return fmt.Errorf("failure while creating %q etcd client key and certificate: %v", commonName, err) 78 | } 79 | 80 | // Now generate two Secrets, one containing the client cert+key pair and other containing the etcd CA cert. Each control plane provider should 81 | use these two Secrets for communicating with etcd.
82 | apiServerClientCertKeyPair := secret.Certificate{ 83 | Purpose: secret.APIServerEtcdClient, 84 | KeyPair: &certs.KeyPair{ 85 | Cert: certs.EncodeCertPEM(apiClientCert), 86 | Key: certs.EncodePrivateKeyPEM(apiClientKey), 87 | }, 88 | Generated: true, 89 | } 90 | s := apiServerClientCertKeyPair.AsSecret(client.ObjectKey{Name: cluster.Name, Namespace: cluster.Namespace}, *metav1.NewControllerRef(etcdCluster, etcdv1.GroupVersion.WithKind("EtcdadmCluster"))) 91 | secretToPatch := s.DeepCopy() 92 | 93 | // CreateOrPatch performs a create operation when the object is not found. 94 | // But if an object is found, the function expects to reconcile the fields we want patched in a callback func. 95 | // CreateOrPatch does a GET call and overwrites the object we pass in with whats on the cluster. 96 | // Hence we keep a copy of the newly generated secret and update the secret Data field in a callback func. 97 | // Ex; https://github.com/kubernetes-sigs/controller-runtime/blob/v0.14.5/pkg/controller/controllerutil/example_test.go 98 | if _, err := controllerutil.CreateOrPatch(ctx, r.Client, s, func() error { 99 | s.Data = secretToPatch.Data 100 | return nil 101 | }); err != nil { 102 | return fmt.Errorf("failure while saving etcd client key and certificate: %v", err) 103 | } 104 | 105 | log.Info("Saved apiserver client cert key as secret") 106 | 107 | s = &corev1.Secret{ 108 | ObjectMeta: metav1.ObjectMeta{ 109 | Namespace: cluster.Namespace, 110 | Name: secret.Name(cluster.Name, secret.EtcdCA), 111 | Labels: map[string]string{ 112 | clusterv1.ClusterNameLabel: cluster.Name, 113 | }, 114 | OwnerReferences: []metav1.OwnerReference{*metav1.NewControllerRef(etcdCluster, etcdv1.GroupVersion.WithKind("EtcdadmCluster"))}, 115 | }, 116 | Data: map[string][]byte{ 117 | secret.TLSCrtDataName: caCertKey.KeyPair.Cert, 118 | }, 119 | Type: clusterv1.ClusterSecretType, 120 | } 121 | if err := r.Client.Create(ctx, s); err != nil && !apierrors.IsAlreadyExists(err) { 122 | return 
fmt.Errorf("failure while saving etcd CA certificate: %v", err) 123 | } 124 | 125 | log.Info("Saved etcd ca cert as secret") 126 | v1beta1conditions.MarkTrue(etcdCluster, clusterv1beta1.ConditionType(etcdv1.EtcdCertificatesAvailableCondition)) 127 | return nil 128 | } 129 | 130 | func etcdCACertKeyPair() secret.Certificates { 131 | certificatesDir := "/etc/etcd/pki" 132 | certificates := secret.Certificates{ 133 | &secret.Certificate{ 134 | Purpose: secret.ManagedExternalEtcdCA, 135 | CertFile: filepath.Join(certificatesDir, "ca.crt"), 136 | KeyFile: filepath.Join(certificatesDir, "ca.key"), 137 | }, 138 | } 139 | 140 | return certificates 141 | } 142 | 143 | // TODO: save CA and client cert on the reconciler object 144 | func (r *EtcdadmClusterReconciler) getCACert(ctx context.Context, cluster *clusterv1.Cluster) ([]byte, error) { 145 | caCert := &secret.Certificates{ 146 | &secret.Certificate{ 147 | Purpose: secret.ManagedExternalEtcdCA, 148 | }, 149 | } 150 | if err := caCert.Lookup(ctx, r.Client, util.ObjectKey(cluster)); err != nil { 151 | return []byte{}, errors.Wrap(err, "error looking up external etcd CA certs") 152 | } 153 | if caCertKey := caCert.GetByPurpose(secret.ManagedExternalEtcdCA); caCertKey != nil { 154 | if caCertKey.KeyPair == nil { 155 | return []byte{}, errors.New("ca cert key pair not found for cluster") 156 | } 157 | return caCertKey.KeyPair.Cert, nil 158 | } 159 | return []byte{}, fmt.Errorf("nil returned from getting etcd CA certificate by purpose %s", secret.ManagedExternalEtcdCA) 160 | } 161 | 162 | func (r *EtcdadmClusterReconciler) getClientCerts(ctx context.Context, cluster *clusterv1.Cluster) (tls.Certificate, error) { 163 | clientCert := &secret.Certificates{ 164 | &secret.Certificate{ 165 | Purpose: secret.APIServerEtcdClient, 166 | }, 167 | } 168 | if err := clientCert.Lookup(ctx, r.Client, util.ObjectKey(cluster)); err != nil { 169 | return tls.Certificate{}, err 170 | } 171 | if clientCertKey := 
clientCert.GetByPurpose(secret.APIServerEtcdClient); clientCertKey != nil { 172 | if clientCertKey.KeyPair == nil { 173 | return tls.Certificate{}, fmt.Errorf("client cert key pair not found for cluster") 174 | } 175 | return tls.X509KeyPair(clientCertKey.KeyPair.Cert, clientCertKey.KeyPair.Key) 176 | } 177 | return tls.Certificate{}, fmt.Errorf("nil returned from getting etcd CA certificate by purpose %s", secret.APIServerEtcdClient) 178 | } 179 | -------------------------------------------------------------------------------- /controllers/testutils.go: -------------------------------------------------------------------------------- 1 | package controllers 2 | 3 | import ( 4 | "bytes" 5 | "context" 6 | "fmt" 7 | "io" 8 | "math/rand" 9 | "net/http" 10 | "strings" 11 | 12 | etcdbootstrapv1 "github.com/aws/etcdadm-bootstrap-provider/api/v1beta1" 13 | etcdv1 "github.com/aws/etcdadm-controller/api/v1beta1" 14 | "github.com/google/uuid" 15 | "go.etcd.io/etcd/api/v3/etcdserverpb" 16 | clientv3 "go.etcd.io/etcd/client/v3" 17 | corev1 "k8s.io/api/core/v1" 18 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 19 | "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" 20 | "k8s.io/apimachinery/pkg/types" 21 | "k8s.io/apiserver/pkg/storage/names" 22 | "k8s.io/utils/ptr" 23 | clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta2" 24 | "sigs.k8s.io/controller-runtime/pkg/client" 25 | ) 26 | 27 | const ( 28 | testClusterName = "testCluster" 29 | testNamespace = "test" 30 | testEtcdadmClusterName = "testEtcdadmCluster" 31 | testInfrastructureTemplateName = "testInfraTemplate" 32 | etcdClusterNameSuffix = "etcd-cluster" 33 | etcdVersion = "v3.4.9" 34 | ) 35 | 36 | var ( 37 | infraTemplate = &unstructured.Unstructured{ 38 | Object: map[string]interface{}{ 39 | "kind": "InfrastructureTemplate", 40 | "apiVersion": "infra.io/v1", 41 | "metadata": map[string]interface{}{ 42 | "name": testInfrastructureTemplateName, 43 | "namespace": testNamespace, 44 | }, 45 | "spec": map[string]interface{}{ 
46 | "template": map[string]interface{}{ 47 | "spec": map[string]interface{}{ 48 | "hello": "world", 49 | }, 50 | }, 51 | }, 52 | }, 53 | } 54 | ) 55 | 56 | type etcdadmClusterTest struct { 57 | replicas int 58 | name string 59 | namespace string 60 | cluster *clusterv1.Cluster 61 | etcdadmCluster *etcdv1.EtcdadmCluster 62 | machines []*clusterv1.Machine 63 | machineCounter int 64 | initSecret *corev1.Secret 65 | } 66 | 67 | func newEtcdadmClusterTest(etcdReplicas int) *etcdadmClusterTest { 68 | return &etcdadmClusterTest{ 69 | name: testClusterName, 70 | namespace: testNamespace, 71 | replicas: etcdReplicas, 72 | machineCounter: 0, 73 | } 74 | } 75 | 76 | func (e *etcdadmClusterTest) buildClusterWithExternalEtcd() *etcdadmClusterTest { 77 | e.cluster = e.newClusterWithExternalEtcd() 78 | e.etcdadmCluster = e.newEtcdadmCluster(e.cluster) 79 | e.machines = []*clusterv1.Machine{} 80 | endpoints := []string{} 81 | for i := 0; i < e.replicas; i++ { 82 | machine := e.newEtcdMachine() 83 | e.machines = append(e.machines, machine) 84 | endpoints = append(endpoints, fmt.Sprintf("https://%v:2379", machine.Status.Addresses[0].Address)) 85 | } 86 | e.etcdadmCluster.Status.Endpoints = strings.Join(endpoints, ",") 87 | return e 88 | } 89 | 90 | func (e *etcdadmClusterTest) withHealthCheckRetries(retries int) *etcdadmClusterTest { 91 | if e.etcdadmCluster.Annotations == nil { 92 | e.etcdadmCluster.Annotations = map[string]string{} 93 | } 94 | e.etcdadmCluster.Annotations[etcdv1.HealthCheckRetriesAnnotation] = fmt.Sprintf("%d", retries) 95 | return e 96 | } 97 | 98 | // newClusterWithExternalEtcd return a CAPI cluster object with managed external etcd ref 99 | func (e *etcdadmClusterTest) newClusterWithExternalEtcd() *clusterv1.Cluster { 100 | return &clusterv1.Cluster{ 101 | TypeMeta: metav1.TypeMeta{ 102 | Kind: "Cluster", 103 | APIVersion: clusterv1.GroupVersion.String(), 104 | }, 105 | ObjectMeta: metav1.ObjectMeta{ 106 | Namespace: e.namespace, 107 | Name: e.name, 108 | UID: 
types.UID(uuid.New().String()), 109 | }, 110 | Spec: clusterv1.ClusterSpec{ 111 | ManagedExternalEtcdRef: clusterv1.ContractVersionedObjectReference{ 112 | Kind: "EtcdadmCluster", 113 | Name: e.name, 114 | }, 115 | InfrastructureRef: clusterv1.ContractVersionedObjectReference{ 116 | Kind: "InfrastructureTemplate", 117 | Name: testInfrastructureTemplateName, 118 | }, 119 | }, 120 | Status: clusterv1.ClusterStatus{ 121 | Conditions: []metav1.Condition{ 122 | { 123 | Type: clusterv1.ClusterInfrastructureReadyCondition, 124 | Status: metav1.ConditionTrue, 125 | Reason: "Ready", 126 | }, 127 | }, 128 | }, 129 | } 130 | } 131 | 132 | func (e *etcdadmClusterTest) newEtcdadmCluster(cluster *clusterv1.Cluster) *etcdv1.EtcdadmCluster { 133 | return &etcdv1.EtcdadmCluster{ 134 | TypeMeta: metav1.TypeMeta{ 135 | Kind: "EtcdadmCluster", 136 | APIVersion: etcdv1.GroupVersion.String(), 137 | }, 138 | ObjectMeta: metav1.ObjectMeta{ 139 | Namespace: e.namespace, 140 | Name: e.getEtcdClusterName(), 141 | UID: types.UID(uuid.New().String()), 142 | OwnerReferences: []metav1.OwnerReference{ 143 | *metav1.NewControllerRef(e.cluster, clusterv1.GroupVersion.WithKind("Cluster")), 144 | }, 145 | Finalizers: []string{etcdv1.EtcdadmClusterFinalizer}, 146 | }, 147 | Spec: etcdv1.EtcdadmClusterSpec{ 148 | EtcdadmConfigSpec: etcdbootstrapv1.EtcdadmConfigSpec{ 149 | CloudInitConfig: &etcdbootstrapv1.CloudInitConfig{ 150 | Version: etcdVersion, 151 | }, 152 | }, 153 | Replicas: ptr.To(int32(e.replicas)), 154 | InfrastructureTemplate: corev1.ObjectReference{ 155 | Kind: infraTemplate.GetKind(), 156 | APIVersion: infraTemplate.GetAPIVersion(), 157 | Name: infraTemplate.GetName(), 158 | Namespace: e.namespace, 159 | }, 160 | }, 161 | } 162 | } 163 | 164 | func (e *etcdadmClusterTest) newEtcdMachine() *clusterv1.Machine { 165 | etcdMachine := &clusterv1.Machine{ 166 | TypeMeta: metav1.TypeMeta{ 167 | Kind: "Machine", 168 | APIVersion: clusterv1.GroupVersion.String(), 169 | }, 170 | ObjectMeta: 
metav1.ObjectMeta{ 171 | Name: names.SimpleNameGenerator.GenerateName(e.etcdadmCluster.Name + "-"), 172 | Namespace: e.etcdadmCluster.Namespace, 173 | Labels: EtcdLabelsForCluster(e.cluster.Name, e.etcdadmCluster.Name), 174 | UID: types.UID(uuid.New().String()), 175 | Finalizers: []string{etcdv1.EtcdadmClusterFinalizer}, 176 | OwnerReferences: []metav1.OwnerReference{ 177 | *metav1.NewControllerRef(e.etcdadmCluster, etcdv1.GroupVersion.WithKind("EtcdadmCluster")), 178 | }, 179 | }, 180 | Spec: clusterv1.MachineSpec{ 181 | ClusterName: e.cluster.Name, 182 | InfrastructureRef: clusterv1.ContractVersionedObjectReference{ 183 | Kind: infraTemplate.GetKind(), 184 | Name: infraTemplate.GetName(), 185 | }, 186 | }, 187 | Status: clusterv1.MachineStatus{ 188 | Addresses: []clusterv1.MachineAddress{ 189 | { 190 | Type: clusterv1.MachineExternalIP, 191 | Address: fmt.Sprintf("%d.%d.%d.%d", rand.Intn(256), rand.Intn(256), rand.Intn(256), rand.Intn(256)), 192 | }, 193 | }, 194 | }, 195 | } 196 | e.machineCounter++ 197 | return etcdMachine 198 | } 199 | 200 | func (e *etcdadmClusterTest) gatherObjects() []client.Object { 201 | objects := []client.Object{e.cluster, e.etcdadmCluster} 202 | for _, machine := range e.machines { 203 | objects = append(objects, machine) 204 | } 205 | return objects 206 | } 207 | 208 | func (e *etcdadmClusterTest) getEtcdClusterName() string { 209 | return fmt.Sprintf("%s-%s", e.name, etcdClusterNameSuffix) 210 | } 211 | 212 | func (e *etcdadmClusterTest) getMemberListResponse() *clientv3.MemberListResponse { 213 | members := []*etcdserverpb.Member{} 214 | for _, machine := range e.machines { 215 | members = append(members, &etcdserverpb.Member{ 216 | PeerURLs: []string{fmt.Sprintf("https://%s:2380", machine.Status.Addresses[0].Address)}, 217 | }) 218 | } 219 | return &clientv3.MemberListResponse{ 220 | Members: members, 221 | } 222 | } 223 | 224 | func (e *etcdadmClusterTest) getMemberRemoveResponse() *clientv3.MemberRemoveResponse { 225 | return 
&clientv3.MemberRemoveResponse{ 226 | Members: []*etcdserverpb.Member{ 227 | { 228 | PeerURLs: []string{fmt.Sprintf("https://%s:2380", e.machines[0].Status.Addresses[0].Address)}, 229 | }, 230 | }, 231 | } 232 | } 233 | 234 | func (e *etcdadmClusterTest) getDeletedMachines(client client.Client) []*clusterv1.Machine { 235 | machines := []*clusterv1.Machine{} 236 | for _, machine := range e.machines { 237 | m := &clusterv1.Machine{} 238 | _ = client.Get(context.Background(), types.NamespacedName{ 239 | Name: machine.Name, 240 | Namespace: machine.Namespace, 241 | }, m) 242 | if m.DeletionTimestamp != nil { 243 | machines = append(machines, m) 244 | } 245 | } 246 | return machines 247 | } 248 | 249 | func getHealthyEtcdResponse() *http.Response { 250 | return &http.Response{ 251 | StatusCode: http.StatusOK, 252 | Body: io.NopCloser(bytes.NewBufferString("{\"Health\": \"true\"}")), 253 | } 254 | } 255 | 256 | func (e *etcdadmClusterTest) newInitSecret() { 257 | e.initSecret = &corev1.Secret{ 258 | TypeMeta: metav1.TypeMeta{ 259 | Kind: "Secret", 260 | APIVersion: "v1", 261 | }, 262 | ObjectMeta: metav1.ObjectMeta{ 263 | Name: e.etcdadmCluster.Status.InitMachineAddress, 264 | Namespace: e.cluster.Namespace, 265 | }, 266 | Data: map[string][]byte{ 267 | "address": []byte(getEtcdMachineAddressFromClientURL(e.etcdadmCluster.Status.InitMachineAddress)), 268 | "clientUrls": []byte(e.etcdadmCluster.Status.Endpoints), 269 | }, 270 | } 271 | } 272 | -------------------------------------------------------------------------------- /controllers/helpers.go: -------------------------------------------------------------------------------- 1 | package controllers 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "net" 7 | "net/url" 8 | "strings" 9 | 10 | etcdbootstrapv1 "github.com/aws/etcdadm-bootstrap-provider/api/v1beta1" 11 | etcdv1 "github.com/aws/etcdadm-controller/api/v1beta1" 12 | "github.com/pkg/errors" 13 | "go.etcd.io/etcd/api/v3/etcdserverpb" 14 | clientv3 
"go.etcd.io/etcd/client/v3" 15 | corev1 "k8s.io/api/core/v1" 16 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 17 | "k8s.io/apimachinery/pkg/labels" 18 | "k8s.io/apimachinery/pkg/runtime/schema" 19 | "k8s.io/apimachinery/pkg/selection" 20 | "k8s.io/apiserver/pkg/storage/names" 21 | clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta2" 22 | "sigs.k8s.io/cluster-api/controllers/external" 23 | "sigs.k8s.io/cluster-api/util" 24 | "sigs.k8s.io/cluster-api/util/patch" 25 | ctrl "sigs.k8s.io/controller-runtime" 26 | ) 27 | 28 | const ( 29 | httpsPrefix = "https://" 30 | etcdClientURLPort = "2379" 31 | ) 32 | 33 | // EtcdMachinesSelectorForCluster returns the label selector necessary to get etcd machines for a given cluster. 34 | func EtcdMachinesSelectorForCluster(clusterName, etcdClusterName string) labels.Selector { 35 | must := func(r *labels.Requirement, err error) labels.Requirement { 36 | if err != nil { 37 | panic(err) 38 | } 39 | return *r 40 | } 41 | return labels.NewSelector().Add( 42 | must(labels.NewRequirement(clusterv1.ClusterNameLabel, selection.Equals, []string{clusterName})), 43 | must(labels.NewRequirement(clusterv1.MachineEtcdClusterLabelName, selection.Equals, []string{etcdClusterName})), 44 | ) 45 | } 46 | 47 | // EtcdClusterMachines returns a filter to find all etcd machines for a cluster, regardless of ownership. 48 | func EtcdClusterMachines(clusterName, etcdClusterName string) func(machine *clusterv1.Machine) bool { 49 | selector := EtcdMachinesSelectorForCluster(clusterName, etcdClusterName) 50 | return func(machine *clusterv1.Machine) bool { 51 | if machine == nil { 52 | return false 53 | } 54 | return selector.Matches(labels.Set(machine.Labels)) 55 | } 56 | } 57 | 58 | // ControlPlaneLabelsForCluster returns a set of labels to add to a control plane machine for this specific cluster. 
59 | func EtcdLabelsForCluster(clusterName string, etcdClusterName string) map[string]string { 60 | return map[string]string{ 61 | clusterv1.ClusterNameLabel: clusterName, 62 | clusterv1.MachineEtcdClusterLabelName: etcdClusterName, 63 | } 64 | } 65 | 66 | func (r *EtcdadmClusterReconciler) cloneConfigsAndGenerateMachine(ctx context.Context, ec *etcdv1.EtcdadmCluster, cluster *clusterv1.Cluster, failureDomain *string) (ctrl.Result, error) { 67 | // Since the cloned resource should eventually have a controller ref for the Machine, we create an 68 | // OwnerReference here without the Controller field set 69 | infraCloneOwner := &metav1.OwnerReference{ 70 | APIVersion: etcdv1.GroupVersion.String(), 71 | Kind: "EtcdadmCluster", 72 | Name: ec.Name, 73 | UID: ec.UID, 74 | } 75 | 76 | // Clone the infrastructure template 77 | infraObj, _, err := external.CreateFromTemplate(ctx, &external.CreateFromTemplateInput{ 78 | Client: r.Client, 79 | TemplateRef: &ec.Spec.InfrastructureTemplate, 80 | Namespace: ec.Namespace, 81 | OwnerRef: infraCloneOwner, 82 | ClusterName: cluster.Name, 83 | Labels: EtcdLabelsForCluster(cluster.Name, ec.Name), 84 | }) 85 | if err != nil { 86 | return ctrl.Result{}, fmt.Errorf("error cloning infrastructure template for etcd machine: %v", err) 87 | } 88 | if infraObj == nil { 89 | return ctrl.Result{}, fmt.Errorf("infrastructure template could not be cloned for etcd machine") 90 | } 91 | 92 | // Convert unstructured object to ContractVersionedObjectReference 93 | gv, err := schema.ParseGroupVersion(infraObj.GetAPIVersion()) 94 | if err != nil { 95 | return ctrl.Result{}, fmt.Errorf("failed to parse infrastructure object API version: %v", err) 96 | } 97 | infraRef := clusterv1.ContractVersionedObjectReference{ 98 | Kind: infraObj.GetKind(), 99 | Name: infraObj.GetName(), 100 | APIGroup: gv.Group, 101 | } 102 | 103 | bootstrapRef, err := r.generateEtcdadmConfig(ctx, ec, cluster) 104 | if err != nil { 105 | return ctrl.Result{}, err 106 | } 107 | 108 | 
if err := r.generateMachine(ctx, ec, cluster, infraRef, bootstrapRef, failureDomain); err != nil { 109 | r.Log.Error(err, "Failed to create initial etcd machine") 110 | return ctrl.Result{}, err 111 | } 112 | return ctrl.Result{}, nil 113 | } 114 | 115 | func (r *EtcdadmClusterReconciler) generateEtcdadmConfig(ctx context.Context, ec *etcdv1.EtcdadmCluster, cluster *clusterv1.Cluster) (clusterv1.ContractVersionedObjectReference, error) { 116 | owner := metav1.OwnerReference{ 117 | APIVersion: etcdv1.GroupVersion.String(), 118 | Kind: "EtcdadmCluster", 119 | Name: ec.Name, 120 | UID: ec.UID, 121 | } 122 | bootstrapConfig := &etcdbootstrapv1.EtcdadmConfig{ 123 | ObjectMeta: metav1.ObjectMeta{ 124 | Name: names.SimpleNameGenerator.GenerateName(ec.Name + "-"), 125 | Namespace: ec.Namespace, 126 | Labels: EtcdLabelsForCluster(cluster.Name, ec.Name), 127 | OwnerReferences: []metav1.OwnerReference{owner}, 128 | }, 129 | Spec: ec.Spec.EtcdadmConfigSpec, 130 | } 131 | 132 | if err := r.Client.Create(ctx, bootstrapConfig); err != nil { 133 | return clusterv1.ContractVersionedObjectReference{}, errors.Wrap(err, "Failed to create etcdadm bootstrap configuration") 134 | } 135 | 136 | bootstrapRef := clusterv1.ContractVersionedObjectReference{ 137 | Kind: "EtcdadmConfig", 138 | Name: bootstrapConfig.GetName(), 139 | APIGroup: etcdbootstrapv1.GroupVersion.Group, 140 | } 141 | 142 | return bootstrapRef, nil 143 | } 144 | 145 | func (r *EtcdadmClusterReconciler) generateMachine(ctx context.Context, ec *etcdv1.EtcdadmCluster, cluster *clusterv1.Cluster, infraRef clusterv1.ContractVersionedObjectReference, bootstrapRef clusterv1.ContractVersionedObjectReference, failureDomain *string) error { 146 | var failureDomainStr string 147 | if failureDomain != nil { 148 | failureDomainStr = *failureDomain 149 | } 150 | 151 | machine := &clusterv1.Machine{ 152 | ObjectMeta: metav1.ObjectMeta{ 153 | Name: names.SimpleNameGenerator.GenerateName(ec.Name + "-"), 154 | Namespace: ec.Namespace, 155 
| Labels: EtcdLabelsForCluster(cluster.Name, ec.Name), 156 | OwnerReferences: []metav1.OwnerReference{ 157 | *metav1.NewControllerRef(ec, etcdv1.GroupVersion.WithKind("EtcdadmCluster")), 158 | }, 159 | }, 160 | Spec: clusterv1.MachineSpec{ 161 | ClusterName: cluster.Name, 162 | InfrastructureRef: infraRef, 163 | Bootstrap: clusterv1.Bootstrap{ 164 | ConfigRef: bootstrapRef, 165 | }, 166 | FailureDomain: failureDomainStr, 167 | }, 168 | } 169 | if err := r.Client.Create(ctx, machine); err != nil { 170 | return errors.Wrap(err, "failed to create machine") 171 | } 172 | return nil 173 | } 174 | 175 | func getEtcdMachineAddress(machine *clusterv1.Machine) string { 176 | var internalIP, internalDNS, externalIP, externalDNS string 177 | 178 | // Check and record all different address types set for the machine and return later according to precedence. 179 | for _, address := range machine.Status.Addresses { 180 | switch address.Type { 181 | case clusterv1.MachineInternalIP: 182 | internalIP = address.Address 183 | case clusterv1.MachineInternalDNS: 184 | internalDNS = address.Address 185 | case clusterv1.MachineExternalIP: 186 | externalIP = address.Address 187 | case clusterv1.MachineExternalDNS: 188 | externalDNS = address.Address 189 | } 190 | } 191 | 192 | // The order of these checks determines the precedence of the address to use 193 | if externalDNS != "" { 194 | return externalDNS 195 | } else if externalIP != "" { 196 | return externalIP 197 | } else if internalDNS != "" { 198 | return internalDNS 199 | } else if internalIP != "" { 200 | return internalIP 201 | } 202 | 203 | return "" 204 | } 205 | 206 | func getMemberClientURL(address string) string { 207 | return fmt.Sprintf("%s%s:%s", httpsPrefix, address, etcdClientURLPort) 208 | } 209 | 210 | func getEtcdMachineAddressFromClientURL(clientURL string) string { 211 | u, err := url.ParseRequestURI(clientURL) 212 | if err != nil { 213 | return "" 214 | } 215 | host, _, err := net.SplitHostPort(u.Host) 216 | if 
err != nil { 217 | return "" 218 | } 219 | return host 220 | } 221 | 222 | func getMemberHealthCheckEndpoint(clientURL string) string { 223 | return fmt.Sprintf("%s/health", clientURL) 224 | } 225 | 226 | // source: https://github.com/kubernetes-sigs/etcdadm/blob/master/etcd/etcd.go#L53:6 227 | func memberForPeerURLs(members *clientv3.MemberListResponse, peerURLs []string) (*etcdserverpb.Member, bool) { 228 | for _, m := range members.Members { 229 | if stringSlicesEqual(m.PeerURLs, peerURLs) { 230 | return m, true 231 | } 232 | } 233 | return nil, false 234 | } 235 | 236 | // stringSlicesEqual compares two string slices for equality 237 | func stringSlicesEqual(l, r []string) bool { 238 | if len(l) != len(r) { 239 | return false 240 | } 241 | for i := range l { 242 | if l[i] != r[i] { 243 | return false 244 | } 245 | } 246 | return true 247 | } 248 | 249 | // Logic & implementation similar to KCP controller reconciling external MachineTemplate InfrastrucutureReference https://github.com/kubernetes-sigs/cluster-api/blob/master/controlplane/kubeadm/controllers/helpers.go#L123:41 250 | func (r *EtcdadmClusterReconciler) reconcileExternalReference(ctx context.Context, cluster *clusterv1.Cluster, ref corev1.ObjectReference) error { 251 | if !strings.HasSuffix(ref.Kind, clusterv1.TemplateSuffix) { 252 | return nil 253 | } 254 | 255 | obj, err := external.Get(ctx, r.Client, &ref) 256 | if err != nil { 257 | return err 258 | } 259 | 260 | // Note: We intentionally do not handle checking for the paused label on an external template reference 261 | 262 | patchHelper, err := patch.NewHelper(obj, r.Client) 263 | if err != nil { 264 | return err 265 | } 266 | 267 | obj.SetOwnerReferences(util.EnsureOwnerRef(obj.GetOwnerReferences(), metav1.OwnerReference{ 268 | APIVersion: clusterv1.GroupVersion.String(), 269 | Kind: "Cluster", 270 | Name: cluster.Name, 271 | UID: cluster.UID, 272 | })) 273 | 274 | return patchHelper.Patch(ctx, obj) 275 | } 276 | 
-------------------------------------------------------------------------------- /controllers/periodic_healthcheck.go: -------------------------------------------------------------------------------- 1 | package controllers 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "strconv" 7 | "sync" 8 | "time" 9 | 10 | etcdv1 "github.com/aws/etcdadm-controller/api/v1beta1" 11 | "github.com/hashicorp/go-multierror" 12 | "k8s.io/apimachinery/pkg/types" 13 | "k8s.io/klog/v2" 14 | clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta2" 15 | "sigs.k8s.io/cluster-api/util" 16 | "sigs.k8s.io/cluster-api/util/annotations" 17 | "sigs.k8s.io/cluster-api/util/collections" 18 | v1beta1conditions "sigs.k8s.io/cluster-api/util/deprecated/v1beta1/conditions" 19 | ) 20 | 21 | const ( 22 | maxUnhealthyCount = 5 23 | ) 24 | 25 | type etcdHealthCheckConfig struct { 26 | clusterToHttpClient sync.Map 27 | } 28 | 29 | type etcdadmClusterMemberHealthConfig struct { 30 | unhealthyMembersFrequency map[string]int 31 | unhealthyMembersToRemove map[string]*clusterv1.Machine 32 | cluster *clusterv1.Cluster 33 | endpoints string 34 | ownedMachines collections.Machines 35 | } 36 | 37 | func (r *EtcdadmClusterReconciler) startHealthCheckLoop(ctx context.Context, done <-chan struct{}) { 38 | r.Log.Info("Starting periodic healthcheck loop") 39 | etcdadmClusterMapper := make(map[types.UID]etcdadmClusterMemberHealthConfig) 40 | ticker := time.NewTicker(r.HealthCheckInterval) 41 | defer ticker.Stop() 42 | 43 | for { 44 | select { 45 | case <-done: 46 | return 47 | case <-ticker.C: 48 | r.startHealthCheck(ctx, etcdadmClusterMapper) 49 | } 50 | } 51 | } 52 | 53 | func (r *EtcdadmClusterReconciler) startHealthCheck(ctx context.Context, etcdadmClusterMapper map[types.UID]etcdadmClusterMemberHealthConfig) { 54 | etcdClusters := &etcdv1.EtcdadmClusterList{} 55 | err := r.Client.List(ctx, etcdClusters) 56 | if err != nil { 57 | r.Log.Error(err, "Error listing etcdadm cluster objects") 58 | return 59 | } 60 | for _, ec := 
range etcdClusters.Items { 61 | log := r.Log.WithValues("EtcdadmCluster", klog.KObj(&ec)) 62 | if annotations.HasPaused(&ec) { 63 | log.Info("EtcdadmCluster reconciliation is paused, skipping health checks") 64 | continue 65 | } 66 | if val, set := ec.Annotations[etcdv1.HealthCheckRetriesAnnotation]; set { 67 | if retries, err := strconv.Atoi(val); err != nil || retries < 0 { 68 | log.Info(fmt.Sprintf("healthcheck-retries annotation configured with invalid value: %v", err)) 69 | } else if retries == 0 { 70 | log.Info("healthcheck-retries annotation configured to 0, skipping health checks") 71 | continue 72 | } 73 | } 74 | if v1beta1conditions.IsFalse(&ec, etcdv1.EtcdCertificatesAvailableCondition) { 75 | log.Info("EtcdadmCluster certificates are not ready, skipping health checks") 76 | continue 77 | } 78 | if !ec.Status.CreationComplete { 79 | // etcdCluster not fully provisioned yet 80 | log.Info("EtcdadmCluster is not ready, skipping health checks") 81 | continue 82 | } 83 | if v1beta1conditions.IsFalse(&ec, etcdv1.EtcdMachinesSpecUpToDateCondition) { 84 | // etcdCluster is undergoing upgrade, some machines might not be ready yet, skip periodic healthcheck 85 | log.Info("EtcdadmCluster machine specs are not up to date, skipping health checks") 86 | continue 87 | } 88 | 89 | var cluster *clusterv1.Cluster 90 | if clusterEntry, ok := etcdadmClusterMapper[ec.UID]; !ok { 91 | cluster, err = util.GetOwnerCluster(ctx, r.Client, ec.ObjectMeta) 92 | if err != nil { 93 | log.Error(err, "Failed to retrieve owner Cluster from the API Server") 94 | continue 95 | } 96 | if cluster == nil { 97 | log.Info("Cluster Controller has not yet set OwnerRef on etcd cluster") 98 | continue 99 | } 100 | 101 | ownedMachines := r.getOwnedMachines(ctx, cluster, ec) 102 | 103 | etcdadmClusterMapper[ec.UID] = etcdadmClusterMemberHealthConfig{ 104 | unhealthyMembersFrequency: make(map[string]int), 105 | unhealthyMembersToRemove: make(map[string]*clusterv1.Machine), 106 | cluster: cluster, 107 
| ownedMachines: ownedMachines, 108 | } 109 | } else { 110 | cluster = clusterEntry.cluster 111 | if ec.Status.Endpoints != clusterEntry.endpoints { 112 | clusterEntry.endpoints = ec.Status.Endpoints 113 | ownedMachines := r.getOwnedMachines(ctx, cluster, ec) 114 | clusterEntry.ownedMachines = ownedMachines 115 | etcdadmClusterMapper[ec.UID] = clusterEntry 116 | } 117 | } 118 | 119 | if err := r.periodicEtcdMembersHealthCheck(ctx, cluster, &ec, etcdadmClusterMapper); err != nil { 120 | log.Error(err, "Error performing healthcheck") 121 | continue 122 | } 123 | } 124 | } 125 | 126 | func (r *EtcdadmClusterReconciler) periodicEtcdMembersHealthCheck(ctx context.Context, cluster *clusterv1.Cluster, etcdCluster *etcdv1.EtcdadmCluster, etcdadmClusterMapper map[types.UID]etcdadmClusterMemberHealthConfig) error { 127 | log := r.Log.WithValues("EtcdadmCluster", klog.KObj(etcdCluster)) 128 | 129 | if etcdCluster.Spec.Replicas == nil { 130 | err := fmt.Errorf("replicas is nil") 131 | log.Error(err, "Error performing healthcheck") 132 | return err 133 | } 134 | 135 | desiredReplicas := int(*etcdCluster.Spec.Replicas) 136 | etcdMachines, err := collections.GetFilteredMachinesForCluster(ctx, r.uncachedClient, cluster, EtcdClusterMachines(cluster.Name, etcdCluster.Name)) 137 | if err != nil { 138 | log.Error(err, "Error filtering machines for etcd cluster") 139 | } 140 | ownedMachines := etcdMachines.Filter(collections.OwnedMachines(etcdCluster)) 141 | 142 | currClusterHFConfig := etcdadmClusterMapper[etcdCluster.UID] 143 | if len(etcdMachines) == 0 { 144 | log.Info("Skipping healthcheck because there are no etcd machines") 145 | return nil 146 | } 147 | 148 | // clean up old machines 149 | for ip := range currClusterHFConfig.unhealthyMembersFrequency { 150 | found := false 151 | for _, machine := range etcdMachines { 152 | if getMemberClientURL(getEtcdMachineAddress(machine)) == ip { 153 | found = true 154 | } 155 | } 156 | if !found { 157 | log.Info("Removing member from 
unhealthyMembersFrequency, member does not exist", "member", ip) 158 | delete(currClusterHFConfig.unhealthyMembersFrequency, ip) 159 | } 160 | } 161 | 162 | log.Info("Performing healthchecks on the following etcd machines", "machines", klog.KObjSlice(etcdMachines.UnsortedList())) 163 | for _, etcdMachine := range etcdMachines { 164 | endpoint := getMachineEtcdEndpoint(etcdMachine) 165 | if endpoint == "" { 166 | log.Info("Member in bootstrap phase, ignoring") 167 | continue 168 | } 169 | err := r.performEndpointHealthCheck(ctx, cluster, endpoint, false) 170 | if err != nil { 171 | currClusterHFConfig.unhealthyMembersFrequency[endpoint]++ 172 | // only check if machine should be removed if it is owned 173 | if _, found := ownedMachines[etcdMachine.Name]; found { 174 | // member failed healthcheck so add it to unhealthy map or update it's unhealthy count 175 | log.Info("Member failed healthcheck, adding to unhealthy members list", "machine", etcdMachine, "IP", endpoint, 176 | "unhealthy frequency", currClusterHFConfig.unhealthyMembersFrequency[endpoint]) 177 | unhealthyCount := maxUnhealthyCount 178 | if val, set := etcdCluster.Annotations[etcdv1.HealthCheckRetriesAnnotation]; set { 179 | retries, err := strconv.Atoi(val) 180 | if err != nil || retries < 0 { 181 | log.Info("healthcheck-retries annotation configured with invalid value, using default retries") 182 | } 183 | unhealthyCount = retries 184 | } 185 | if currClusterHFConfig.unhealthyMembersFrequency[endpoint] >= unhealthyCount { 186 | log.Info("Adding to list of unhealthy members to remove", "member", endpoint) 187 | // member has been unresponsive, add the machine to unhealthyMembersToRemove queue 188 | currClusterHFConfig.unhealthyMembersToRemove[endpoint] = etcdMachine 189 | } 190 | } 191 | } else { 192 | _, markedUnhealthy := currClusterHFConfig.unhealthyMembersFrequency[endpoint] 193 | if markedUnhealthy { 194 | log.Info("Removing from total unhealthy members list", "member", endpoint) 195 | 
delete(currClusterHFConfig.unhealthyMembersFrequency, endpoint) 196 | } 197 | // member passed healthcheck, so if it was previously added to unhealthy map, remove it since only consecutive failures should lead to member removal 198 | _, markedToDelete := currClusterHFConfig.unhealthyMembersToRemove[endpoint] 199 | if markedToDelete { 200 | log.Info("Removing from list of unhealthy members to remove", "member", endpoint) 201 | delete(currClusterHFConfig.unhealthyMembersToRemove, endpoint) 202 | } 203 | } 204 | } 205 | 206 | if len(currClusterHFConfig.unhealthyMembersToRemove) == 0 { 207 | return nil 208 | } 209 | 210 | var retErr error 211 | // check if quorum is perserved before deleting any machines 212 | if len(etcdMachines)-len(currClusterHFConfig.unhealthyMembersFrequency) >= len(etcdMachines)/2+1 { 213 | // only touch owned machines in health check alg 214 | for machineEndpoint, machineToDelete := range currClusterHFConfig.unhealthyMembersToRemove { 215 | // only remove one machine at a time 216 | currentMachines := r.getOwnedMachines(ctx, cluster, *etcdCluster) 217 | currentMachines = currentMachines.Filter(collections.Not(collections.HasDeletionTimestamp)) 218 | if len(currentMachines) < desiredReplicas { 219 | log.Info("Waiting for new replica to be created before deleting additional replicas") 220 | continue 221 | } 222 | if err := r.removeEtcdMachine(ctx, etcdCluster, cluster, machineToDelete, getEtcdMachineAddressFromClientURL(machineEndpoint)); err != nil { 223 | // log and save error and continue deletion of other members, deletion of this member will be retried since it's still part of unhealthyMembersToRemove 224 | if machineToDelete == nil { 225 | log.Error(err, "error removing etcd member machine, machine not found", "endpoint", machineEndpoint) 226 | } else { 227 | log.Error(err, "error removing etcd member machine", "member", machineToDelete.Name, "endpoint", machineEndpoint) 228 | } 229 | retErr = multierror.Append(retErr, err) 230 | continue 
231 | } 232 | delete(currClusterHFConfig.unhealthyMembersToRemove, machineEndpoint) 233 | } 234 | if retErr != nil { 235 | return retErr 236 | } 237 | } else { 238 | log.Info("Not safe to remove etcd machines, quorum not preserved") 239 | } 240 | 241 | etcdCluster.Status.Ready = false 242 | return r.Client.Status().Update(ctx, etcdCluster) 243 | } 244 | 245 | func (r *EtcdadmClusterReconciler) getOwnedMachines(ctx context.Context, cluster *clusterv1.Cluster, ec etcdv1.EtcdadmCluster) collections.Machines { 246 | etcdMachines, err := collections.GetFilteredMachinesForCluster(ctx, r.uncachedClient, cluster, EtcdClusterMachines(cluster.Name, ec.Name)) 247 | if err != nil { 248 | r.Log.Error(err, "Error filtering machines for etcd cluster") 249 | } 250 | 251 | return etcdMachines.Filter(collections.OwnedMachines(&ec)) 252 | } 253 | -------------------------------------------------------------------------------- /api/v1alpha3/zz_generated.conversion.go: -------------------------------------------------------------------------------- 1 | //go:build !ignore_autogenerated_etcd_cluster 2 | // +build !ignore_autogenerated_etcd_cluster 3 | 4 | /* 5 | 6 | 7 | Licensed under the Apache License, Version 2.0 (the "License"); 8 | you may not use this file except in compliance with the License. 9 | You may obtain a copy of the License at 10 | 11 | http://www.apache.org/licenses/LICENSE-2.0 12 | 13 | Unless required by applicable law or agreed to in writing, software 14 | distributed under the License is distributed on an "AS IS" BASIS, 15 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | See the License for the specific language governing permissions and 17 | limitations under the License. 18 | */ 19 | // Code generated by conversion-gen. DO NOT EDIT. 

package v1alpha3

import (
	unsafe "unsafe"

	apiv1alpha3 "github.com/aws/etcdadm-bootstrap-provider/api/v1alpha3"
	v1beta1 "github.com/aws/etcdadm-controller/api/v1beta1"
	clusterapiapiv1alpha3 "github.com/aws/etcdadm-controller/internal/thirdparty/api/v1alpha3"
	conversion "k8s.io/apimachinery/pkg/conversion"
	runtime "k8s.io/apimachinery/pkg/runtime"
	apiv1beta1 "sigs.k8s.io/cluster-api/api/core/v1beta1"
)

func init() {
	localSchemeBuilder.Register(RegisterConversions)
}

// RegisterConversions adds conversion functions to the given scheme.
// Public to allow building arbitrary schemes.
func RegisterConversions(s *runtime.Scheme) error {
	if err := s.AddGeneratedConversionFunc((*EtcdadmCluster)(nil), (*v1beta1.EtcdadmCluster)(nil), func(a, b interface{}, scope conversion.Scope) error {
		return Convert_v1alpha3_EtcdadmCluster_To_v1beta1_EtcdadmCluster(a.(*EtcdadmCluster), b.(*v1beta1.EtcdadmCluster), scope)
	}); err != nil {
		return err
	}
	if err := s.AddGeneratedConversionFunc((*v1beta1.EtcdadmCluster)(nil), (*EtcdadmCluster)(nil), func(a, b interface{}, scope conversion.Scope) error {
		return Convert_v1beta1_EtcdadmCluster_To_v1alpha3_EtcdadmCluster(a.(*v1beta1.EtcdadmCluster), b.(*EtcdadmCluster), scope)
	}); err != nil {
		return err
	}
	if err := s.AddGeneratedConversionFunc((*EtcdadmClusterList)(nil), (*v1beta1.EtcdadmClusterList)(nil), func(a, b interface{}, scope conversion.Scope) error {
		return Convert_v1alpha3_EtcdadmClusterList_To_v1beta1_EtcdadmClusterList(a.(*EtcdadmClusterList), b.(*v1beta1.EtcdadmClusterList), scope)
	}); err != nil {
		return err
	}
	if err := s.AddGeneratedConversionFunc((*v1beta1.EtcdadmClusterList)(nil), (*EtcdadmClusterList)(nil), func(a, b interface{}, scope conversion.Scope) error {
		return Convert_v1beta1_EtcdadmClusterList_To_v1alpha3_EtcdadmClusterList(a.(*v1beta1.EtcdadmClusterList), b.(*EtcdadmClusterList), scope)
	}); err != nil {
		return err
	}
	if err := s.AddGeneratedConversionFunc((*EtcdadmClusterSpec)(nil), (*v1beta1.EtcdadmClusterSpec)(nil), func(a, b interface{}, scope conversion.Scope) error {
		return Convert_v1alpha3_EtcdadmClusterSpec_To_v1beta1_EtcdadmClusterSpec(a.(*EtcdadmClusterSpec), b.(*v1beta1.EtcdadmClusterSpec), scope)
	}); err != nil {
		return err
	}
	if err := s.AddGeneratedConversionFunc((*v1beta1.EtcdadmClusterSpec)(nil), (*EtcdadmClusterSpec)(nil), func(a, b interface{}, scope conversion.Scope) error {
		return Convert_v1beta1_EtcdadmClusterSpec_To_v1alpha3_EtcdadmClusterSpec(a.(*v1beta1.EtcdadmClusterSpec), b.(*EtcdadmClusterSpec), scope)
	}); err != nil {
		return err
	}
	if err := s.AddGeneratedConversionFunc((*EtcdadmClusterStatus)(nil), (*v1beta1.EtcdadmClusterStatus)(nil), func(a, b interface{}, scope conversion.Scope) error {
		return Convert_v1alpha3_EtcdadmClusterStatus_To_v1beta1_EtcdadmClusterStatus(a.(*EtcdadmClusterStatus), b.(*v1beta1.EtcdadmClusterStatus), scope)
	}); err != nil {
		return err
	}
	if err := s.AddGeneratedConversionFunc((*v1beta1.EtcdadmClusterStatus)(nil), (*EtcdadmClusterStatus)(nil), func(a, b interface{}, scope conversion.Scope) error {
		return Convert_v1beta1_EtcdadmClusterStatus_To_v1alpha3_EtcdadmClusterStatus(a.(*v1beta1.EtcdadmClusterStatus), b.(*EtcdadmClusterStatus), scope)
	}); err != nil {
		return err
	}
	return nil
}

func autoConvert_v1alpha3_EtcdadmCluster_To_v1beta1_EtcdadmCluster(in *EtcdadmCluster, out *v1beta1.EtcdadmCluster, s conversion.Scope) error {
	out.ObjectMeta = in.ObjectMeta
	if err := Convert_v1alpha3_EtcdadmClusterSpec_To_v1beta1_EtcdadmClusterSpec(&in.Spec, &out.Spec, s); err != nil {
		return err
	}
	if err := Convert_v1alpha3_EtcdadmClusterStatus_To_v1beta1_EtcdadmClusterStatus(&in.Status, &out.Status, s); err != nil {
		return err
	}
	return nil
}

// Convert_v1alpha3_EtcdadmCluster_To_v1beta1_EtcdadmCluster is an autogenerated conversion function.
func Convert_v1alpha3_EtcdadmCluster_To_v1beta1_EtcdadmCluster(in *EtcdadmCluster, out *v1beta1.EtcdadmCluster, s conversion.Scope) error {
	return autoConvert_v1alpha3_EtcdadmCluster_To_v1beta1_EtcdadmCluster(in, out, s)
}

func autoConvert_v1beta1_EtcdadmCluster_To_v1alpha3_EtcdadmCluster(in *v1beta1.EtcdadmCluster, out *EtcdadmCluster, s conversion.Scope) error {
	out.ObjectMeta = in.ObjectMeta
	if err := Convert_v1beta1_EtcdadmClusterSpec_To_v1alpha3_EtcdadmClusterSpec(&in.Spec, &out.Spec, s); err != nil {
		return err
	}
	if err := Convert_v1beta1_EtcdadmClusterStatus_To_v1alpha3_EtcdadmClusterStatus(&in.Status, &out.Status, s); err != nil {
		return err
	}
	return nil
}

// Convert_v1beta1_EtcdadmCluster_To_v1alpha3_EtcdadmCluster is an autogenerated conversion function.
func Convert_v1beta1_EtcdadmCluster_To_v1alpha3_EtcdadmCluster(in *v1beta1.EtcdadmCluster, out *EtcdadmCluster, s conversion.Scope) error {
	return autoConvert_v1beta1_EtcdadmCluster_To_v1alpha3_EtcdadmCluster(in, out, s)
}

func autoConvert_v1alpha3_EtcdadmClusterList_To_v1beta1_EtcdadmClusterList(in *EtcdadmClusterList, out *v1beta1.EtcdadmClusterList, s conversion.Scope) error {
	out.ListMeta = in.ListMeta
	if in.Items != nil {
		in, out := &in.Items, &out.Items
		*out = make([]v1beta1.EtcdadmCluster, len(*in))
		for i := range *in {
			if err := Convert_v1alpha3_EtcdadmCluster_To_v1beta1_EtcdadmCluster(&(*in)[i], &(*out)[i], s); err != nil {
				return err
			}
		}
	} else {
		out.Items = nil
	}
	return nil
}

// Convert_v1alpha3_EtcdadmClusterList_To_v1beta1_EtcdadmClusterList is an autogenerated conversion function.
func Convert_v1alpha3_EtcdadmClusterList_To_v1beta1_EtcdadmClusterList(in *EtcdadmClusterList, out *v1beta1.EtcdadmClusterList, s conversion.Scope) error {
	return autoConvert_v1alpha3_EtcdadmClusterList_To_v1beta1_EtcdadmClusterList(in, out, s)
}

func autoConvert_v1beta1_EtcdadmClusterList_To_v1alpha3_EtcdadmClusterList(in *v1beta1.EtcdadmClusterList, out *EtcdadmClusterList, s conversion.Scope) error {
	out.ListMeta = in.ListMeta
	if in.Items != nil {
		in, out := &in.Items, &out.Items
		*out = make([]EtcdadmCluster, len(*in))
		for i := range *in {
			if err := Convert_v1beta1_EtcdadmCluster_To_v1alpha3_EtcdadmCluster(&(*in)[i], &(*out)[i], s); err != nil {
				return err
			}
		}
	} else {
		out.Items = nil
	}
	return nil
}

// Convert_v1beta1_EtcdadmClusterList_To_v1alpha3_EtcdadmClusterList is an autogenerated conversion function.
func Convert_v1beta1_EtcdadmClusterList_To_v1alpha3_EtcdadmClusterList(in *v1beta1.EtcdadmClusterList, out *EtcdadmClusterList, s conversion.Scope) error {
	return autoConvert_v1beta1_EtcdadmClusterList_To_v1alpha3_EtcdadmClusterList(in, out, s)
}

func autoConvert_v1alpha3_EtcdadmClusterSpec_To_v1beta1_EtcdadmClusterSpec(in *EtcdadmClusterSpec, out *v1beta1.EtcdadmClusterSpec, s conversion.Scope) error {
	out.Replicas = (*int32)(unsafe.Pointer(in.Replicas))
	out.InfrastructureTemplate = in.InfrastructureTemplate
	if err := apiv1alpha3.Convert_v1alpha3_EtcdadmConfigSpec_To_v1beta1_EtcdadmConfigSpec(&in.EtcdadmConfigSpec, &out.EtcdadmConfigSpec, s); err != nil {
		return err
	}
	return nil
}

// Convert_v1alpha3_EtcdadmClusterSpec_To_v1beta1_EtcdadmClusterSpec is an autogenerated conversion function.
func Convert_v1alpha3_EtcdadmClusterSpec_To_v1beta1_EtcdadmClusterSpec(in *EtcdadmClusterSpec, out *v1beta1.EtcdadmClusterSpec, s conversion.Scope) error {
	return autoConvert_v1alpha3_EtcdadmClusterSpec_To_v1beta1_EtcdadmClusterSpec(in, out, s)
}

func autoConvert_v1beta1_EtcdadmClusterSpec_To_v1alpha3_EtcdadmClusterSpec(in *v1beta1.EtcdadmClusterSpec, out *EtcdadmClusterSpec, s conversion.Scope) error {
	out.Replicas = (*int32)(unsafe.Pointer(in.Replicas))
	out.InfrastructureTemplate = in.InfrastructureTemplate
	if err := apiv1alpha3.Convert_v1beta1_EtcdadmConfigSpec_To_v1alpha3_EtcdadmConfigSpec(&in.EtcdadmConfigSpec, &out.EtcdadmConfigSpec, s); err != nil {
		return err
	}
	return nil
}

// Convert_v1beta1_EtcdadmClusterSpec_To_v1alpha3_EtcdadmClusterSpec is an autogenerated conversion function.
func Convert_v1beta1_EtcdadmClusterSpec_To_v1alpha3_EtcdadmClusterSpec(in *v1beta1.EtcdadmClusterSpec, out *EtcdadmClusterSpec, s conversion.Scope) error {
	return autoConvert_v1beta1_EtcdadmClusterSpec_To_v1alpha3_EtcdadmClusterSpec(in, out, s)
}

func autoConvert_v1alpha3_EtcdadmClusterStatus_To_v1beta1_EtcdadmClusterStatus(in *EtcdadmClusterStatus, out *v1beta1.EtcdadmClusterStatus, s conversion.Scope) error {
	out.ReadyReplicas = in.ReadyReplicas
	out.InitMachineAddress = in.InitMachineAddress
	out.Initialized = in.Initialized
	out.Ready = in.Ready
	out.CreationComplete = in.CreationComplete
	out.Endpoints = in.Endpoints
	out.Selector = in.Selector
	out.ObservedGeneration = in.ObservedGeneration
	out.Conditions = *(*apiv1beta1.Conditions)(unsafe.Pointer(&in.Conditions))
	return nil
}

// Convert_v1alpha3_EtcdadmClusterStatus_To_v1beta1_EtcdadmClusterStatus is an autogenerated conversion function.
200 | func Convert_v1alpha3_EtcdadmClusterStatus_To_v1beta1_EtcdadmClusterStatus(in *EtcdadmClusterStatus, out *v1beta1.EtcdadmClusterStatus, s conversion.Scope) error { 201 | return autoConvert_v1alpha3_EtcdadmClusterStatus_To_v1beta1_EtcdadmClusterStatus(in, out, s) 202 | } 203 | 204 | func autoConvert_v1beta1_EtcdadmClusterStatus_To_v1alpha3_EtcdadmClusterStatus(in *v1beta1.EtcdadmClusterStatus, out *EtcdadmClusterStatus, s conversion.Scope) error { 205 | out.ReadyReplicas = in.ReadyReplicas 206 | out.InitMachineAddress = in.InitMachineAddress 207 | out.Initialized = in.Initialized 208 | out.Ready = in.Ready 209 | out.CreationComplete = in.CreationComplete 210 | out.Endpoints = in.Endpoints 211 | out.Selector = in.Selector 212 | out.ObservedGeneration = in.ObservedGeneration 213 | out.Conditions = *(*clusterapiapiv1alpha3.Conditions)(unsafe.Pointer(&in.Conditions)) 214 | return nil 215 | } 216 | 217 | // Convert_v1beta1_EtcdadmClusterStatus_To_v1alpha3_EtcdadmClusterStatus is an autogenerated conversion function. 218 | func Convert_v1beta1_EtcdadmClusterStatus_To_v1alpha3_EtcdadmClusterStatus(in *v1beta1.EtcdadmClusterStatus, out *EtcdadmClusterStatus, s conversion.Scope) error { 219 | return autoConvert_v1beta1_EtcdadmClusterStatus_To_v1alpha3_EtcdadmClusterStatus(in, out, s) 220 | } 221 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 
14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /controllers/periodic_healthcheck_test.go: -------------------------------------------------------------------------------- 1 | package controllers 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | "fmt" 7 | "net/http" 8 | "strings" 9 | "testing" 10 | 11 | . 
	"github.com/onsi/gomega"

	"github.com/aws/etcdadm-controller/controllers/mocks"
	"github.com/golang/mock/gomock"
	"k8s.io/apimachinery/pkg/types"

	clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta2"
	"sigs.k8s.io/controller-runtime/pkg/client/fake"
	"sigs.k8s.io/controller-runtime/pkg/log"
)

// TestStartHealthCheckLoop runs a single health-check pass over a 3-member
// external etcd cluster whose three /healthz round trips all return a healthy
// response (RoundTrip is expected exactly 3 times).
func TestStartHealthCheckLoop(t *testing.T) {
	// NOTE(review): the gomega instance is deliberately discarded — this test
	// only verifies the mocked expectations are met; consider asserting on
	// etcdadmClusterMapper as the other tests do.
	_ = NewWithT(t)

	ctrl := gomock.NewController(t)
	mockEtcd := mocks.NewMockEtcdClient(ctrl)
	mockRt := mocks.NewMockRoundTripper(ctrl)

	etcdTest := newEtcdadmClusterTest(3)
	etcdTest.buildClusterWithExternalEtcd()
	// Health checks only run for clusters whose creation has completed.
	etcdTest.etcdadmCluster.Status.CreationComplete = true

	fakeKubernetesClient := fake.NewClientBuilder().WithScheme(setupScheme()).WithObjects(etcdTest.gatherObjects()...).Build()

	// Factory that hands the reconciler the mocked etcd client.
	etcdEtcdClient := func(ctx context.Context, cluster *clusterv1.Cluster, endpoints string) (EtcdClient, error) {
		return mockEtcd, nil
	}

	r := &EtcdadmClusterReconciler{
		Client:         fakeKubernetesClient,
		uncachedClient: fakeKubernetesClient,
		Log:            log.Log,
		GetEtcdClient:  etcdEtcdClient,
	}
	mockHttpClient := &http.Client{
		Transport: mockRt,
	}

	// Pre-seed the per-cluster HTTP client so the health check uses the mock transport.
	r.etcdHealthCheckConfig.clusterToHttpClient.Store(etcdTest.cluster.UID, mockHttpClient)
	r.SetIsPortOpen(isPortOpenMock)

	// One healthy probe per member.
	mockRt.EXPECT().RoundTrip(gomock.Any()).Return(getHealthyEtcdResponse(), nil).Times(3)

	etcdadmClusterMapper := make(map[types.UID]etcdadmClusterMemberHealthConfig)
	r.startHealthCheck(context.Background(), etcdadmClusterMapper)
}

// TestStartHealthCheckLoopWithNoRetries configures zero health-check retries:
// after one pass in which all three probes fail, no per-cluster health state
// is retained in the mapper.
func TestStartHealthCheckLoopWithNoRetries(t *testing.T) {
	g := NewWithT(t)
	ctrl := gomock.NewController(t)
	mockEtcd := mocks.NewMockEtcdClient(ctrl)
	mockRt := mocks.NewMockRoundTripper(ctrl)

	etcdTest := newEtcdadmClusterTest(3)
	etcdTest.buildClusterWithExternalEtcd().withHealthCheckRetries(0)
	etcdTest.etcdadmCluster.Status.CreationComplete = true

	fakeKubernetesClient := fake.NewClientBuilder().WithScheme(setupScheme()).WithObjects(etcdTest.gatherObjects()...).Build()

	mockEtcdClient := func(ctx context.Context, cluster *clusterv1.Cluster, endpoints string) (EtcdClient, error) {
		return mockEtcd, nil
	}

	r := &EtcdadmClusterReconciler{
		Client:         fakeKubernetesClient,
		uncachedClient: fakeKubernetesClient,
		Log:            log.Log,
		GetEtcdClient:  mockEtcdClient,
	}
	mockHttpClient := &http.Client{
		Transport: mockRt,
	}

	r.etcdHealthCheckConfig.clusterToHttpClient.Store(etcdTest.cluster.UID, mockHttpClient)
	r.SetIsPortOpen(isPortOpenMock)

	// Every member probe fails once (3 members, 1 pass).
	mockRt.EXPECT().RoundTrip(gomock.Any()).Return(nil, errors.New("error")).Times(3)

	etcdadmClusterMapper := make(map[types.UID]etcdadmClusterMemberHealthConfig)
	r.startHealthCheck(context.Background(), etcdadmClusterMapper)

	// With retries disabled no unhealthy-member bookkeeping should persist.
	g.Expect(etcdadmClusterMapper).To(BeEmpty())
}

// TestStartHealthCheckLoopWithCustomRetries sets 3 retries and makes exactly
// one member (the first machine's IP) fail its probe on every pass; the
// machine is only deleted on the third consecutive failing pass.
func TestStartHealthCheckLoopWithCustomRetries(t *testing.T) {
	g := NewWithT(t)
	ctrl := gomock.NewController(t)
	mockEtcd := mocks.NewMockEtcdClient(ctrl)
	mockRt := mocks.NewMockRoundTripper(ctrl)
	etcdadmClusterMapper := make(map[types.UID]etcdadmClusterMemberHealthConfig)

	etcdTest := newEtcdadmClusterTest(3)
	etcdTest.buildClusterWithExternalEtcd().withHealthCheckRetries(3)
	etcdTest.etcdadmCluster.Status.CreationComplete = true

	// IP of the member that will be made to look unhealthy.
	ip := etcdTest.machines[0].Status.Addresses[0].Address

	fakeKubernetesClient := fake.NewClientBuilder().WithScheme(setupScheme()).WithObjects(etcdTest.gatherObjects()...).Build()
	mockEtcdClient := func(ctx context.Context, cluster *clusterv1.Cluster, endpoints string) (EtcdClient, error) {
		return mockEtcd, nil
	}

	r := &EtcdadmClusterReconciler{
		Client:         fakeKubernetesClient,
		uncachedClient: fakeKubernetesClient,
		Log:            log.Log,
		GetEtcdClient:  mockEtcdClient,
	}
	// The wired transport fails only for the chosen member's host.
	mockHttpClient := &http.Client{
		Transport: RoundTripperFunc(func(req *http.Request) (*http.Response, error) {
			if strings.Contains(req.Host, ip) {
				return nil, fmt.Errorf("Error")
			}
			return getHealthyEtcdResponse(), nil
		}),
	}

	r.etcdHealthCheckConfig.clusterToHttpClient.Store(etcdTest.cluster.UID, mockHttpClient)
	r.SetIsPortOpen(isPortOpenMock)

	// NOTE(review): mockRt is not installed as the transport here (the
	// RoundTripperFunc above is); confirm these RoundTrip expectations are
	// actually exercised rather than dead.
	mockRt.EXPECT().RoundTrip(gomock.Any()).Return(nil, errors.New("error")).Times(9)
	mockEtcd.EXPECT().MemberList(gomock.Any()).Return(etcdTest.getMemberListResponse(), nil).Times(3)
	// Exactly one removal is expected, on the final pass.
	mockEtcd.EXPECT().MemberRemove(gomock.Any(), gomock.Any()).Return(etcdTest.getMemberRemoveResponse(), nil).Times(1)
	mockEtcd.EXPECT().Close().Times(3)

	// Passes 1 and 2: failure count below the retry threshold — no deletion.
	r.startHealthCheck(context.Background(), etcdadmClusterMapper)
	g.Expect(etcdTest.getDeletedMachines(fakeKubernetesClient)).To(BeEmpty())

	r.startHealthCheck(context.Background(), etcdadmClusterMapper)
	g.Expect(etcdTest.getDeletedMachines(fakeKubernetesClient)).To(BeEmpty())

	// Pass 3: threshold reached — the unhealthy member's machine is deleted.
	r.startHealthCheck(context.Background(), etcdadmClusterMapper)
	g.Expect(etcdTest.getDeletedMachines(fakeKubernetesClient)).To(HaveLen(1))
}

// TestReconcilePeriodicHealthCheckMachineToBeDeletedNowHealthy drives a
// single-member cluster unhealthy for five passes (queuing it for removal),
// then returns one healthy probe and verifies the member is un-queued.
func TestReconcilePeriodicHealthCheckMachineToBeDeletedNowHealthy(t *testing.T) {
	g := NewWithT(t)

	ctrl := gomock.NewController(t)
	mockEtcd := mocks.NewMockEtcdClient(ctrl)
	mockRt := mocks.NewMockRoundTripper(ctrl)

	etcdadmCluster := newEtcdadmClusterTest(1)
	etcdadmCluster.buildClusterWithExternalEtcd()
	etcdadmCluster.etcdadmCluster.Status.CreationComplete = true

	fakeKubernetesClient := fake.NewClientBuilder().WithScheme(setupScheme()).WithObjects(etcdadmCluster.gatherObjects()...).Build()

	etcdEtcdClient := func(ctx context.Context, cluster *clusterv1.Cluster, endpoints string) (EtcdClient, error) {
		return mockEtcd, nil
	}

	r := &EtcdadmClusterReconciler{
		Client:         fakeKubernetesClient,
		uncachedClient: fakeKubernetesClient,
		Log:            log.Log,
		GetEtcdClient:  etcdEtcdClient,
	}
	mockHttpClient := &http.Client{
		Transport: mockRt,
	}

	r.etcdHealthCheckConfig.clusterToHttpClient.Store(etcdadmCluster.cluster.UID, mockHttpClient)
	r.SetIsPortOpen(isPortOpenMock)

	etcdadmClusterMapper := make(map[types.UID]etcdadmClusterMemberHealthConfig)

	// Five consecutive failing passes (1 member x 5 passes).
	mockRt.EXPECT().RoundTrip(gomock.Any()).Return(nil, errors.New("error")).Times(5)
	mockEtcd.EXPECT().MemberList(gomock.Any()).Return(etcdadmCluster.getMemberListResponse(), nil).Times(5)
	mockEtcd.EXPECT().Close().Times(5)

	for i := 0; i < 5; i++ {
		r.startHealthCheck(context.Background(), etcdadmClusterMapper)
	}

	// The lone member is now queued for removal.
	g.Expect(etcdadmClusterMapper[etcdadmCluster.etcdadmCluster.UID].unhealthyMembersToRemove).To(HaveLen(1))

	// A single healthy probe clears the removal queue.
	mockRt.EXPECT().RoundTrip(gomock.Any()).Return(getHealthyEtcdResponse(), nil)
	r.startHealthCheck(context.Background(), etcdadmClusterMapper)

	g.Expect(etcdadmClusterMapper[etcdadmCluster.etcdadmCluster.UID].unhealthyMembersToRemove).To(HaveLen(0))
}

// TestQuorumNotPreserved fails all three members of a 3-member cluster:
// removing any of them would break quorum, so no machine may be deleted even
// though all three are queued as unhealthy.
func TestQuorumNotPreserved(t *testing.T) {
	g := NewWithT(t)

	ctrl := gomock.NewController(t)
	mockEtcd := mocks.NewMockEtcdClient(ctrl)
	mockRt := mocks.NewMockRoundTripper(ctrl)

	etcdTest := newEtcdadmClusterTest(3)
	etcdTest.buildClusterWithExternalEtcd()
	etcdTest.etcdadmCluster.Status.CreationComplete = true

	fakeKubernetesClient := fake.NewClientBuilder().WithScheme(setupScheme()).WithObjects(etcdTest.gatherObjects()...).Build()

	etcdEtcdClient := func(ctx context.Context, cluster *clusterv1.Cluster, endpoints string) (EtcdClient, error) {
		return mockEtcd, nil
	}

	r := &EtcdadmClusterReconciler{
		Client:         fakeKubernetesClient,
		uncachedClient: fakeKubernetesClient,
		Log:            log.Log,
		GetEtcdClient:  etcdEtcdClient,
	}
	mockHttpClient := &http.Client{
		Transport: mockRt,
	}

	r.etcdHealthCheckConfig.clusterToHttpClient.Store(etcdTest.cluster.UID, mockHttpClient)
	r.SetIsPortOpen(isPortOpenMock)

	etcdadmClusterMapper := make(map[types.UID]etcdadmClusterMemberHealthConfig)

	// 5 passes x 3 members, every probe fails.
	mockRt.EXPECT().RoundTrip(gomock.Any()).Return(nil, errors.New("error")).Times(15)
	mockEtcd.EXPECT().MemberList(gomock.Any()).Return(etcdTest.getMemberListResponse(), nil).Times(5)
	mockEtcd.EXPECT().Close().Times(5)

	for i := 0; i < 5; i++ {
		r.startHealthCheck(context.Background(), etcdadmClusterMapper)
	}

	// All three members are flagged for removal...
	unhealthyList := len(etcdadmClusterMapper[etcdTest.etcdadmCluster.UID].unhealthyMembersToRemove)
	g.Expect(unhealthyList).To(Equal(3))

	// ...but three more failing passes still must not delete anything.
	mockRt.EXPECT().RoundTrip(gomock.Any()).Return(nil, errors.New("error")).Times(9)
	mockEtcd.EXPECT().MemberList(gomock.Any()).Times(3)
	mockEtcd.EXPECT().Close().Times(3)
	for i := 0; i < 3; i++ {
		r.startHealthCheck(context.Background(), etcdadmClusterMapper)
	}

	// No machine carries a deletion timestamp; the removal queue is untouched.
	machineList := &clusterv1.MachineList{}
	g.Expect(fakeKubernetesClient.List(context.Background(), machineList)).To(Succeed())
	for _, m := range machineList.Items {
		g.Expect(m.DeletionTimestamp.IsZero()).To(BeTrue())
	}
	g.Expect(etcdadmClusterMapper[etcdTest.etcdadmCluster.UID].unhealthyMembersToRemove).To(HaveLen(3))
}

// TestQuorumPreserved fails two members of a 5-member cluster: quorum (3)
// survives a removal, so after enough failing passes exactly one machine is
// deleted per reconciliation.
func TestQuorumPreserved(t *testing.T) {
	g := NewWithT(t)

	ctrl := gomock.NewController(t)
	mockEtcd := mocks.NewMockEtcdClient(ctrl)

	etcdTest := newEtcdadmClusterTest(5)
	etcdTest.buildClusterWithExternalEtcd()
	etcdTest.etcdadmCluster.Status.CreationComplete = true

	// The two members that will consistently fail their probes.
	ip1 := etcdTest.machines[0].Status.Addresses[0].Address
	ip2 := etcdTest.machines[1].Status.Addresses[0].Address

	fakeKubernetesClient := fake.NewClientBuilder().WithScheme(setupScheme()).WithObjects(etcdTest.gatherObjects()...).Build()

	etcdEtcdClient := func(ctx context.Context, cluster *clusterv1.Cluster, endpoints string) (EtcdClient, error) {
		return mockEtcd, nil
	}

	r := &EtcdadmClusterReconciler{
		Client:         fakeKubernetesClient,
		uncachedClient: fakeKubernetesClient,
		Log:            log.Log,
		GetEtcdClient:  etcdEtcdClient,
	}
	mockHttpClient := &http.Client{
		Transport: RoundTripperFunc(func(req *http.Request) (*http.Response, error) {
			if strings.Contains(req.Host, ip1) || strings.Contains(req.Host, ip2) {
				return nil, fmt.Errorf("Error")
			}
			return getHealthyEtcdResponse(), nil
		}),
	}

	r.etcdHealthCheckConfig.clusterToHttpClient.Store(etcdTest.cluster.UID, mockHttpClient)
	r.SetIsPortOpen(isPortOpenMock)

	etcdadmClusterMapper := make(map[types.UID]etcdadmClusterMemberHealthConfig)

	mockEtcd.EXPECT().MemberList(gomock.Any()).Return(etcdTest.getMemberListResponse(), nil).Times(5)
	// Only one removal per reconcile even though two members are unhealthy.
	mockEtcd.EXPECT().MemberRemove(gomock.Any(), gomock.Any()).Return(etcdTest.getMemberRemoveResponse(), nil).Times(1)
	mockEtcd.EXPECT().Close().Times(5)
	for i := 0; i < 5; i++ {
		r.startHealthCheck(context.Background(), etcdadmClusterMapper)
	}

	// Both failing members are tracked, but only one machine was deleted.
	g.Expect(etcdadmClusterMapper[etcdTest.etcdadmCluster.UID].unhealthyMembersFrequency).To(HaveLen(2))
	g.Expect(etcdTest.getDeletedMachines(fakeKubernetesClient)).To(HaveLen(1))
}

// RoundTripperFunc adapts a plain function to http.RoundTripper, letting
// tests script per-request transport behavior inline.
type RoundTripperFunc func(*http.Request) (*http.Response, error)

// RoundTrip implements http.RoundTripper by delegating to the function itself.
func (fn RoundTripperFunc) RoundTrip(r *http.Request) (*http.Response, error) {
	return fn(r)
}

// isPortOpenMock stands in for the reconciler's TCP port probe and always
// reports the port as open, so tests exercise the HTTP health path only.
func isPortOpenMock(_ context.Context, _ string) bool {
	return true
}

package controllers

import (
	"context"
	"reflect"

	etcdbootstrapv1 "github.com/aws/etcdadm-bootstrap-provider/api/v1beta1"
	etcdv1 "github.com/aws/etcdadm-controller/api/v1beta1"
	"github.com/pkg/errors"
	corev1 "k8s.io/api/core/v1"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
	clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta2"
	"sigs.k8s.io/cluster-api/controllers/external"
	"sigs.k8s.io/cluster-api/util/collections"
	"sigs.k8s.io/cluster-api/util/failuredomains"
	"sigs.k8s.io/cluster-api/util/patch"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

// EtcdPlane aggregates the EtcdadmCluster, its owning Cluster, the machines
// it owns, and per-machine helper state (patch helpers, bootstrap configs,
// infra objects) used by scale-up/scale-down and rollout decisions.
type EtcdPlane struct {
	EC       *etcdv1.EtcdadmCluster
	Cluster  *clusterv1.Cluster
	Machines collections.Machines
	// machinesPatchHelpers caches a patch.Helper per machine name.
	machinesPatchHelpers map[string]*patch.Helper
	// etcdadmConfigs maps machine name -> its EtcdadmConfig (bootstrap spec).
	etcdadmConfigs map[string]*etcdbootstrapv1.EtcdadmConfig
	// infraResources maps machine name -> its infrastructure object.
	infraResources map[string]*unstructured.Unstructured
}

// NewEtcdPlane builds an EtcdPlane for the given cluster and owned machines,
// pre-fetching each machine's infra object and etcdadm config and creating a
// patch helper per machine. Returns an error if any fetch or helper
// construction fails (missing objects are tolerated by the fetch helpers).
func NewEtcdPlane(ctx context.Context, client client.Client, cluster *clusterv1.Cluster, ec *etcdv1.EtcdadmCluster, ownedMachines collections.Machines) (*EtcdPlane, error) {
	infraObjects, err := getInfraResources(ctx, client, ownedMachines)
	if err != nil {
		return nil, err
	}
	etcdadmConfigs, err := getEtcdadmConfigs(ctx, client, ownedMachines)
	if err != nil {
		return nil, err
	}
	patchHelpers := map[string]*patch.Helper{}
	for _, machine := range ownedMachines {
		patchHelper, err := patch.NewHelper(machine, client)
		if err != nil {
			return nil, errors.Wrapf(err, "failed to create patch helper for machine %s", machine.Name)
		}
		patchHelpers[machine.Name] = patchHelper
	}

	return &EtcdPlane{
		EC:                   ec,
		Cluster:              cluster,
		Machines:             ownedMachines,
		machinesPatchHelpers: patchHelpers,
		infraResources:       infraObjects,
		etcdadmConfigs:       etcdadmConfigs,
	}, nil
}

// Etcdadm controller follows the same logic for selecting a machine to scale down as the KCP controller. Source: https://github.com/kubernetes-sigs/cluster-api/blob/master/controlplane/kubeadm/controllers/scale.go#L234
// Priority: delete-annotated outdated machines, then any delete-annotated
// machine, then outdated machines, then the full machine set.
func selectMachineForScaleDown(ep *EtcdPlane, outdatedMachines collections.Machines) (*clusterv1.Machine, error) {
	machines := ep.Machines
	switch {
	case ep.MachineWithDeleteAnnotation(outdatedMachines).Len() > 0:
		machines = ep.MachineWithDeleteAnnotation(outdatedMachines)
	case ep.MachineWithDeleteAnnotation(machines).Len() > 0:
		machines = ep.MachineWithDeleteAnnotation(machines)
	case outdatedMachines.Len() > 0:
		machines = outdatedMachines
	}
	return ep.MachineInFailureDomainWithMostMachines(machines)
}

// MachineWithDeleteAnnotation returns a machine that has been annotated with DeleteMachineAnnotation key.
func (ep *EtcdPlane) MachineWithDeleteAnnotation(machines collections.Machines) collections.Machines {
	// See if there are any machines with DeleteMachineAnnotation key.
	annotatedMachines := machines.Filter(collections.HasAnnotationKey(clusterv1.DeleteMachineAnnotation))
	// If there are, return list of annotated machines.
	return annotatedMachines
}

// All functions related to failureDomains follow the same logic as KCP's failureDomain implementation, to leverage existing methods
// FailureDomainWithMostMachines returns a fd which has the most machines on it.
82 | func (ep *EtcdPlane) FailureDomainWithMostMachines(machines collections.Machines) *string { 83 | // Get failure domain IDs 84 | failureDomainIDs := make([]string, 0, len(ep.FailureDomains())) 85 | for _, fd := range ep.FailureDomains() { 86 | failureDomainIDs = append(failureDomainIDs, fd.Name) 87 | } 88 | 89 | // See if there are any Machines that are not in currently defined failure domains first. 90 | notInFailureDomains := machines.Filter( 91 | collections.Not(collections.InFailureDomains(failureDomainIDs...)), 92 | ) 93 | if len(notInFailureDomains) > 0 { 94 | // return the failure domain for the oldest Machine not in the current list of failure domains 95 | // this could be either nil (no failure domain defined) or a failure domain that is no longer defined 96 | // in the cluster status. 97 | return ¬InFailureDomains.Oldest().Spec.FailureDomain 98 | } 99 | result := failuredomains.PickMost(context.TODO(), ep.Cluster.Status.FailureDomains, ep.Machines, machines) 100 | return &result 101 | } 102 | 103 | // MachineInFailureDomainWithMostMachines returns the first matching failure domain with machines that has the most control-plane machines on it. 104 | func (ep *EtcdPlane) MachineInFailureDomainWithMostMachines(machines collections.Machines) (*clusterv1.Machine, error) { 105 | fd := ep.FailureDomainWithMostMachines(machines) 106 | var fdStr string 107 | if fd != nil { 108 | fdStr = *fd 109 | } 110 | machinesInFailureDomain := machines.Filter(collections.InFailureDomains(fdStr)) 111 | machineToMark := machinesInFailureDomain.Oldest() 112 | if machineToMark == nil { 113 | return nil, errors.New("failed to pick control plane Machine to mark for deletion") 114 | } 115 | return machineToMark, nil 116 | } 117 | 118 | // NextFailureDomainForScaleUp returns the failure domain with the fewest number of up-to-date machines. 
119 | func (ep *EtcdPlane) NextFailureDomainForScaleUp() *string { 120 | if len(ep.Cluster.Status.FailureDomains) == 0 { 121 | return nil 122 | } 123 | result := failuredomains.PickFewest(context.TODO(), ep.FailureDomains(), ep.UpToDateMachines(), collections.Machines{}) 124 | return &result 125 | } 126 | 127 | // FailureDomains returns a slice of failure domain objects synced from the infrastructure provider into Cluster.Status. 128 | func (ep *EtcdPlane) FailureDomains() []clusterv1.FailureDomain { 129 | if ep.Cluster.Status.FailureDomains == nil { 130 | return []clusterv1.FailureDomain{} 131 | } 132 | return ep.Cluster.Status.FailureDomains 133 | } 134 | 135 | // UpToDateMachines returns the machines that are up to date with the control 136 | // plane's configuration and therefore do not require rollout. 137 | func (ep *EtcdPlane) UpToDateMachines() collections.Machines { 138 | return ep.Machines.Difference(ep.MachinesNeedingRollout()) 139 | } 140 | 141 | func (ep *EtcdPlane) NewestUpToDateMachine() *clusterv1.Machine { 142 | upToDateMachines := ep.UpToDateMachines() 143 | return upToDateMachines.Newest() 144 | } 145 | 146 | // MachinesNeedingRollout return a list of machines that need to be rolled out. 147 | func (ep *EtcdPlane) MachinesNeedingRollout() collections.Machines { 148 | // Ignore machines to be deleted. 149 | machines := ep.Machines.Filter(collections.Not(collections.HasDeletionTimestamp)) 150 | 151 | // Return machines if they are scheduled for rollout or if with an outdated configuration. 152 | return machines.AnyFilter( 153 | //Machines that do not match with Etcdadm config. 154 | collections.Not(MatchesEtcdadmClusterConfiguration(ep.infraResources, ep.etcdadmConfigs, ep.EC)), 155 | ) 156 | } 157 | 158 | // OutOfDateMachines return a list of all machines with an out of date config. 159 | func (ep *EtcdPlane) OutOfDateMachines() collections.Machines { 160 | // Return machines if they are scheduled for rollout or if with an outdated configuration. 
161 | return ep.Machines.AnyFilter( 162 | //Machines that do not match with Etcdadm config. 163 | collections.Not(MatchesEtcdadmClusterConfiguration(ep.infraResources, ep.etcdadmConfigs, ep.EC)), 164 | ) 165 | } 166 | 167 | // MatchesEtcdadmClusterConfiguration returns a filter to find all machines that matches with EtcdadmCluster config and do not require any rollout. 168 | // Etcd version and extra params, and infrastructure template need to be equivalent. 169 | func MatchesEtcdadmClusterConfiguration(infraConfigs map[string]*unstructured.Unstructured, machineConfigs map[string]*etcdbootstrapv1.EtcdadmConfig, ec *etcdv1.EtcdadmCluster) func(machine *clusterv1.Machine) bool { 170 | return collections.And( 171 | MatchesEtcdadmConfig(machineConfigs, ec), 172 | MatchesTemplateClonedFrom(infraConfigs, ec), 173 | ) 174 | } 175 | 176 | // MatchesEtcdadmConfig checks if machine's EtcdadmConfigSpec is equivalent with EtcdadmCluster's spec 177 | func MatchesEtcdadmConfig(machineConfigs map[string]*etcdbootstrapv1.EtcdadmConfig, ec *etcdv1.EtcdadmCluster) collections.Func { 178 | return func(machine *clusterv1.Machine) bool { 179 | if machine == nil { 180 | return false 181 | } 182 | etcdadmConfig, found := machineConfigs[machine.Name] 183 | if !found { 184 | // Return true here because failing to get EtcdadmConfig should not be considered as unmatching. 185 | // This is a safety precaution to avoid rolling out machines if the client or the api-server is misbehaving. 186 | return true 187 | } 188 | 189 | ecConfig := ec.Spec.EtcdadmConfigSpec.DeepCopy() 190 | return reflect.DeepEqual(&etcdadmConfig.Spec, ecConfig) 191 | } 192 | } 193 | 194 | // MatchesTemplateClonedFrom returns a filter to find all machines that match a given EtcdadmCluster's infra template. 
195 | func MatchesTemplateClonedFrom(infraConfigs map[string]*unstructured.Unstructured, ec *etcdv1.EtcdadmCluster) collections.Func { 196 | return func(machine *clusterv1.Machine) bool { 197 | if machine == nil { 198 | return false 199 | } 200 | infraObj, found := infraConfigs[machine.Name] 201 | if !found { 202 | // Return true here because failing to get infrastructure machine should not be considered as unmatching. 203 | return true 204 | } 205 | 206 | clonedFromName, ok1 := infraObj.GetAnnotations()[clusterv1.TemplateClonedFromNameAnnotation] 207 | clonedFromGroupKind, ok2 := infraObj.GetAnnotations()[clusterv1.TemplateClonedFromGroupKindAnnotation] 208 | if !ok1 || !ok2 { 209 | // All etcdadmCluster cloned infra machines should have this annotation. 210 | // Missing the annotation may be due to older version machines or adopted machines. 211 | // Should not be considered as mismatch. 212 | return true 213 | } 214 | 215 | // Check if the machine's infrastructure reference has been created from the current etcdadmCluster infrastructure template. 216 | if clonedFromName != ec.Spec.InfrastructureTemplate.Name || 217 | clonedFromGroupKind != ec.Spec.InfrastructureTemplate.GroupVersionKind().GroupKind().String() { 218 | return false 219 | } 220 | return true 221 | } 222 | } 223 | 224 | // getInfraResources fetches the external infrastructure resource for each machine in the collection and returns a map of machine.Name -> infraResource. 
225 | func getInfraResources(ctx context.Context, cl client.Client, machines collections.Machines) (map[string]*unstructured.Unstructured, error) { 226 | result := map[string]*unstructured.Unstructured{} 227 | for _, m := range machines { 228 | // Convert ContractVersionedObjectReference to ObjectReference 229 | // Use the APIGroup from the machine's InfrastructureRef and assume v1beta1 version 230 | apiVersion := m.Spec.InfrastructureRef.APIGroup + "/v1beta1" 231 | infraRef := &corev1.ObjectReference{ 232 | APIVersion: apiVersion, 233 | Kind: m.Spec.InfrastructureRef.Kind, 234 | Name: m.Spec.InfrastructureRef.Name, 235 | Namespace: m.Namespace, 236 | } 237 | infraObj, err := external.Get(ctx, cl, infraRef) 238 | if err != nil { 239 | if apierrors.IsNotFound(errors.Cause(err)) { 240 | continue 241 | } 242 | return nil, errors.Wrapf(err, "failed to retrieve infra obj for machine %q", m.Name) 243 | } 244 | result[m.Name] = infraObj 245 | } 246 | return result, nil 247 | } 248 | 249 | // getEtcdadmConfigs fetches the etcdadm config for each machine in the collection and returns a map of machine.Name -> EtcdadmConfig. 
250 | func getEtcdadmConfigs(ctx context.Context, cl client.Client, machines collections.Machines) (map[string]*etcdbootstrapv1.EtcdadmConfig, error) { 251 | result := map[string]*etcdbootstrapv1.EtcdadmConfig{} 252 | for _, m := range machines { 253 | bootstrapRef := m.Spec.Bootstrap.ConfigRef 254 | if !bootstrapRef.IsDefined() { 255 | continue 256 | } 257 | machineConfig := &etcdbootstrapv1.EtcdadmConfig{} 258 | if err := cl.Get(ctx, client.ObjectKey{Name: bootstrapRef.Name, Namespace: m.Namespace}, machineConfig); err != nil { 259 | if apierrors.IsNotFound(errors.Cause(err)) { 260 | continue 261 | } 262 | return nil, errors.Wrapf(err, "failed to retrieve bootstrap config for machine %q", m.Name) 263 | } 264 | result[m.Name] = machineConfig 265 | } 266 | return result, nil 267 | } 268 | --------------------------------------------------------------------------------