├── sonobuoy ├── setup ├── vault.hcl ├── cke.config ├── wait-up.sh ├── cke.config.yml ├── etcd.conf.yml ├── etcd-entrypoint.sh ├── vault-entrypoint.sh ├── .gitignore ├── PRODUCT.yaml ├── cke-cluster.yml.template ├── worker-ign.yml └── docker-compose.yml ├── tools ├── .gitignore ├── .dockerignore ├── Dockerfile ├── README.md ├── empty-dir │ └── main.go ├── rivers │ ├── di_test.go │ ├── health_test.go │ ├── README.md │ └── upstream_test.go ├── Makefile ├── make_directories │ └── main.go ├── install-cni │ └── main.go └── RELEASE.md ├── example ├── .gitignore ├── cke.config ├── cke.config.yml ├── cke-entrypoint.sh ├── etcd-entrypoint.sh ├── setup │ ├── cke-policy.hcl │ ├── test.sh │ └── admin-policy.hcl ├── wait-up.sh ├── vault.hcl ├── vault-entrypoint.sh ├── cke-cluster.yml ├── Vagrantfile └── etcd.conf.yml ├── mtest ├── cke.yml ├── mscp ├── test.sh ├── mssh ├── .gitignore ├── kubeconfig.yml ├── httpd.yml ├── tools_test.go ├── README.md ├── reboot-job-completed.yaml ├── reboot-job-running.yaml ├── ssh_config ├── robustness_test.go ├── repair-deployment.yaml ├── reboot-deployment.yaml ├── env_test.go ├── reboot-slow-eviction-deployment.yaml ├── reboot-alittleslow-eviction-deployment.yaml ├── assets_test.go ├── upgrade_test.go ├── localproxy_test.go ├── mtest_key ├── webhook-resources.yaml └── reboot-eviction-dry-run.yaml ├── docker ├── .gitignore ├── install-tools └── Dockerfile ├── op ├── common │ ├── doc.go │ ├── image_pull.go │ ├── volume.go │ └── mkdirs.go ├── k8s │ ├── cni.go │ ├── scheduler_restart.go │ ├── controller_manager_restart.go │ ├── encryption.go │ └── proxy_restart.go ├── etcd.go ├── etcd │ ├── wait.go │ ├── mark.go │ ├── destroy.go │ ├── start.go │ └── restart.go ├── status_test.go ├── repair_dequeue.go ├── clusterdns │ ├── clusterdns.go │ ├── update_config_map.go │ └── create_configmap.go ├── rivers_restart.go ├── nodedns │ └── update_configmap.go ├── kube_node_update.go ├── resource.go ├── rivers_boot.go ├── kube_wait.go ├── upgrade.go ├── 
repair_drain_timeout.go └── stop.go ├── pkg ├── ckecli │ ├── main.go │ └── cmd │ │ ├── resource.go │ │ ├── etcd.go │ │ ├── repair_queue.go │ │ ├── vault.go │ │ ├── cluster.go │ │ ├── sabakan.go │ │ ├── kubernetes.go │ │ ├── auto_repair.go │ │ ├── constraints.go │ │ ├── reboot_queue.go │ │ ├── ca.go │ │ ├── sabakan_enable.go │ │ ├── sabakan_disable.go │ │ ├── reboot_queue_enable.go │ │ ├── repair_queue_enable.go │ │ ├── reboot_queue_disable.go │ │ ├── repair_queue_disable.go │ │ ├── auto_repair_enable.go │ │ ├── auto_repair_disable.go │ │ ├── images.go │ │ ├── resource_list.go │ │ ├── sabakan_get_url.go │ │ ├── sabakan_is_enabled.go │ │ ├── leader.go │ │ ├── completion.go │ │ ├── resource_get.go │ │ ├── reboot_queue_is_enabled.go │ │ ├── repair_queue_is_enabled.go │ │ ├── auto_repair_is_enabled.go │ │ ├── status.go │ │ ├── constraints_show.go │ │ ├── sabakan_get_variables.go │ │ ├── cluster_get.go │ │ ├── auto_repair_get_variables.go │ │ ├── repair_queue_list.go │ │ ├── sabakan_get_temlate.go │ │ ├── reboot_queue_cancel.go │ │ ├── repair_queue_delete.go │ │ ├── sabakan_set_url.go │ │ ├── reboot_queue_add_test.go │ │ ├── ca_get.go │ │ ├── reboot_queue_reset_backoff.go │ │ ├── reboot_queue_cancel_all.go │ │ ├── repair_queue_reset_backoff.go │ │ ├── repair_queue_delete_finished.go │ │ ├── sabakan_set_template.go │ │ ├── repair_queue_delete_unfinished.go │ │ ├── etcd_useradd.go │ │ ├── repair_queue_add.go │ │ ├── cluster_set.go │ │ ├── resource_delete.go │ │ ├── vault_config.go │ │ ├── resource_set.go │ │ ├── ca_set.go │ │ ├── history.go │ │ ├── sabakan_set_variables.go │ │ ├── auto_repair_set_variables.go │ │ ├── vault_ssh_privkey.go │ │ ├── reboot_queue_add.go │ │ ├── reboot_queue_list.go │ │ └── scp.go └── cke-localproxy │ └── main.go ├── logo └── LICENSE ├── sabakan ├── constants.go ├── mock │ ├── gqlgen.yml │ ├── server.go │ └── schema.graphql ├── op.go ├── template.go └── score.go ├── tools.go ├── version.go ├── bin ├── env ├── env-sonobuoy ├── watch_service └── 
run-mtest.sh ├── .gitignore ├── server ├── config.go ├── responses.go ├── apierror.go ├── server.go ├── watch.go └── integrator.go ├── .github ├── ISSUE_TEMPLATE │ ├── issue.md │ └── bug_report.md └── workflows │ ├── sonobuoy.yaml │ └── release-tools.yaml ├── cluster_config.go ├── collections.go ├── docs ├── container-runtime.md ├── api.md ├── constraints.md ├── cke-localproxy.md ├── record.md ├── faq.md ├── logging.md ├── cke.md ├── user-resources.md └── mtest.md ├── metrics ├── collector_test.go └── updater.go ├── images.go ├── static └── rbac.yml ├── operation.go ├── constraints_test.go ├── localproxy └── strategy.go ├── constraints.go ├── kubeconfig.go ├── etcd_util.go ├── main_test.go ├── phase.go ├── record.go ├── collections_test.go └── Makefile /sonobuoy/setup: -------------------------------------------------------------------------------- 1 | ../example/setup -------------------------------------------------------------------------------- /sonobuoy/vault.hcl: -------------------------------------------------------------------------------- 1 | ../example/vault.hcl -------------------------------------------------------------------------------- /tools/.gitignore: -------------------------------------------------------------------------------- 1 | /bin 2 | /plugins 3 | -------------------------------------------------------------------------------- /sonobuoy/cke.config: -------------------------------------------------------------------------------- 1 | ../example/cke.config -------------------------------------------------------------------------------- /sonobuoy/wait-up.sh: -------------------------------------------------------------------------------- 1 | ../example/wait-up.sh -------------------------------------------------------------------------------- /sonobuoy/cke.config.yml: -------------------------------------------------------------------------------- 1 | ../example/cke.config.yml 
-------------------------------------------------------------------------------- /sonobuoy/etcd.conf.yml: -------------------------------------------------------------------------------- 1 | ../example/etcd.conf.yml -------------------------------------------------------------------------------- /example/.gitignore: -------------------------------------------------------------------------------- 1 | /bin 2 | /etcd-data 3 | .vagrant 4 | -------------------------------------------------------------------------------- /mtest/cke.yml: -------------------------------------------------------------------------------- 1 | endpoints: 2 | - http://__HOST1__:2379 3 | -------------------------------------------------------------------------------- /sonobuoy/etcd-entrypoint.sh: -------------------------------------------------------------------------------- 1 | ../example/etcd-entrypoint.sh -------------------------------------------------------------------------------- /sonobuoy/vault-entrypoint.sh: -------------------------------------------------------------------------------- 1 | ../example/vault-entrypoint.sh -------------------------------------------------------------------------------- /example/cke.config: -------------------------------------------------------------------------------- 1 | endpoints: 2 | - http://localhost:2379 3 | -------------------------------------------------------------------------------- /example/cke.config.yml: -------------------------------------------------------------------------------- 1 | endpoints: 2 | - http://172.30.0.14:2379 3 | -------------------------------------------------------------------------------- /tools/.dockerignore: -------------------------------------------------------------------------------- 1 | * 2 | !bin 3 | !plugins/bin 4 | !plugins/LICENSE 5 | -------------------------------------------------------------------------------- /docker/.gitignore: 
-------------------------------------------------------------------------------- 1 | cke 2 | cke-localproxy 3 | ckecli 4 | compile_resources 5 | LICENSE 6 | -------------------------------------------------------------------------------- /tools/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM scratch 2 | 3 | COPY bin/ / 4 | COPY plugins/ /cni_plugins/ 5 | 6 | ENV PATH=/ 7 | -------------------------------------------------------------------------------- /example/cke-entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | /usr/local/cke/install-tools 4 | /usr/local/cke/bin/cke 5 | -------------------------------------------------------------------------------- /mtest/mscp: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | CURRENT_DIR=$(cd $(dirname $0);pwd) 4 | scp -F ${CURRENT_DIR}/ssh_config $@ 5 | -------------------------------------------------------------------------------- /op/common/doc.go: -------------------------------------------------------------------------------- 1 | // Package common provides generic commands shared by many Operators. 2 | package common 3 | -------------------------------------------------------------------------------- /mtest/test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | TARGET="$1" 4 | 5 | $GINKGO -v -focus="${TARGET}" . 6 | RET=$? 
7 | 8 | exit $RET 9 | -------------------------------------------------------------------------------- /pkg/ckecli/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import "github.com/cybozu-go/cke/pkg/ckecli/cmd" 4 | 5 | func main() { 6 | cmd.Execute() 7 | } 8 | -------------------------------------------------------------------------------- /example/etcd-entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | /usr/local/etcd/install-tools 4 | /usr/local/etcd/bin/etcd --config-file=/etc/etcd/etcd.conf.yml 5 | -------------------------------------------------------------------------------- /mtest/mssh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | CURRENT_DIR=$(cd $(dirname $0);pwd) 4 | chmod 600 ${CURRENT_DIR}/mtest_key 5 | ssh -F ${CURRENT_DIR}/ssh_config $@ 6 | -------------------------------------------------------------------------------- /op/k8s/cni.go: -------------------------------------------------------------------------------- 1 | package k8s 2 | 3 | const ( 4 | cniBinDir = "/opt/cni/bin" 5 | cniConfDir = "/etc/cni/net.d" 6 | cniVarDir = "/var/lib/cni" 7 | ) 8 | -------------------------------------------------------------------------------- /example/setup/cke-policy.hcl: -------------------------------------------------------------------------------- 1 | # Manage CKE secrets 2 | path "cke/*" 3 | { 4 | capabilities = ["create", "read", "update", "delete", "list", "sudo"] 5 | } 6 | -------------------------------------------------------------------------------- /logo/LICENSE: -------------------------------------------------------------------------------- 1 | Cybozu logo license 2 | Copyright (c) 2019 Cybozu 3 | 4 | Please see the [guideline](https://cybozu.co.jp/logotypes/) for using this logo file. 
5 | -------------------------------------------------------------------------------- /sabakan/constants.go: -------------------------------------------------------------------------------- 1 | package sabakan 2 | 3 | const ( 4 | // CKELabelRole is the label name to specify node's role 5 | CKELabelRole = "cke.cybozu.com/role" 6 | ) 7 | -------------------------------------------------------------------------------- /tools.go: -------------------------------------------------------------------------------- 1 | //go:build tools 2 | 3 | package tools 4 | 5 | import ( 6 | _ "github.com/99designs/gqlgen" 7 | _ "github.com/99designs/gqlgen/graphql/introspection" 8 | ) 9 | -------------------------------------------------------------------------------- /mtest/.gitignore: -------------------------------------------------------------------------------- 1 | /etcd-*-linux-amd64.tar.gz 2 | /output 3 | *.img 4 | /vault_*.zip 5 | crictl.tar.gz 6 | protoc.zip 7 | snapshot* 8 | coreos_*.img 9 | flatcar_*.img 10 | -------------------------------------------------------------------------------- /sonobuoy/.gitignore: -------------------------------------------------------------------------------- 1 | /bin 2 | /etcd-data 3 | /.vagrant 4 | /.kubeconfig 5 | /sonobuoy.tar.gz 6 | /worker.ign 7 | /cke-cluster.yml 8 | /gcp_rsa 9 | /gcp_rsa.pub 10 | /run.sh 11 | -------------------------------------------------------------------------------- /example/setup/test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh -e 2 | 3 | etcdctl --endpoints=http://172.30.0.14:2379 member list 4 | VAULT_ADDR=http://172.30.0.13:8200 VAULT_TOKEN=cybozu vault status 5 | ckecli leader 6 | -------------------------------------------------------------------------------- /mtest/kubeconfig.yml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | clusters: 3 | - name: local 4 | cluster: 5 | server: http://@NODE1@:8080 6 
| users: 7 | - name: admin 8 | contexts: 9 | - context: 10 | cluster: local 11 | user: admin 12 | -------------------------------------------------------------------------------- /docker/install-tools: -------------------------------------------------------------------------------- 1 | #!/bin/sh -e 2 | 3 | DEST=/host 4 | 5 | cp --remove-destination /usr/local/cke/bin/ckecli $DEST/ckecli 6 | cp --remove-destination /usr/local/cke/bin/cke-localproxy $DEST/cke-localproxy 7 | chmod 755 $DEST/ckecli $DEST/cke-localproxy 8 | -------------------------------------------------------------------------------- /example/wait-up.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | for i in {1..10}; do 4 | if $(docker inspect setup | jq 'any(.Name == "/setup" and .State.Status == "exited" and .State.ExitCode == 0)'); then 5 | exit 0 6 | fi 7 | sleep 1 8 | done 9 | exit 1 10 | -------------------------------------------------------------------------------- /mtest/httpd.yml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Pod 3 | metadata: 4 | name: httpd 5 | labels: 6 | app.kubernetes.io/name: httpd 7 | spec: 8 | containers: 9 | - name: httpd 10 | image: ghcr.io/cybozu/testhttpd:0 11 | hostNetwork: true 12 | -------------------------------------------------------------------------------- /version.go: -------------------------------------------------------------------------------- 1 | package cke 2 | 3 | // Version represents current cke version 4 | const Version = "1.32.4" 5 | 6 | // ConfigVersion represents the current configuration scheme 7 | // of how CKE constructs its Kubernetes cluster. 
8 | const ConfigVersion = "2" 9 | -------------------------------------------------------------------------------- /example/vault.hcl: -------------------------------------------------------------------------------- 1 | # -*- mode: hcl -*- 2 | disable_mlock = true 3 | 4 | listener "tcp" { 5 | address = "0.0.0.0:8200" 6 | tls_disable = 1 7 | } 8 | 9 | storage "etcd" { 10 | address = "http://172.30.0.14:2379" 11 | etcd_api = "v3" 12 | } 13 | -------------------------------------------------------------------------------- /bin/env: -------------------------------------------------------------------------------- 1 | PROJECT=neco-test 2 | ZONE=asia-northeast1-c 3 | SERVICE_ACCOUNT=neco-test@neco-test.iam.gserviceaccount.com 4 | MACHINE_TYPE=c2-standard-30 5 | DISK_TYPE=pd-ssd 6 | BOOT_DISK_SIZE=30GB 7 | GCLOUD="gcloud --quiet --account ${SERVICE_ACCOUNT} --project ${PROJECT}" 8 | -------------------------------------------------------------------------------- /sabakan/mock/gqlgen.yml: -------------------------------------------------------------------------------- 1 | # Generate test GraphQL server. 2 | 3 | schema: 4 | - schema.graphql 5 | exec: 6 | filename: generated.go 7 | package: mock 8 | model: 9 | filename: models.go 10 | resolver: 11 | layout: follow-schema 12 | dir: . 
13 | package: mock 14 | -------------------------------------------------------------------------------- /op/etcd.go: -------------------------------------------------------------------------------- 1 | package op 2 | 3 | import "github.com/cybozu-go/cke" 4 | 5 | // EtcdVolumeName returns etcd volume name 6 | func EtcdVolumeName(e cke.EtcdParams) string { 7 | if len(e.VolumeName) == 0 { 8 | return DefaultEtcdVolumeName 9 | } 10 | return e.VolumeName 11 | } 12 | -------------------------------------------------------------------------------- /mtest/tools_test.go: -------------------------------------------------------------------------------- 1 | //go:build tools 2 | // +build tools 3 | 4 | package mtest 5 | 6 | // this is to avoid removal from go.mod. gofail and ginkgo are used in mtest/Makefile. 7 | import ( 8 | _ "github.com/onsi/ginkgo/v2/ginkgo" 9 | _ "go.etcd.io/gofail" 10 | _ "go.etcd.io/gofail/runtime" 11 | ) 12 | -------------------------------------------------------------------------------- /pkg/ckecli/cmd/resource.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "github.com/spf13/cobra" 5 | ) 6 | 7 | var resourceCmd = &cobra.Command{ 8 | Use: "resource", 9 | Short: "resource subcommand", 10 | Long: `resource subcommand`, 11 | } 12 | 13 | func init() { 14 | rootCmd.AddCommand(resourceCmd) 15 | } 16 | -------------------------------------------------------------------------------- /tools/README.md: -------------------------------------------------------------------------------- 1 | This directory contains source code to build `cke-tools`, a helper container for CKE. 2 | 3 | The container image is pushed to [ghcr.io/cybozu-go/cke-tools](https://github.com/cybozu-go/cke/pkgs/container/cke-tools) by GitHub Actions. 4 | 5 | See [RELEASE.md](RELEASE.md) for how to push a new image version. 
6 | -------------------------------------------------------------------------------- /mtest/README.md: -------------------------------------------------------------------------------- 1 | How to manually run CKE using placemat 2 | ====================================== 3 | 4 | 1. Run `make setup` 5 | 2. Run `make placemat` 6 | 3. Run `make bootstrap` 7 | 4. Login to `host1` by: 8 | 9 | ```console 10 | $ ./mssh host1 11 | ``` 12 | 13 | 5. To stop placemat, run `make stop`. 14 | -------------------------------------------------------------------------------- /pkg/ckecli/cmd/etcd.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "github.com/spf13/cobra" 5 | ) 6 | 7 | // etcdCmd represents the etcd command 8 | var etcdCmd = &cobra.Command{ 9 | Use: "etcd", 10 | Short: "etcd subcommand", 11 | Long: `etcd subcommand`, 12 | } 13 | 14 | func init() { 15 | rootCmd.AddCommand(etcdCmd) 16 | } 17 | -------------------------------------------------------------------------------- /pkg/ckecli/cmd/repair_queue.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "github.com/spf13/cobra" 5 | ) 6 | 7 | var repairQueueCmd = &cobra.Command{ 8 | Use: "repair-queue", 9 | Short: "repair-queue subcommand", 10 | Long: "repair-queue subcommand", 11 | } 12 | 13 | func init() { 14 | rootCmd.AddCommand(repairQueueCmd) 15 | } 16 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Test binary, build with `go test -c` 2 | *.test 3 | 4 | # Output of the go coverage tool, specifically when used with LiteIDE 5 | *.out 6 | 7 | # Project-local glide cache, RE: https://github.com/Masterminds/glide/issues/736 8 | .glide/ 9 | vendor/ 10 | 11 | # Editors 12 | *~ 13 | .*.swp 14 | .#* 15 | \#*# 16 | /.vscode 17 | 
-------------------------------------------------------------------------------- /pkg/ckecli/cmd/vault.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "github.com/spf13/cobra" 5 | ) 6 | 7 | // vaultCmd represents the vault command 8 | var vaultCmd = &cobra.Command{ 9 | Use: "vault", 10 | Short: "vault subcommand", 11 | Long: `vault subcommand`, 12 | } 13 | 14 | func init() { 15 | rootCmd.AddCommand(vaultCmd) 16 | } 17 | -------------------------------------------------------------------------------- /bin/env-sonobuoy: -------------------------------------------------------------------------------- 1 | PROJECT=neco-test 2 | ZONE=asia-northeast2-c 3 | SERVICE_ACCOUNT=neco-test@neco-test.iam.gserviceaccount.com 4 | MACHINE_TYPE_SONOBUOY=c2-standard-4 5 | MACHINE_TYPE_WORKER=c2-standard-8 6 | DISK_TYPE=pd-ssd 7 | BOOT_DISK_SIZE=20GB 8 | GCLOUD="gcloud --quiet --account ${SERVICE_ACCOUNT} --project ${PROJECT}" 9 | GO_VERSION=1.23.6 10 | -------------------------------------------------------------------------------- /pkg/ckecli/cmd/cluster.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "github.com/spf13/cobra" 5 | ) 6 | 7 | // clusterCmd represents the cluster command 8 | var clusterCmd = &cobra.Command{ 9 | Use: "cluster", 10 | Short: "cluster subcommand", 11 | Long: `cluster subcommand`, 12 | } 13 | 14 | func init() { 15 | rootCmd.AddCommand(clusterCmd) 16 | } 17 | -------------------------------------------------------------------------------- /pkg/ckecli/cmd/sabakan.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "github.com/spf13/cobra" 5 | ) 6 | 7 | // sabakanCmd represents the sabakan command 8 | var sabakanCmd = &cobra.Command{ 9 | Use: "sabakan", 10 | Short: "sabakan subcommand", 11 | Long: `sabakan subcommand`, 12 | } 13 | 14 | func init() { 
15 | rootCmd.AddCommand(sabakanCmd) 16 | } 17 | -------------------------------------------------------------------------------- /mtest/reboot-job-completed.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: batch/v1 2 | kind: Job 3 | metadata: 4 | namespace: reboot-test 5 | name: job-completed 6 | spec: 7 | template: 8 | spec: 9 | containers: 10 | - name: ubuntu 11 | image: ghcr.io/cybozu/ubuntu:22.04 12 | command: ["true"] 13 | restartPolicy: Never 14 | backoffLimit: 1 15 | -------------------------------------------------------------------------------- /mtest/reboot-job-running.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: batch/v1 2 | kind: Job 3 | metadata: 4 | namespace: reboot-test 5 | name: job-running 6 | spec: 7 | template: 8 | spec: 9 | containers: 10 | - name: ubuntu 11 | image: ghcr.io/cybozu/ubuntu:22.04 12 | command: ["sleep", "3600"] 13 | restartPolicy: Never 14 | backoffLimit: 1 15 | -------------------------------------------------------------------------------- /pkg/ckecli/cmd/kubernetes.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "github.com/spf13/cobra" 5 | ) 6 | 7 | // kubernetesCmd represents the kubernetes command 8 | var kubernetesCmd = &cobra.Command{ 9 | Use: "kubernetes", 10 | Short: "kubernetes subcommand", 11 | Long: `kubernetes subcommand`, 12 | } 13 | 14 | func init() { 15 | rootCmd.AddCommand(kubernetesCmd) 16 | } 17 | -------------------------------------------------------------------------------- /pkg/ckecli/cmd/auto_repair.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "github.com/spf13/cobra" 5 | ) 6 | 7 | // autoRepairCmd represents the auto-repair command 8 | var autoRepairCmd = &cobra.Command{ 9 | Use: "auto-repair", 10 | Short: "auto-repair subcommand", 11 | Long: 
`auto-repair subcommand`, 12 | } 13 | 14 | func init() { 15 | rootCmd.AddCommand(autoRepairCmd) 16 | } 17 | -------------------------------------------------------------------------------- /example/vault-entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | function retry() { 4 | for i in {1..10}; do 5 | sleep 1 6 | if "$@"; then 7 | return 0 8 | fi 9 | echo "retry connecting to etcd" 10 | done 11 | return $? 12 | } 13 | 14 | retry curl http://172.30.0.14:2379/health 15 | 16 | /usr/local/vault/install-tools 17 | /usr/local/vault/bin/vault server -config=/etc/vault/config.hcl 18 | -------------------------------------------------------------------------------- /pkg/ckecli/cmd/constraints.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "github.com/spf13/cobra" 5 | ) 6 | 7 | // constraintsCmd represents the constraints command 8 | var constraintsCmd = &cobra.Command{ 9 | Use: "constraints", 10 | Aliases: []string{"cstr"}, 11 | Short: "constraints subcommand", 12 | Long: `constraints subcommand`, 13 | } 14 | 15 | func init() { 16 | rootCmd.AddCommand(constraintsCmd) 17 | } 18 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | # CKE container 2 | FROM ghcr.io/cybozu/ubuntu:22.04 3 | 4 | COPY cke /usr/local/cke/bin/cke 5 | COPY ckecli /usr/local/cke/bin/ckecli 6 | COPY cke-localproxy /usr/local/cke/bin/cke-localproxy 7 | COPY install-tools /usr/local/cke/install-tools 8 | 9 | RUN chmod -R +xr /usr/local/cke 10 | 11 | ENV PATH=/usr/local/cke/bin:"$PATH" 12 | 13 | USER 10000:10000 14 | 15 | ENTRYPOINT ["/usr/local/cke/bin/cke"] 16 | -------------------------------------------------------------------------------- /pkg/ckecli/cmd/reboot_queue.go: 
-------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "github.com/spf13/cobra" 5 | ) 6 | 7 | // rebootQueueCmd represents the reboot-queue command 8 | var rebootQueueCmd = &cobra.Command{ 9 | Use: "reboot-queue", 10 | Aliases: []string{"rq"}, 11 | Short: "reboot-queue subcommand", 12 | Long: `reboot-queue subcommand`, 13 | } 14 | 15 | func init() { 16 | rootCmd.AddCommand(rebootQueueCmd) 17 | } 18 | -------------------------------------------------------------------------------- /server/config.go: -------------------------------------------------------------------------------- 1 | package server 2 | 3 | import "time" 4 | 5 | // Config is the configuration for cke-server. 6 | type Config struct { 7 | // Interval is the interval of the main loop. 8 | Interval time.Duration 9 | // CertsGCInterval is the interval of the certificate garbage collection. 10 | CertsGCInterval time.Duration 11 | // MaxConcurrentUpdates is the maximum number of concurrent updates. 12 | MaxConcurrentUpdates int 13 | } 14 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/issue.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Task 3 | about: Describe this issue 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | ## What 11 | 12 | Describe what this issue should address. 13 | 14 | ## How 15 | 16 | Describe how to address the issue. 
17 | 18 | ## Checklist 19 | 20 | - [ ] Finish implentation of the issue 21 | - [ ] Test all functions 22 | - [ ] Have enough logs to trace activities 23 | - [ ] Notify developers of necessary actions 24 | -------------------------------------------------------------------------------- /cluster_config.go: -------------------------------------------------------------------------------- 1 | package cke 2 | 3 | const ( 4 | defaultEtcdVolumeName = "etcd-cke" 5 | defaultContainerRuntimeEndpoint = "/run/containerd/containerd.sock" 6 | ) 7 | 8 | // NewCluster creates Cluster 9 | func NewCluster() *Cluster { 10 | return &Cluster{ 11 | Options: Options{ 12 | Etcd: EtcdParams{ 13 | VolumeName: defaultEtcdVolumeName, 14 | }, 15 | Kubelet: KubeletParams{ 16 | CRIEndpoint: defaultContainerRuntimeEndpoint, 17 | }, 18 | }, 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /pkg/ckecli/cmd/ca.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "github.com/spf13/cobra" 5 | ) 6 | 7 | func isValidCAName(name string) bool { 8 | switch name { 9 | case "server", "etcd-peer", "etcd-client", "kubernetes": 10 | return true 11 | } 12 | return false 13 | } 14 | 15 | // caCmd represents the ca command 16 | var caCmd = &cobra.Command{ 17 | Use: "ca", 18 | Short: "ca subcommand", 19 | Long: `ca subcommand`, 20 | } 21 | 22 | func init() { 23 | rootCmd.AddCommand(caCmd) 24 | } 25 | -------------------------------------------------------------------------------- /mtest/ssh_config: -------------------------------------------------------------------------------- 1 | Host * 2 | User cybozu 3 | IdentityFile mtest_key 4 | StrictHostKeyChecking no 5 | UserKnownHostsFile /dev/null 6 | LogLevel ERROR 7 | 8 | Host host1 9 | HostName 10.0.0.11 10 | 11 | Host host2 12 | HostName 10.0.0.12 13 | 14 | Host node1 15 | HostName 10.0.0.101 16 | 17 | Host node2 18 | HostName 10.0.0.102 19 | 20 | 
Host node3 21 | HostName 10.0.0.103 22 | 23 | Host node4 24 | HostName 10.0.0.104 25 | 26 | Host node5 27 | HostName 10.0.0.105 28 | 29 | Host node6 30 | HostName 10.0.0.106 31 | -------------------------------------------------------------------------------- /sabakan/mock/server.go: -------------------------------------------------------------------------------- 1 | package mock 2 | 3 | import ( 4 | "net/http/httptest" 5 | 6 | "github.com/99designs/gqlgen/graphql/handler" 7 | "github.com/99designs/gqlgen/graphql/handler/transport" 8 | ) 9 | 10 | // Server creates a mock server that implements sabakan GraphQL API. 11 | func Server() *httptest.Server { 12 | h := handler.New(NewExecutableSchema(Config{ 13 | Resolvers: mockResolver{}, 14 | })) 15 | h.AddTransport(transport.GET{}) 16 | h.AddTransport(transport.POST{}) 17 | return httptest.NewServer(h) 18 | } 19 | -------------------------------------------------------------------------------- /sonobuoy/PRODUCT.yaml: -------------------------------------------------------------------------------- 1 | vendor: Cybozu 2 | name: CKE - Cybozu Kubernetes Engine 3 | version: vX.Y.Z 4 | website_url: https://github.com/cybozu-go/cke/ 5 | repo_url: https://github.com/cybozu-go/cke/ 6 | documentation_url: https://github.com/cybozu-go/cke/tree/main/docs 7 | product_logo_url: https://raw.githubusercontent.com/cybozu-go/cke/main/logo/cybozu_logo.svg 8 | type: Installer 9 | description: Cybozu Kubernetes Engine, a distributed service that automates Kubernetes cluster management. 
10 | contact_email_address: neco@cybozu.com 11 | -------------------------------------------------------------------------------- /pkg/ckecli/cmd/sabakan_enable.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "context" 5 | 6 | "github.com/cybozu-go/well" 7 | "github.com/spf13/cobra" 8 | ) 9 | 10 | var sabakanEnableCmd = &cobra.Command{ 11 | Use: "enable", 12 | Short: "enable sabakan integration", 13 | Long: `Enable sabakan integration.`, 14 | 15 | RunE: func(cmd *cobra.Command, args []string) error { 16 | well.Go(func(ctx context.Context) error { 17 | return storage.EnableSabakan(ctx, true) 18 | }) 19 | well.Stop() 20 | return well.Wait() 21 | }, 22 | } 23 | 24 | func init() { 25 | sabakanCmd.AddCommand(sabakanEnableCmd) 26 | } 27 | -------------------------------------------------------------------------------- /pkg/ckecli/cmd/sabakan_disable.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "context" 5 | 6 | "github.com/cybozu-go/well" 7 | "github.com/spf13/cobra" 8 | ) 9 | 10 | var sabakanDisableCmd = &cobra.Command{ 11 | Use: "disable", 12 | Short: "disable sabakan integration", 13 | Long: `Disable sabakan integration.`, 14 | 15 | RunE: func(cmd *cobra.Command, args []string) error { 16 | well.Go(func(ctx context.Context) error { 17 | return storage.EnableSabakan(ctx, false) 18 | }) 19 | well.Stop() 20 | return well.Wait() 21 | }, 22 | } 23 | 24 | func init() { 25 | sabakanCmd.AddCommand(sabakanDisableCmd) 26 | } 27 | -------------------------------------------------------------------------------- /mtest/robustness_test.go: -------------------------------------------------------------------------------- 1 | package mtest 2 | 3 | import ( 4 | . "github.com/onsi/ginkgo/v2" 5 | . 
"github.com/onsi/gomega" 6 | ) 7 | 8 | func testStopCP() { 9 | It("should stop CP", func() { 10 | // stop CKE temporarily to avoid hang-up in SSH session due to node2 shutdown 11 | stopCKE() 12 | 13 | execAt(node2, "sudo", "systemd-run", "halt", "-f", "-f") 14 | Eventually(func() error { 15 | _, err := execAtLocal("ping", "-c", "1", "-W", "1", node2) 16 | return err 17 | }).ShouldNot(Succeed()) 18 | 19 | execAt(node3, "sudo", "systemctl", "stop", "sshd.socket") 20 | 21 | runCKE(ckeImageURL) 22 | }) 23 | } 24 | -------------------------------------------------------------------------------- /example/cke-cluster.yml: -------------------------------------------------------------------------------- 1 | name: tutorial 2 | nodes: 3 | - address: 192.168.1.101 4 | user: core 5 | control_plane: true 6 | - address: 192.168.1.102 7 | user: core 8 | - address: 192.168.1.103 9 | user: core 10 | service_subnet: 10.100.0.0/16 11 | dns_servers: ["8.8.8.8", "1.1.1.1"] 12 | options: 13 | kubelet: 14 | config: 15 | apiVersion: kubelet.config.k8s.io/v1beta1 16 | kind: KubeletConfiguration 17 | volumePluginDir: /var/lib/kubelet/volumeplugins 18 | kube-controller-manager: 19 | extra_args: 20 | - "--allocate-node-cidrs=true" 21 | - "--cluster-cidr=192.168.0.0/16" 22 | -------------------------------------------------------------------------------- /mtest/repair-deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | namespace: repair-test 5 | name: sample 6 | spec: 7 | replicas: 3 8 | selector: 9 | matchLabels: 10 | app: sample 11 | template: 12 | metadata: 13 | labels: 14 | app: sample 15 | spec: 16 | containers: 17 | - name: httpd 18 | image: ghcr.io/cybozu/testhttpd:0 19 | --- 20 | apiVersion: policy/v1 21 | kind: PodDisruptionBudget 22 | metadata: 23 | namespace: repair-test 24 | name: sample 25 | spec: 26 | maxUnavailable: 0 27 | selector: 28 | matchLabels: 29 | app: sample 30 | 
-------------------------------------------------------------------------------- /pkg/ckecli/cmd/reboot_queue_enable.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "context" 5 | 6 | "github.com/cybozu-go/well" 7 | "github.com/spf13/cobra" 8 | ) 9 | 10 | var rebootQueueEnableCmd = &cobra.Command{ 11 | Use: "enable", 12 | Short: "enable reboot queue processing", 13 | Long: `Enable reboot queue processing.`, 14 | 15 | RunE: func(cmd *cobra.Command, args []string) error { 16 | well.Go(func(ctx context.Context) error { 17 | return storage.EnableRebootQueue(ctx, true) 18 | }) 19 | well.Stop() 20 | return well.Wait() 21 | }, 22 | } 23 | 24 | func init() { 25 | rebootQueueCmd.AddCommand(rebootQueueEnableCmd) 26 | } 27 | -------------------------------------------------------------------------------- /pkg/ckecli/cmd/repair_queue_enable.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "context" 5 | 6 | "github.com/cybozu-go/well" 7 | "github.com/spf13/cobra" 8 | ) 9 | 10 | var repairQueueEnableCmd = &cobra.Command{ 11 | Use: "enable", 12 | Short: "enable repair queue processing", 13 | Long: `Enable repair queue processing.`, 14 | 15 | RunE: func(cmd *cobra.Command, args []string) error { 16 | well.Go(func(ctx context.Context) error { 17 | return storage.EnableRepairQueue(ctx, true) 18 | }) 19 | well.Stop() 20 | return well.Wait() 21 | }, 22 | } 23 | 24 | func init() { 25 | repairQueueCmd.AddCommand(repairQueueEnableCmd) 26 | } 27 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of 
what the bug is. 12 | 13 | **Environments** 14 | - Version: 15 | - OS: 16 | 17 | **To Reproduce** 18 | Steps to reproduce the behavior: 19 | 1. Go to '...' 20 | 2. Click on '....' 21 | 3. Scroll down to '....' 22 | 4. See error 23 | 24 | **Expected behavior** 25 | A clear and concise description of what you expected to happen. 26 | 27 | **Additional context** 28 | Add any other context about the problem here. 29 | -------------------------------------------------------------------------------- /pkg/ckecli/cmd/reboot_queue_disable.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "context" 5 | 6 | "github.com/cybozu-go/well" 7 | "github.com/spf13/cobra" 8 | ) 9 | 10 | var rebootQueueDisableCmd = &cobra.Command{ 11 | Use: "disable", 12 | Short: "disable reboot queue processing", 13 | Long: `Disable reboot queue processing.`, 14 | 15 | RunE: func(cmd *cobra.Command, args []string) error { 16 | well.Go(func(ctx context.Context) error { 17 | return storage.EnableRebootQueue(ctx, false) 18 | }) 19 | well.Stop() 20 | return well.Wait() 21 | }, 22 | } 23 | 24 | func init() { 25 | rebootQueueCmd.AddCommand(rebootQueueDisableCmd) 26 | } 27 | -------------------------------------------------------------------------------- /pkg/ckecli/cmd/repair_queue_disable.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "context" 5 | 6 | "github.com/cybozu-go/well" 7 | "github.com/spf13/cobra" 8 | ) 9 | 10 | var repairQueueDisableCmd = &cobra.Command{ 11 | Use: "disable", 12 | Short: "disable repair queue processing", 13 | Long: `Disable repair queue processing.`, 14 | 15 | RunE: func(cmd *cobra.Command, args []string) error { 16 | well.Go(func(ctx context.Context) error { 17 | return storage.EnableRepairQueue(ctx, false) 18 | }) 19 | well.Stop() 20 | return well.Wait() 21 | }, 22 | } 23 | 24 | func init() { 25 | 
repairQueueCmd.AddCommand(repairQueueDisableCmd) 26 | } 27 | -------------------------------------------------------------------------------- /mtest/reboot-deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | namespace: reboot-test 5 | name: sample 6 | spec: 7 | replicas: 3 8 | selector: 9 | matchLabels: 10 | reboot-app: sample 11 | template: 12 | metadata: 13 | labels: 14 | reboot-app: sample 15 | spec: 16 | containers: 17 | - name: httpd 18 | image: ghcr.io/cybozu/testhttpd:0 19 | --- 20 | apiVersion: policy/v1 21 | kind: PodDisruptionBudget 22 | metadata: 23 | namespace: reboot-test 24 | name: sample 25 | spec: 26 | maxUnavailable: 0 27 | selector: 28 | matchLabels: 29 | reboot-app: sample 30 | -------------------------------------------------------------------------------- /pkg/ckecli/cmd/auto_repair_enable.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "context" 5 | 6 | "github.com/cybozu-go/well" 7 | "github.com/spf13/cobra" 8 | ) 9 | 10 | var autoRepairEnableCmd = &cobra.Command{ 11 | Use: "enable", 12 | Short: "enable sabakan-triggered automatic repair", 13 | Long: `Enable sabakan-triggered automatic repair.`, 14 | 15 | RunE: func(cmd *cobra.Command, args []string) error { 16 | well.Go(func(ctx context.Context) error { 17 | return storage.EnableAutoRepair(ctx, true) 18 | }) 19 | well.Stop() 20 | return well.Wait() 21 | }, 22 | } 23 | 24 | func init() { 25 | autoRepairCmd.AddCommand(autoRepairEnableCmd) 26 | } 27 | -------------------------------------------------------------------------------- /pkg/ckecli/cmd/auto_repair_disable.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "context" 5 | 6 | "github.com/cybozu-go/well" 7 | "github.com/spf13/cobra" 8 | ) 9 | 10 | var autoRepairDisableCmd = 
&cobra.Command{ 11 | Use: "disable", 12 | Short: "disable sabakan-triggered automatic repair", 13 | Long: `Disable sabakan-triggered automatic repair.`, 14 | 15 | RunE: func(cmd *cobra.Command, args []string) error { 16 | well.Go(func(ctx context.Context) error { 17 | return storage.EnableAutoRepair(ctx, false) 18 | }) 19 | well.Stop() 20 | return well.Wait() 21 | }, 22 | } 23 | 24 | func init() { 25 | autoRepairCmd.AddCommand(autoRepairDisableCmd) 26 | } 27 | -------------------------------------------------------------------------------- /mtest/env_test.go: -------------------------------------------------------------------------------- 1 | package mtest 2 | 3 | import ( 4 | "os" 5 | ) 6 | 7 | var ( 8 | host1 = os.Getenv("HOST1") 9 | host2 = os.Getenv("HOST2") 10 | node1 = os.Getenv("NODE1") 11 | node2 = os.Getenv("NODE2") 12 | node3 = os.Getenv("NODE3") 13 | node4 = os.Getenv("NODE4") 14 | node5 = os.Getenv("NODE5") 15 | node6 = os.Getenv("NODE6") 16 | 17 | ckeClusterPath = os.Getenv("CKECLUSTER") 18 | ckeConfigPath = os.Getenv("CKECONFIG") 19 | ckeImagePath = os.Getenv("CKE_IMAGE") 20 | ckeImageURL = os.Getenv("CKE_IMAGE_URL") 21 | kubectlPath = os.Getenv("KUBECTL") 22 | testSuite = os.Getenv("SUITE") 23 | 24 | sshKeyFile = os.Getenv("SSH_PRIVKEY") 25 | ) 26 | -------------------------------------------------------------------------------- /pkg/ckecli/cmd/images.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/cybozu-go/cke" 7 | "github.com/spf13/cobra" 8 | ) 9 | 10 | // imagesCmd represents the images command 11 | var imagesCmd = &cobra.Command{ 12 | Use: "images", 13 | Short: "list container image names used by cke", 14 | Long: `List container image names used by cke.`, 15 | 16 | // Override rootCmd.PersistentPreRunE. 
17 | PersistentPreRun: func(cmd *cobra.Command, args []string) {}, 18 | Run: func(cmd *cobra.Command, args []string) { 19 | for _, img := range cke.AllImages() { 20 | fmt.Println(img) 21 | } 22 | }, 23 | } 24 | 25 | func init() { 26 | rootCmd.AddCommand(imagesCmd) 27 | } 28 | -------------------------------------------------------------------------------- /sabakan/op.go: -------------------------------------------------------------------------------- 1 | package sabakan 2 | 3 | type updateOp struct { 4 | name string 5 | changes []string 6 | } 7 | 8 | func (op *updateOp) record(msg string) { 9 | op.changes = append(op.changes, msg) 10 | } 11 | 12 | func (op *updateOp) addControlPlane(m *Machine) { 13 | op.record("add new control plane: " + m.Spec.IPv4[0]) 14 | } 15 | 16 | func (op *updateOp) addWorker(m *Machine) { 17 | op.record("add new worker: " + m.Spec.IPv4[0]) 18 | } 19 | 20 | func (op *updateOp) promoteWorker(worker *Machine) { 21 | op.record("promote a worker: " + worker.Spec.IPv4[0]) 22 | } 23 | 24 | func (op *updateOp) demoteControlPlane(cp *Machine) { 25 | op.record("demote a control plane: " + cp.Spec.IPv4[0]) 26 | } 27 | -------------------------------------------------------------------------------- /collections.go: -------------------------------------------------------------------------------- 1 | package cke 2 | 3 | func compareStrings(s1, s2 []string) bool { 4 | if len(s1) != len(s2) { 5 | return false 6 | } 7 | for i := range s1 { 8 | if s1[i] != s2[i] { 9 | return false 10 | } 11 | } 12 | return true 13 | } 14 | 15 | func compareStringMap(m1, m2 map[string]string) bool { 16 | if len(m1) != len(m2) { 17 | return false 18 | } 19 | for k, v := range m1 { 20 | if v2, ok := m2[k]; !ok || v != v2 { 21 | return false 22 | } 23 | } 24 | return true 25 | } 26 | 27 | func compareMounts(m1, m2 []Mount) bool { 28 | if len(m1) != len(m2) { 29 | return false 30 | } 31 | 32 | for i := range m1 { 33 | if !m1[i].Equal(m2[i]) { 34 | return false 35 | } 36 | } 37 | 
return true 38 | } 39 | -------------------------------------------------------------------------------- /mtest/reboot-slow-eviction-deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | namespace: reboot-test 5 | name: slow 6 | spec: 7 | replicas: 1 8 | selector: 9 | matchLabels: 10 | reboot-app: slow 11 | template: 12 | metadata: 13 | labels: 14 | reboot-app: slow 15 | spec: 16 | containers: 17 | - name: ubuntu 18 | image: ghcr.io/cybozu/ubuntu:22.04 19 | # Because sleep command ignores SIGTERM, 20 | # this Pod will stay Terminating state after deletion 21 | # for the time specified by terminationGracePeriodSeconds. 22 | command: [ sleep, infinity ] 23 | # longer than eviction_timeout_seconds 24 | terminationGracePeriodSeconds: 90 25 | -------------------------------------------------------------------------------- /op/etcd/wait.go: -------------------------------------------------------------------------------- 1 | package etcd 2 | 3 | import "github.com/cybozu-go/cke" 4 | 5 | type etcdWaitClusterOp struct { 6 | endpoints []string 7 | executed bool 8 | } 9 | 10 | // WaitClusterOp returns an Operator to wait until etcd cluster becomes healthy 11 | func WaitClusterOp(nodes []*cke.Node) cke.Operator { 12 | return &etcdWaitClusterOp{ 13 | endpoints: etcdEndpoints(nodes), 14 | } 15 | } 16 | 17 | func (o *etcdWaitClusterOp) Name() string { 18 | return "etcd-wait-cluster" 19 | } 20 | 21 | func (o *etcdWaitClusterOp) NextCommand() cke.Commander { 22 | if o.executed { 23 | return nil 24 | } 25 | o.executed = true 26 | 27 | return waitEtcdSyncCommand{o.endpoints, false} 28 | } 29 | 30 | func (o *etcdWaitClusterOp) Targets() []string { 31 | return o.endpoints 32 | } 33 | -------------------------------------------------------------------------------- /pkg/ckecli/cmd/resource_list.go: -------------------------------------------------------------------------------- 1 | 
package cmd 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | 7 | "github.com/cybozu-go/well" 8 | "github.com/spf13/cobra" 9 | ) 10 | 11 | var resourceListCmd = &cobra.Command{ 12 | Use: "list", 13 | Short: "list keys of user resources", 14 | Long: `List keys of registered user resources.`, 15 | 16 | RunE: func(cmd *cobra.Command, args []string) error { 17 | well.Go(func(ctx context.Context) error { 18 | keys, err := storage.ListResources(ctx) 19 | if err != nil { 20 | return err 21 | } 22 | 23 | for _, key := range keys { 24 | fmt.Println(key) 25 | } 26 | return nil 27 | }) 28 | well.Stop() 29 | return well.Wait() 30 | }, 31 | } 32 | 33 | func init() { 34 | resourceCmd.AddCommand(resourceListCmd) 35 | } 36 | -------------------------------------------------------------------------------- /bin/watch_service: -------------------------------------------------------------------------------- 1 | #!/bin/sh -e 2 | 3 | . $(dirname $0)/env 4 | 5 | test $# -eq 1 6 | service="$1" 7 | 8 | tail_once() { 9 | host="$1" 10 | $GCLOUD 2>/dev/null compute ssh --zone=${ZONE} cybozu@${INSTANCE_NAME} -- \ 11 | ssh -F ssh_config ${host} -- sudo journalctl -f -u ${service}.service 12 | } 13 | 14 | tail_forever() { 15 | host="$1" 16 | 17 | while true; do 18 | tail_once $host || continue 19 | sleep 3 20 | done 21 | } 22 | 23 | chmod 600 ./mtest/mtest_key 24 | while ! 
$GCLOUD 2>/dev/null compute scp --zone=${ZONE} ./mtest/mtest_key ./mtest/ssh_config cybozu@${INSTANCE_NAME}:; do 25 | sleep 1 26 | done 27 | 28 | tail_forever host1 | sed -e s/^/$(tput -Txterm setaf 1)/ & 29 | tail_forever host2 | sed -e s/^/$(tput -Txterm setaf 2)/ & 30 | 31 | wait 32 | -------------------------------------------------------------------------------- /mtest/reboot-alittleslow-eviction-deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | namespace: reboot-test 5 | name: alittleslow 6 | spec: 7 | replicas: 1 8 | selector: 9 | matchLabels: 10 | reboot-app: alittleslow 11 | template: 12 | metadata: 13 | labels: 14 | reboot-app: alittleslow 15 | spec: 16 | containers: 17 | - name: ubuntu 18 | image: ghcr.io/cybozu/ubuntu:22.04 19 | # Because sleep command ignores SIGTERM, 20 | # this Pod will stay Terminating state after deletion 21 | # for the time specified by terminationGracePeriodSeconds. 
22 | command: [ sleep, infinity ] 23 | # shorter than eviction_timeout_seconds 24 | terminationGracePeriodSeconds: 15 25 | -------------------------------------------------------------------------------- /tools/empty-dir/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "path/filepath" 7 | ) 8 | 9 | func main() { 10 | if len(os.Args) != 2 { 11 | fmt.Fprintf(os.Stderr, "usage: %s DIR\n", os.Args[0]) 12 | os.Exit(2) 13 | } 14 | if err := subMain(os.Args[1]); err != nil { 15 | fmt.Fprintf(os.Stderr, "%v\n", err) 16 | os.Exit(1) 17 | } 18 | } 19 | 20 | func subMain(dir string) error { 21 | files, err := os.ReadDir(dir) 22 | if err != nil && !os.IsNotExist(err) { 23 | return fmt.Errorf("failed to read %s: %w", dir, err) 24 | } 25 | 26 | for _, f := range files { 27 | target := filepath.Join(dir, f.Name()) 28 | if err := os.RemoveAll(target); err != nil { 29 | return fmt.Errorf("failed to delete %s: %w", target, err) 30 | } 31 | } 32 | return nil 33 | } 34 | -------------------------------------------------------------------------------- /pkg/ckecli/cmd/sabakan_get_url.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | 7 | "github.com/cybozu-go/well" 8 | "github.com/spf13/cobra" 9 | ) 10 | 11 | // sabakanGetURLCmd represents the "sabakan get-url" command 12 | var sabakanGetURLCmd = &cobra.Command{ 13 | Use: "get-url", 14 | Short: "get stored URL of sabakan server", 15 | Long: `get stored URL of sabakan server.`, 16 | 17 | RunE: func(cmd *cobra.Command, args []string) error { 18 | well.Go(func(ctx context.Context) error { 19 | u, err := storage.GetSabakanURL(ctx) 20 | if err != nil { 21 | return err 22 | } 23 | fmt.Println(u) 24 | return nil 25 | }) 26 | well.Stop() 27 | return well.Wait() 28 | }, 29 | } 30 | 31 | func init() { 32 | sabakanCmd.AddCommand(sabakanGetURLCmd) 33 | } 34 | 
-------------------------------------------------------------------------------- /pkg/ckecli/cmd/sabakan_is_enabled.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | 7 | "github.com/cybozu-go/well" 8 | "github.com/spf13/cobra" 9 | ) 10 | 11 | var sabakanIsEnabledCmd = &cobra.Command{ 12 | Use: "is-enabled", 13 | Short: "show sabakan integration status", 14 | Long: `Show whether sabakan integration is enabled or not. "true" if enabled.`, 15 | 16 | RunE: func(cmd *cobra.Command, args []string) error { 17 | well.Go(func(ctx context.Context) error { 18 | disabled, err := storage.IsSabakanDisabled(ctx) 19 | if err != nil { 20 | return err 21 | } 22 | fmt.Println(!disabled) 23 | return nil 24 | }) 25 | well.Stop() 26 | return well.Wait() 27 | }, 28 | } 29 | 30 | func init() { 31 | sabakanCmd.AddCommand(sabakanIsEnabledCmd) 32 | } 33 | -------------------------------------------------------------------------------- /pkg/ckecli/cmd/leader.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | 7 | "github.com/cybozu-go/well" 8 | "github.com/spf13/cobra" 9 | ) 10 | 11 | // leaderCmd represents the leader command 12 | var leaderCmd = &cobra.Command{ 13 | Use: "leader", 14 | Short: "show the hostname of the current leader process", 15 | Long: `Show the hostname of the current leader process.`, 16 | 17 | RunE: func(cmd *cobra.Command, args []string) error { 18 | well.Go(func(ctx context.Context) error { 19 | leader, err := storage.GetLeaderHostname(ctx) 20 | if err != nil { 21 | return err 22 | } 23 | 24 | fmt.Println(leader) 25 | return nil 26 | }) 27 | well.Stop() 28 | return well.Wait() 29 | }, 30 | } 31 | 32 | func init() { 33 | rootCmd.AddCommand(leaderCmd) 34 | } 35 | -------------------------------------------------------------------------------- /pkg/ckecli/cmd/completion.go: 
-------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "os" 5 | 6 | "github.com/spf13/cobra" 7 | ) 8 | 9 | // completionCmd represents the completion command 10 | var completionCmd = &cobra.Command{ 11 | Use: "completion", 12 | Short: "Generates bash completion scripts", 13 | Long: `To load completion run 14 | 15 | . <(ckecli completion) 16 | 17 | To configure your bash shell to load completions for each session add to your bashrc 18 | 19 | # ~/.bashrc or ~/.profile 20 | . <(ckecli completion) 21 | `, 22 | 23 | // Override rootCmd.PersistentPreRunE. 24 | PersistentPreRun: func(cmd *cobra.Command, args []string) {}, 25 | Run: func(cmd *cobra.Command, args []string) { 26 | rootCmd.GenBashCompletion(os.Stdout) 27 | }, 28 | } 29 | 30 | func init() { 31 | rootCmd.AddCommand(completionCmd) 32 | } 33 | -------------------------------------------------------------------------------- /pkg/ckecli/cmd/resource_get.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "strings" 7 | 8 | "github.com/cybozu-go/well" 9 | "github.com/spf13/cobra" 10 | ) 11 | 12 | var resourceGetCmd = &cobra.Command{ 13 | Use: "get KEY", 14 | Short: "get a user-defined resource by key", 15 | Long: `Get a user-defined resource by key.`, 16 | Args: cobra.ExactArgs(1), 17 | RunE: func(cmd *cobra.Command, args []string) error { 18 | well.Go(func(ctx context.Context) error { 19 | data, _, err := storage.GetResource(ctx, args[0]) 20 | if err != nil { 21 | return err 22 | } 23 | 24 | fmt.Println(strings.TrimSpace(string(data))) 25 | return nil 26 | }) 27 | well.Stop() 28 | return well.Wait() 29 | }, 30 | } 31 | 32 | func init() { 33 | resourceCmd.AddCommand(resourceGetCmd) 34 | } 35 | -------------------------------------------------------------------------------- /pkg/ckecli/cmd/reboot_queue_is_enabled.go: 
-------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | 7 | "github.com/cybozu-go/well" 8 | "github.com/spf13/cobra" 9 | ) 10 | 11 | var rebootQueueIsEnabledCmd = &cobra.Command{ 12 | Use: "is-enabled", 13 | Short: "show reboot queue status", 14 | Long: `Show whether the processing of the reboot queue is enabled or not. "true" if enabled.`, 15 | 16 | RunE: func(cmd *cobra.Command, args []string) error { 17 | well.Go(func(ctx context.Context) error { 18 | disabled, err := storage.IsRebootQueueDisabled(ctx) 19 | if err != nil { 20 | return err 21 | } 22 | fmt.Println(!disabled) 23 | return nil 24 | }) 25 | well.Stop() 26 | return well.Wait() 27 | }, 28 | } 29 | 30 | func init() { 31 | rebootQueueCmd.AddCommand(rebootQueueIsEnabledCmd) 32 | } 33 | -------------------------------------------------------------------------------- /pkg/ckecli/cmd/repair_queue_is_enabled.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | 7 | "github.com/cybozu-go/well" 8 | "github.com/spf13/cobra" 9 | ) 10 | 11 | var repairQueueIsEnabledCmd = &cobra.Command{ 12 | Use: "is-enabled", 13 | Short: "show repair queue status", 14 | Long: `Show whether the processing of the repair queue is enabled or not. 
"true" if enabled.`, 15 | 16 | RunE: func(cmd *cobra.Command, args []string) error { 17 | well.Go(func(ctx context.Context) error { 18 | disabled, err := storage.IsRepairQueueDisabled(ctx) 19 | if err != nil { 20 | return err 21 | } 22 | fmt.Println(!disabled) 23 | return nil 24 | }) 25 | well.Stop() 26 | return well.Wait() 27 | }, 28 | } 29 | 30 | func init() { 31 | repairQueueCmd.AddCommand(repairQueueIsEnabledCmd) 32 | } 33 | -------------------------------------------------------------------------------- /mtest/assets_test.go: -------------------------------------------------------------------------------- 1 | package mtest 2 | 3 | import _ "embed" 4 | 5 | //go:embed httpd.yml 6 | var httpdYAML []byte 7 | 8 | //go:embed reboot-deployment.yaml 9 | var rebootDeploymentYAML []byte 10 | 11 | //go:embed reboot-job-completed.yaml 12 | var rebootJobCompletedYAML []byte 13 | 14 | //go:embed reboot-job-running.yaml 15 | var rebootJobRunningYAML []byte 16 | 17 | //go:embed reboot-eviction-dry-run.yaml 18 | var rebootEvictionDryRunYAML []byte 19 | 20 | //go:embed reboot-slow-eviction-deployment.yaml 21 | var rebootSlowEvictionDeploymentYAML []byte 22 | 23 | //go:embed reboot-alittleslow-eviction-deployment.yaml 24 | var rebootALittleSlowEvictionDeploymentYAML []byte 25 | 26 | //go:embed repair-deployment.yaml 27 | var repairDeploymentYAML []byte 28 | 29 | //go:embed webhook-resources.yaml 30 | var webhookYAML []byte 31 | -------------------------------------------------------------------------------- /sonobuoy/cke-cluster.yml.template: -------------------------------------------------------------------------------- 1 | name: tutorial 2 | nodes: 3 | - address: @WORKER1_ADDRESS@ 4 | user: cke 5 | control_plane: true 6 | - address: @WORKER2_ADDRESS@ 7 | user: cke 8 | - address: @WORKER3_ADDRESS@ 9 | user: cke 10 | service_subnet: 10.100.0.0/16 11 | dns_servers: ["8.8.8.8", "1.1.1.1"] 12 | options: 13 | kubelet: 14 | config: 15 | apiVersion: kubelet.config.k8s.io/v1beta1 16 | 
kind: KubeletConfiguration 17 | cgroupDriver: systemd 18 | volumePluginDir: /var/lib/kubelet/volumeplugins 19 | kube-controller-manager: 20 | extra_args: 21 | - "--allocate-node-cidrs=true" 22 | - "--cluster-cidr=192.168.0.0/16" 23 | kube-proxy: 24 | config: 25 | apiVersion: kubeproxy.config.k8s.io/v1alpha1 26 | kind: KubeProxyConfiguration 27 | mode: ipvs 28 | -------------------------------------------------------------------------------- /pkg/ckecli/cmd/auto_repair_is_enabled.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | 7 | "github.com/cybozu-go/well" 8 | "github.com/spf13/cobra" 9 | ) 10 | 11 | var autoRepairIsEnabledCmd = &cobra.Command{ 12 | Use: "is-enabled", 13 | Short: "show sabakan-triggered automatic repair status", 14 | Long: `Show whether sabakan-triggered automatic repair is enabled or not. "true" if enabled.`, 15 | 16 | RunE: func(cmd *cobra.Command, args []string) error { 17 | well.Go(func(ctx context.Context) error { 18 | disabled, err := storage.IsAutoRepairDisabled(ctx) 19 | if err != nil { 20 | return err 21 | } 22 | fmt.Println(!disabled) 23 | return nil 24 | }) 25 | well.Stop() 26 | return well.Wait() 27 | }, 28 | } 29 | 30 | func init() { 31 | autoRepairCmd.AddCommand(autoRepairIsEnabledCmd) 32 | } 33 | -------------------------------------------------------------------------------- /pkg/ckecli/cmd/status.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "context" 5 | "encoding/json" 6 | "fmt" 7 | "os" 8 | 9 | "github.com/cybozu-go/cke" 10 | "github.com/spf13/cobra" 11 | ) 12 | 13 | var statusCmd = &cobra.Command{ 14 | Use: "status", 15 | Short: "show the server status", 16 | Long: `Show the server status if the server is running. 
17 | If no status is available, this command exits with status code 4.`, 18 | 19 | RunE: func(cmd *cobra.Command, args []string) error { 20 | st, err := storage.GetStatus(context.Background()) 21 | if err == cke.ErrNotFound { 22 | fmt.Fprintln(os.Stderr, "no status") 23 | os.Exit(4) 24 | } 25 | if err != nil { 26 | return err 27 | } 28 | enc := json.NewEncoder(os.Stdout) 29 | return enc.Encode(st) 30 | }, 31 | } 32 | 33 | func init() { 34 | rootCmd.AddCommand(statusCmd) 35 | } 36 | -------------------------------------------------------------------------------- /docs/container-runtime.md: -------------------------------------------------------------------------------- 1 | Container Runtime support 2 | ========================= 3 | 4 | CKE deployed containers 5 | ----------------------- 6 | 7 | The following programs are run as Docker containers. 8 | 9 | - `etcd` 10 | - `kube-apiserver` 11 | - `kube-controller-manager` 12 | - `kube-scheduler` 13 | - `kubelet` 14 | - [rivers](../tools/rivers) 15 | 16 | Kubernetes Pods 17 | --------------- 18 | 19 | CKE has tested only with [containerd][]. 20 | 21 | To use containerd, add the following configurations to `cluster.yml`. 22 | 23 | ```yaml 24 | options: 25 | kubelet: 26 | extra_binds: 27 | # The root directory for containerd metadata. 
(Default: "/var/lib/containerd") 28 | - source: /var/lib/containerd 29 | destination: /var/lib/containerd 30 | read_only: false 31 | cri_endpoint: /path/to/containerd/socket 32 | ``` 33 | 34 | [containerd]: https://containerd.io/ 35 | -------------------------------------------------------------------------------- /pkg/ckecli/cmd/constraints_show.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "context" 5 | "encoding/json" 6 | "os" 7 | 8 | "github.com/cybozu-go/well" 9 | "github.com/spf13/cobra" 10 | ) 11 | 12 | // constraintsShowCmd represents the "constraints show" command 13 | var constraintsShowCmd = &cobra.Command{ 14 | Use: "show", 15 | Short: "show current constraints", 16 | Long: `Show the list of current constraint values.`, 17 | 18 | RunE: func(cmd *cobra.Command, args []string) error { 19 | well.Go(func(ctx context.Context) error { 20 | cstr, err := storage.GetConstraints(ctx) 21 | if err != nil { 22 | return err 23 | } 24 | 25 | enc := json.NewEncoder(os.Stdout) 26 | enc.SetIndent("", " ") 27 | return enc.Encode(cstr) 28 | }) 29 | well.Stop() 30 | return well.Wait() 31 | }, 32 | } 33 | 34 | func init() { 35 | constraintsCmd.AddCommand(constraintsShowCmd) 36 | } 37 | -------------------------------------------------------------------------------- /pkg/ckecli/cmd/sabakan_get_variables.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "context" 5 | "os" 6 | 7 | "github.com/cybozu-go/well" 8 | "github.com/spf13/cobra" 9 | ) 10 | 11 | // sabakanGetVariablesCmd represents the "sabakan get-variables" command 12 | var sabakanGetVariablesCmd = &cobra.Command{ 13 | Use: "get-variables", 14 | Short: "get the query variables to search available machines in sabakan", 15 | Long: `Get the query variables to search available machines in sabakan.`, 16 | 17 | RunE: func(cmd *cobra.Command, args []string) error { 18 | 
well.Go(func(ctx context.Context) error { 19 | data, err := storage.GetSabakanQueryVariables(ctx) 20 | if err != nil { 21 | return err 22 | } 23 | os.Stdout.Write(data) 24 | return nil 25 | }) 26 | well.Stop() 27 | return well.Wait() 28 | }, 29 | } 30 | 31 | func init() { 32 | sabakanCmd.AddCommand(sabakanGetVariablesCmd) 33 | } 34 | -------------------------------------------------------------------------------- /tools/rivers/di_test.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "net" 7 | "sync" 8 | ) 9 | 10 | // testDialer is a pseudo Dialer used for DI 11 | type testDialer struct { 12 | m sync.Mutex 13 | errorAddress string 14 | } 15 | 16 | func (d *testDialer) Dial(network, address string) (net.Conn, error) { 17 | return d.DialContext(context.Background(), network, address) 18 | } 19 | 20 | func (d *testDialer) DialContext(ctx context.Context, network, address string) (net.Conn, error) { 21 | d.m.Lock() 22 | defer d.m.Unlock() 23 | 24 | if address == d.errorAddress { 25 | return nil, fmt.Errorf("") 26 | } 27 | // return dummy connection 28 | conn1, conn2 := net.Pipe() 29 | defer conn2.Close() 30 | return conn1, nil 31 | } 32 | 33 | func (d *testDialer) SetErrorAddress(address string) { 34 | d.m.Lock() 35 | defer d.m.Unlock() 36 | 37 | d.errorAddress = address 38 | } 39 | -------------------------------------------------------------------------------- /pkg/ckecli/cmd/cluster_get.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "context" 5 | "os" 6 | 7 | "github.com/cybozu-go/well" 8 | "github.com/spf13/cobra" 9 | "sigs.k8s.io/yaml" 10 | ) 11 | 12 | // clusterGetCmd represents the "cluster get" command 13 | var clusterGetCmd = &cobra.Command{ 14 | Use: "get", 15 | Short: "dump stored cluster configuration", 16 | Long: `Dump cluster configuration stored in etcd.`, 17 | 18 | RunE: func(cmd 
*cobra.Command, args []string) error { 19 | 		well.Go(func(ctx context.Context) error { 20 | 			cfg, err := storage.GetCluster(ctx) 21 | 			if err != nil { 22 | 				return err 23 | 			} 24 | 25 | 			b, err := yaml.Marshal(cfg) 26 | 			if err != nil { 27 | 				return err // propagate marshal failure; returning nil silently exited with success 28 | 			} 29 | 30 | 			_, err = os.Stdout.Write(b) 31 | 			return err 32 | 		}) 33 | 		well.Stop() 34 | 		return well.Wait() 35 | 	}, 36 | } 37 | 38 | func init() { 39 | 	clusterCmd.AddCommand(clusterGetCmd) 40 | } 41 | -------------------------------------------------------------------------------- /pkg/ckecli/cmd/auto_repair_get_variables.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | 	"context" 5 | 	"os" 6 | 7 | 	"github.com/cybozu-go/well" 8 | 	"github.com/spf13/cobra" 9 | ) 10 | 11 | // autoRepairGetVariablesCmd represents the "auto-repair get-variables" command 12 | var autoRepairGetVariablesCmd = &cobra.Command{ 13 | 	Use:   "get-variables", 14 | 	Short: "get the query variables to search non-healthy machines in sabakan", 15 | 	Long:  `Get the query variables to search non-healthy machines in sabakan.`, 16 | 17 | 	RunE: func(cmd *cobra.Command, args []string) error { 18 | 		well.Go(func(ctx context.Context) error { 19 | 			data, err := storage.GetAutoRepairQueryVariables(ctx) 20 | 			if err != nil { 21 | 				return err 22 | 			} 23 | 			os.Stdout.Write(data) 24 | 			return nil 25 | 		}) 26 | 		well.Stop() 27 | 		return well.Wait() 28 | 	}, 29 | } 30 | 31 | func init() { 32 | 	autoRepairCmd.AddCommand(autoRepairGetVariablesCmd) 33 | } 34 | -------------------------------------------------------------------------------- /pkg/ckecli/cmd/repair_queue_list.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | 	"context" 5 | 	"encoding/json" 6 | 	"os" 7 | 8 | 	"github.com/cybozu-go/well" 9 | 	"github.com/spf13/cobra" 10 | ) 11 | 12 | var repairQueueListCmd = &cobra.Command{ 13 | 	Use:   "list", 14 | 	Short: "list the entries in the repair queue",
15 | Long: `List the entries in the repair queue. 16 | 17 | The output is a list of RepairQueueEntry formatted in JSON.`, 18 | Args: cobra.NoArgs, 19 | RunE: func(cmd *cobra.Command, args []string) error { 20 | well.Go(func(ctx context.Context) error { 21 | entries, err := storage.GetRepairsEntries(ctx) 22 | if err != nil { 23 | return err 24 | } 25 | 26 | enc := json.NewEncoder(os.Stdout) 27 | enc.SetIndent("", " ") 28 | return enc.Encode(entries) 29 | }) 30 | well.Stop() 31 | return well.Wait() 32 | }, 33 | } 34 | 35 | func init() { 36 | repairQueueCmd.AddCommand(repairQueueListCmd) 37 | } 38 | -------------------------------------------------------------------------------- /op/etcd/mark.go: -------------------------------------------------------------------------------- 1 | package etcd 2 | 3 | import ( 4 | "github.com/cybozu-go/cke" 5 | "github.com/cybozu-go/cke/op" 6 | "github.com/cybozu-go/cke/op/common" 7 | ) 8 | 9 | type markMemberOp struct { 10 | nodes []*cke.Node 11 | executed bool 12 | } 13 | 14 | // MarkMemberOp returns an Operator to mark nodes as added members. 
15 | func MarkMemberOp(nodes []*cke.Node) cke.Operator { 16 | return &markMemberOp{ 17 | nodes: nodes, 18 | } 19 | } 20 | 21 | func (o *markMemberOp) Name() string { 22 | return "etcd-mark-member" 23 | } 24 | 25 | func (o *markMemberOp) NextCommand() cke.Commander { 26 | if o.executed { 27 | return nil 28 | } 29 | o.executed = true 30 | 31 | return common.VolumeCreateCommand(o.nodes, op.EtcdAddedMemberVolumeName) 32 | } 33 | 34 | func (o *markMemberOp) Targets() []string { 35 | ips := make([]string, len(o.nodes)) 36 | for i, n := range o.nodes { 37 | ips[i] = n.Address 38 | } 39 | return ips 40 | } 41 | -------------------------------------------------------------------------------- /sabakan/template.go: -------------------------------------------------------------------------------- 1 | package sabakan 2 | 3 | import ( 4 | "errors" 5 | 6 | "github.com/cybozu-go/cke" 7 | ) 8 | 9 | // ValidateTemplate validates cluster template. 10 | func ValidateTemplate(tmpl *cke.Cluster) error { 11 | if len(tmpl.Nodes) < 2 { 12 | return errors.New("template must contain at least two nodes") 13 | } 14 | 15 | roles := make(map[string]bool) 16 | var cpCount, ncpCount int 17 | for _, n := range tmpl.Nodes { 18 | if n.ControlPlane { 19 | cpCount++ 20 | continue 21 | } 22 | 23 | ncpCount++ 24 | if n.Labels[CKELabelRole] == "" { 25 | continue 26 | } 27 | roles[n.Labels[CKELabelRole]] = true 28 | } 29 | 30 | if cpCount != 1 { 31 | return errors.New("template must contain only one control plane node") 32 | } 33 | if ncpCount >= 2 && ncpCount != len(roles) { 34 | return errors.New("non-control plane nodes must be associated with unique roles") 35 | } 36 | 37 | return tmpl.Validate(true) 38 | } 39 | -------------------------------------------------------------------------------- /pkg/ckecli/cmd/sabakan_get_temlate.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "context" 5 | "os" 6 | 7 | "github.com/cybozu-go/well" 8 
| 	"github.com/spf13/cobra" 9 | 	"sigs.k8s.io/yaml" 10 | ) 11 | 12 | // sabakanGetTemplateCmd represents the "sabakan get-template" command 13 | var sabakanGetTemplateCmd = &cobra.Command{ 14 | 	Use:   "get-template", 15 | 	Short: "get the cluster configuration template", 16 | 	Long:  `Get the cluster configuration template.`, 17 | 18 | 	RunE: func(cmd *cobra.Command, args []string) error { 19 | 		well.Go(func(ctx context.Context) error { 20 | 			tmpl, _, err := storage.GetSabakanTemplate(ctx) 21 | 			if err != nil { 22 | 				return err 23 | 			} 24 | 25 | 			b, err := yaml.Marshal(tmpl) 26 | 			if err != nil { 27 | 				return err // propagate marshal failure; returning nil silently exited with success 28 | 			} 29 | 30 | 			_, err = os.Stdout.Write(b) 31 | 			return err 32 | 		}) 33 | 		well.Stop() 34 | 		return well.Wait() 35 | 	}, 36 | } 37 | 38 | func init() { 39 | 	sabakanCmd.AddCommand(sabakanGetTemplateCmd) 40 | } 41 | -------------------------------------------------------------------------------- /example/setup/admin-policy.hcl: -------------------------------------------------------------------------------- 1 | # Manage auth methods broadly across Vault 2 | path "auth/*" 3 | { 4 |   capabilities = ["create", "read", "update", "delete", "list", "sudo"] 5 | } 6 | 7 | # List, create, update, and delete auth methods 8 | path "sys/auth/*" 9 | { 10 |   capabilities = ["create", "read", "update", "delete", "sudo"] 11 | } 12 | 13 | # List existing policies 14 | path "sys/policy" 15 | { 16 |   capabilities = ["read"] 17 | } 18 | 19 | # Create and manage ACL policies broadly across Vault 20 | path "sys/policy/*" 21 | { 22 |   capabilities = ["create", "read", "update", "delete", "list", "sudo"] 23 | } 24 | 25 | # Create and manage secret engines broadly across Vault. 26 | path "sys/mounts/*" 27 | { 28 |   capabilities = ["create", "read", "update", "delete", "list", "sudo"] 29 | } 30 | 31 | # List existing secret engines.
32 | path "sys/mounts" 33 | { 34 | capabilities = ["read"] 35 | } 36 | 37 | # Read health checks 38 | path "sys/health" 39 | { 40 | capabilities = ["read", "sudo"] 41 | } 42 | -------------------------------------------------------------------------------- /metrics/collector_test.go: -------------------------------------------------------------------------------- 1 | package metrics 2 | 3 | import ( 4 | "fmt" 5 | "net/http/httptest" 6 | "strings" 7 | "testing" 8 | ) 9 | 10 | func TestStandardMetrics(t *testing.T) { 11 | collector, _ := newTestCollector() 12 | handler := GetHandler(collector) 13 | 14 | w := httptest.NewRecorder() 15 | req := httptest.NewRequest("GET", "/metrics", nil) 16 | handler.ServeHTTP(w, req) 17 | 18 | metricsFamily, err := parseMetrics(w.Result()) 19 | if err != nil { 20 | t.Fatal(err) 21 | } 22 | 23 | foundStandardMetrics := false 24 | foundRuntimeMetrics := false 25 | for _, mf := range metricsFamily { 26 | fmt.Println(*mf.Name) 27 | if strings.HasPrefix(*mf.Name, "process_") { 28 | foundStandardMetrics = true 29 | } 30 | if strings.HasPrefix(*mf.Name, "go_") { 31 | foundRuntimeMetrics = true 32 | } 33 | } 34 | if !foundStandardMetrics { 35 | t.Errorf("standard metrics was not found") 36 | } 37 | if !foundRuntimeMetrics { 38 | t.Errorf("runtime metrics was not found") 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /pkg/ckecli/cmd/reboot_queue_cancel.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "context" 5 | "strconv" 6 | 7 | "github.com/cybozu-go/cke" 8 | "github.com/cybozu-go/well" 9 | "github.com/spf13/cobra" 10 | ) 11 | 12 | var rebootQueueCancelCmd = &cobra.Command{ 13 | Use: "cancel INDEX", 14 | Short: "cancel the specified reboot queue entry", 15 | Long: `Cancel the specified reboot queue entry.`, 16 | Args: cobra.ExactArgs(1), 17 | RunE: func(cmd *cobra.Command, args []string) error { 18 | index, err := 
strconv.ParseInt(args[0], 10, 64) 19 | if err != nil { 20 | return err 21 | } 22 | 23 | well.Go(func(ctx context.Context) error { 24 | entry, err := storage.GetRebootsEntry(ctx, index) 25 | if err != nil { 26 | return err 27 | } 28 | 29 | entry.Status = cke.RebootStatusCancelled 30 | return storage.UpdateRebootsEntry(ctx, entry) 31 | }) 32 | well.Stop() 33 | return well.Wait() 34 | }, 35 | } 36 | 37 | func init() { 38 | rebootQueueCmd.AddCommand(rebootQueueCancelCmd) 39 | } 40 | -------------------------------------------------------------------------------- /pkg/ckecli/cmd/repair_queue_delete.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "context" 5 | "strconv" 6 | 7 | "github.com/cybozu-go/well" 8 | "github.com/spf13/cobra" 9 | ) 10 | 11 | var repairQueueDeleteCmd = &cobra.Command{ 12 | Use: "delete INDEX", 13 | Short: "delete a repair queue entry", 14 | Long: `Delete the specified repair queue entry.`, 15 | Args: cobra.ExactArgs(1), 16 | RunE: func(cmd *cobra.Command, args []string) error { 17 | index, err := strconv.ParseInt(args[0], 10, 64) 18 | if err != nil { 19 | return err 20 | } 21 | 22 | well.Go(func(ctx context.Context) error { 23 | entry, err := storage.GetRepairsEntry(ctx, index) 24 | if err != nil { 25 | return err 26 | } 27 | 28 | if entry.Deleted { 29 | return nil 30 | } 31 | 32 | entry.Deleted = true 33 | return storage.UpdateRepairsEntry(ctx, entry) 34 | }) 35 | well.Stop() 36 | return well.Wait() 37 | }, 38 | } 39 | 40 | func init() { 41 | repairQueueCmd.AddCommand(repairQueueDeleteCmd) 42 | } 43 | -------------------------------------------------------------------------------- /op/status_test.go: -------------------------------------------------------------------------------- 1 | package op 2 | 3 | import "testing" 4 | 5 | func TestContainCommandOption(t *testing.T) { 6 | type args struct { 7 | slice []string 8 | optionName string 9 | } 10 | tests := []struct { 11 | 
name string 12 | args args 13 | want bool 14 | }{ 15 | { 16 | "valid", 17 | args{[]string{"scheduler", "--config", "aaa"}, "--config"}, 18 | true, 19 | }, 20 | { 21 | "with =", 22 | args{[]string{"scheduler", "--config=aaa"}, "--config"}, 23 | true, 24 | }, 25 | { 26 | "with space character", 27 | args{[]string{"scheduler", "--config aaa"}, "--config"}, 28 | true, 29 | }, 30 | { 31 | "no content", 32 | args{[]string{"scheduler", "--option1", "aaa"}, "--config"}, 33 | false, 34 | }, 35 | } 36 | for _, tt := range tests { 37 | t.Run(tt.name, func(t *testing.T) { 38 | if got := containCommandOption(tt.args.slice, tt.args.optionName); got != tt.want { 39 | t.Errorf("containCommandOption() = %v, want %v", got, tt.want) 40 | } 41 | }) 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /tools/Makefile: -------------------------------------------------------------------------------- 1 | CNI_PLUGIN_VERSION = 1.7.1 2 | TAG = ghcr.io/cybozu-go/cke-tools:dev 3 | GOBUILD = CGO_ENABLED=0 go build -ldflags="-w -s" 4 | 5 | .PHONY: all 6 | all: bin/empty-dir bin/install-cni bin/make_directories bin/rivers bin/write_files plugins 7 | 8 | .PHONY: test 9 | test: 10 | go test -v ./... 11 | 12 | .PHONY: image 13 | image: all 14 | docker build --no-cache -t $(TAG) . 
15 | 16 | bin/empty-dir: 17 | mkdir -p bin 18 | $(GOBUILD) -o $@ ./empty-dir 19 | 20 | bin/install-cni: 21 | mkdir -p bin 22 | $(GOBUILD) -o $@ ./install-cni 23 | 24 | bin/make_directories: 25 | mkdir -p bin 26 | $(GOBUILD) -o $@ ./make_directories 27 | 28 | bin/rivers: 29 | mkdir -p bin 30 | $(GOBUILD) -o $@ ./rivers 31 | 32 | bin/write_files: 33 | mkdir -p bin 34 | $(GOBUILD) -o $@ ./write_files 35 | 36 | .PHONY: plugins 37 | plugins: 38 | rm -rf plugins 39 | git clone --depth 1 -b v$(CNI_PLUGIN_VERSION) https://github.com/containernetworking/plugins 40 | cd plugins; CGO_ENABLED=0 ./build_linux.sh 41 | 42 | .PHONY: clean 43 | clean: 44 | rm -rf bin plugins 45 | -------------------------------------------------------------------------------- /server/responses.go: -------------------------------------------------------------------------------- 1 | package server 2 | 3 | import ( 4 | "context" 5 | "encoding/json" 6 | "net/http" 7 | 8 | "github.com/cybozu-go/log" 9 | "github.com/cybozu-go/well" 10 | ) 11 | 12 | func renderJSON(w http.ResponseWriter, data interface{}, status int) { 13 | w.Header().Set("Content-Type", "application/json") 14 | w.WriteHeader(status) 15 | err := json.NewEncoder(w).Encode(data) 16 | if err != nil { 17 | log.Error("failed to output JSON", map[string]interface{}{ 18 | log.FnError: err.Error(), 19 | }) 20 | } 21 | } 22 | 23 | func renderError(ctx context.Context, w http.ResponseWriter, e APIError) { 24 | fields := well.FieldsFromContext(ctx) 25 | fields["status"] = e.Status 26 | fields[log.FnError] = e.Error() 27 | log.Error(http.StatusText(e.Status), fields) 28 | 29 | w.Header().Set("Content-Type", "application/json") 30 | w.WriteHeader(e.Status) 31 | err := json.NewEncoder(w).Encode(fields) 32 | if err != nil { 33 | log.Error("failed to output JSON", map[string]interface{}{ 34 | log.FnError: err.Error(), 35 | }) 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /images.go: 
-------------------------------------------------------------------------------- 1 | package cke 2 | 3 | // Image is the type of container images. 4 | type Image string 5 | 6 | // Name returns docker image name. 7 | func (i Image) Name() string { 8 | return string(i) 9 | } 10 | 11 | // Container image definitions 12 | const ( 13 | EtcdImage = Image("ghcr.io/cybozu/etcd:3.6.5.1") 14 | KubernetesImage = Image("ghcr.io/cybozu/kubernetes:1.32.7.1") 15 | ToolsImage = Image("ghcr.io/cybozu-go/cke-tools:1.32.0") 16 | PauseImage = Image("ghcr.io/cybozu/pause:3.10.1.2") 17 | CoreDNSImage = Image("ghcr.io/cybozu/coredns:1.12.2.1") 18 | UnboundImage = Image("ghcr.io/cybozu/unbound:1.24.1.1") 19 | UnboundExporterImage = Image("ghcr.io/cybozu/unbound_exporter:0.4.6.3") 20 | ) 21 | 22 | // AllImages return container images list used by CKE 23 | func AllImages() []string { 24 | return []string{ 25 | EtcdImage.Name(), 26 | ToolsImage.Name(), 27 | KubernetesImage.Name(), 28 | PauseImage.Name(), 29 | CoreDNSImage.Name(), 30 | UnboundImage.Name(), 31 | UnboundExporterImage.Name(), 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /docs/api.md: -------------------------------------------------------------------------------- 1 | REST API 2 | ======== 3 | 4 | ## `GET /health` 5 | 6 | Get health information of this CKE instance. 7 | 8 | **Successful response** 9 | 10 | - HTTP status code: 200 OK 11 | - HTTP response header: `Content-Type: application/json` 12 | - HTTP response body: Health information of this CKE instance. The response is `{"health":"healthy"}` 13 | 14 | **Failure response** 15 | 16 | - HTTP status code: 500 Internal Server Error 17 | - HTTP response header: `Content-Type: application/json` 18 | - HTTP response body: Health information of this CKE instance. 
The response is `{"health":"unhealthy"}` 19 | 20 | **Example** 21 | 22 | ```console 23 | $ curl http://localhost:10180/health 24 | {"health":"healthy"} 25 | ``` 26 | 27 | ## `GET /version` 28 | 29 | Get current CKE version. 30 | 31 | **Successful response** 32 | 33 | - HTTP status code: 200 OK 34 | - HTTP response header: `Content-Type: application/json` 35 | - HTTP response body: Current CKE version. 36 | 37 | **Example** 38 | 39 | ```console 40 | $ curl http://localhost:10180/version 41 | {"version":"1.15.5"} 42 | ``` 43 | -------------------------------------------------------------------------------- /pkg/ckecli/cmd/sabakan_set_url.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | "net/url" 7 | "path" 8 | "strings" 9 | 10 | "github.com/cybozu-go/well" 11 | "github.com/spf13/cobra" 12 | ) 13 | 14 | // sabakanSetURLCmd represents the "sabakan set-url" command 15 | var sabakanSetURLCmd = &cobra.Command{ 16 | Use: "set-url URL", 17 | Short: "set URL of sabakan server", 18 | Long: `Set URL of sabakan server and enable sabakan integration.`, 19 | 20 | Args: cobra.ExactArgs(1), 21 | RunE: func(cmd *cobra.Command, args []string) error { 22 | u, err := url.Parse(args[0]) 23 | if err != nil { 24 | return err 25 | } 26 | 27 | if !u.IsAbs() { 28 | return errors.New("invalid URL") 29 | } 30 | 31 | if !strings.HasSuffix(u.Path, "/graphql") { 32 | u.Path = path.Join(u.Path, "/graphql") 33 | } 34 | 35 | well.Go(func(ctx context.Context) error { 36 | return storage.SetSabakanURL(ctx, u.String()) 37 | }) 38 | well.Stop() 39 | return well.Wait() 40 | }, 41 | } 42 | 43 | func init() { 44 | sabakanCmd.AddCommand(sabakanSetURLCmd) 45 | } 46 | -------------------------------------------------------------------------------- /docs/constraints.md: -------------------------------------------------------------------------------- 1 | Constraints on cluster 2 | =================== 3 | 4 | 
Constraints 5 | ----------------- 6 | 7 | Cluster should satisfy these constraints. 8 | 9 | | Name | Type | Default | Description | 10 | | -------------------------------------- | ---- | ------- | --------------------------------------------------------------------- | 11 | | `control-plane-count` | int | 1 | Number of control plane nodes | 12 | | `minimum-workers-rate` | int | 80 | The minimum percentage of workers/machines. | 13 | | `maximum-unreachable-nodes-for-reboot` | int | 0 | The maximum number of unreachable nodes allowed for operating reboot. | 14 | | `maximum-repair-queue-entries` | int | 0 | The maximum number of repair queue entries | 15 | | `wait-seconds-to-repair-rebooting` | int | 0 | The wait time in seconds to repair a rebooting machine | 16 | -------------------------------------------------------------------------------- /example/Vagrantfile: -------------------------------------------------------------------------------- 1 | # -*- mode: ruby -*- 2 | # vi: set ft=ruby : 3 | 4 | Vagrant.configure("2") do |config| 5 | config.vm.box = "flatcar-stable" 6 | config.vm.box_url = "https://stable.release.flatcar-linux.net/amd64-usr/current/flatcar_production_vagrant.json" 7 | config.vm.provider "virtualbox" do |vb| 8 | vb.customize ["modifyvm", :id, "--paravirtprovider", "kvm"] 9 | end 10 | 11 | config.vm.define "worker-1" do |machine| 12 | machine.vm.network "private_network", ip: "192.168.1.101" 13 | machine.vm.provider "virtualbox" do |vb| 14 | vb.memory = "8192" 15 | vb.cpus = 4 16 | end 17 | end 18 | 19 | config.vm.define "worker-2" do |machine| 20 | machine.vm.network "private_network", ip: "192.168.1.102" 21 | machine.vm.provider "virtualbox" do |vb| 22 | vb.memory = "8192" 23 | vb.cpus = 4 24 | end 25 | end 26 | 27 | config.vm.define "worker-3" do |machine| 28 | machine.vm.network "private_network", ip: "192.168.1.103" 29 | machine.vm.provider "virtualbox" do |vb| 30 | vb.memory = "8192" 31 | vb.cpus = 4 32 | end 33 | end 34 | end 35 | 
-------------------------------------------------------------------------------- /pkg/ckecli/cmd/reboot_queue_add_test.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/cybozu-go/cke" 7 | ) 8 | 9 | func TestValidateNode(t *testing.T) { 10 | cluster := &cke.Cluster{ 11 | Nodes: []*cke.Node{ 12 | { 13 | Address: "1.1.1.1", 14 | ControlPlane: true, 15 | }, 16 | { 17 | Address: "2.2.2.2", 18 | ControlPlane: true, 19 | }, 20 | }, 21 | } 22 | 23 | testCases := []struct { 24 | name string 25 | node string 26 | succeed bool 27 | }{ 28 | { 29 | name: "succeed", 30 | node: "1.1.1.1", 31 | succeed: true, 32 | }, 33 | { 34 | name: "non-existing node", 35 | node: "3.3.3.3", 36 | succeed: false, 37 | }, 38 | } 39 | 40 | for _, tc := range testCases { 41 | t.Run(tc.name, func(t *testing.T) { 42 | ret := validateNode(tc.node, cluster) 43 | if tc.succeed { 44 | if ret != nil { 45 | t.Errorf("validateNodes() failed unexpectedly: %v", ret) 46 | } 47 | } else { 48 | if ret == nil { 49 | t.Error("validateNodes() succeeded unexpectedly") 50 | } 51 | } 52 | }) 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /static/rbac.yml: -------------------------------------------------------------------------------- 1 | kind: ClusterRole 2 | apiVersion: rbac.authorization.k8s.io/v1 3 | metadata: 4 | name: system:kube-apiserver-to-kubelet 5 | labels: 6 | kubernetes.io/bootstrapping: rbac-defaults 7 | annotations: 8 | cke.cybozu.com/revision: "1" 9 | # turn on auto-reconciliation 10 | # https://kubernetes.io/docs/reference/access-authn-authz/rbac/#auto-reconciliation 11 | rbac.authorization.kubernetes.io/autoupdate: "true" 12 | rules: 13 | - apiGroups: [""] 14 | resources: 15 | - nodes/proxy 16 | - nodes/stats 17 | - nodes/log 18 | - nodes/spec 19 | - nodes/metrics 20 | verbs: ["*"] 21 | --- 22 | kind: ClusterRoleBinding 23 | apiVersion: 
rbac.authorization.k8s.io/v1 24 | metadata: 25 | name: system:kube-apiserver 26 | labels: 27 | kubernetes.io/bootstrapping: rbac-defaults 28 | annotations: 29 | cke.cybozu.com/revision: "1" 30 | rbac.authorization.kubernetes.io/autoupdate: "true" 31 | roleRef: 32 | apiGroup: rbac.authorization.k8s.io 33 | kind: ClusterRole 34 | name: system:kube-apiserver-to-kubelet 35 | subjects: 36 | - kind: User 37 | name: kubernetes 38 | -------------------------------------------------------------------------------- /.github/workflows/sonobuoy.yaml: -------------------------------------------------------------------------------- 1 | name: sonobuoy 2 | on: 3 | workflow_dispatch: 4 | jobs: 5 | sonobuoy: 6 | name: Run sonobuoy 7 | runs-on: ubuntu-24.04 8 | steps: 9 | - uses: actions/checkout@v4 10 | - uses: actions/setup-go@v5 11 | with: 12 | go-version-file: go.mod 13 | - uses: google-github-actions/auth@v2 14 | with: 15 | credentials_json: ${{ secrets.NECO_TEST_SERVICE_ACCOUNT }} 16 | - uses: google-github-actions/setup-gcloud@v2 17 | - name: Set GCP instance name 18 | run: echo "INSTANCE_NAME=cke-${{ matrix.suite }}-${{ github.run_number }}-$(TZ=Asia/Tokyo date +%H%M%S)" >> $GITHUB_ENV 19 | - name: Run sonobuoy 20 | run: ./bin/run-sonobuoy.sh 21 | timeout-minutes: 240 22 | - name: Check failures 23 | run: | 24 | grep -F 'no tests failed for plugin "e2e" in tarball' /tmp/e2e-check.log 25 | - name: Upload sonobuoy test result 26 | uses: actions/upload-artifact@v4 27 | with: 28 | name: sonobuoy-test 29 | path: /tmp/sonobuoy.tar.gz 30 | retention-days: 1 31 | if: always() 32 | -------------------------------------------------------------------------------- /operation.go: -------------------------------------------------------------------------------- 1 | package cke 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | ) 7 | 8 | // Operator is the interface for operations 9 | type Operator interface { 10 | // Name returns the operation name. 
11 | Name() string 12 | // NextCommand returns the next command or nil if completed. 13 | NextCommand() Commander 14 | // Targets returns the ip which will be affected by the operation 15 | Targets() []string 16 | } 17 | 18 | // InfoOperator is an extension of Operator that provides some information after the operation 19 | type InfoOperator interface { 20 | Operator 21 | Info() string 22 | } 23 | 24 | // Commander is a single step to proceed an operation 25 | type Commander interface { 26 | // Run executes the command 27 | Run(ctx context.Context, inf Infrastructure, leaderKey string) error 28 | // Command returns the command information 29 | Command() Command 30 | } 31 | 32 | // Command represents some command 33 | type Command struct { 34 | Name string `json:"name"` 35 | Target string `json:"target"` 36 | } 37 | 38 | // String implements fmt.Stringer 39 | func (c Command) String() string { 40 | return fmt.Sprintf("%s %s", c.Name, c.Target) 41 | } 42 | -------------------------------------------------------------------------------- /constraints_test.go: -------------------------------------------------------------------------------- 1 | package cke 2 | 3 | import "testing" 4 | 5 | func testConstraintsCheck(t *testing.T) { 6 | nodes := []*Node{ 7 | {ControlPlane: true}, 8 | {ControlPlane: true}, 9 | {ControlPlane: false}, 10 | {ControlPlane: false}, 11 | } 12 | 13 | tests := []struct { 14 | name string 15 | constraints Constraints 16 | cluster Cluster 17 | wantErr bool 18 | }{ 19 | { 20 | name: "valid case", 21 | constraints: Constraints{ControlPlaneCount: 2}, 22 | cluster: Cluster{Nodes: nodes[:]}, 23 | wantErr: false, 24 | }, 25 | 26 | { 27 | name: "control plane not equal", 28 | constraints: Constraints{ControlPlaneCount: 1}, 29 | cluster: Cluster{Nodes: nodes[:]}, 30 | wantErr: true, 31 | }, 32 | } 33 | for _, tt := range tests { 34 | c := tt.constraints 35 | t.Run(tt.name, func(t *testing.T) { 36 | if err := c.Check(&tt.cluster); (err != nil) != tt.wantErr { 
37 | t.Errorf("Constraints.Check() error = %v, wantErr %v", err, tt.wantErr) 38 | } 39 | }) 40 | } 41 | } 42 | 43 | func TestConstraints(t *testing.T) { 44 | t.Run("Check", testConstraintsCheck) 45 | } 46 | -------------------------------------------------------------------------------- /mtest/upgrade_test.go: -------------------------------------------------------------------------------- 1 | package mtest 2 | 3 | import ( 4 | "time" 5 | 6 | . "github.com/onsi/ginkgo/v2" 7 | . "github.com/onsi/gomega" 8 | ) 9 | 10 | func testUpgrade() { 11 | It("tests Kubernetes before reboot", func() { 12 | Eventually(func() error { 13 | _, _, err := kubectl("get", "sa/default") 14 | return err 15 | }).Should(Succeed()) 16 | }) 17 | 18 | It("reboots all nodes", func() { 19 | stopCKE() 20 | 21 | nodes := []string{node1, node2, node3, node4, node5, node6} 22 | for _, n := range nodes { 23 | execAt(n, "sudo", "systemd-run", "reboot", "-f", "-f") 24 | } 25 | time.Sleep(10 * time.Second) 26 | Eventually(func() error { 27 | for _, n := range nodes { 28 | _, err := execAtLocal("ping", "-c", "1", "-W", "1", n) 29 | if err != nil { 30 | return err 31 | } 32 | } 33 | return nil 34 | }).Should(Succeed()) 35 | 36 | Expect(prepareSSHClients(nodes...)).Should(Succeed()) 37 | }) 38 | 39 | It("runs new CKE", func() { 40 | runCKE(ckeImageURL) 41 | waitServerStatusCompletion() 42 | }) 43 | 44 | It("removes kubectl cache", func() { 45 | execSafeAt(host1, "rm", "-rf", "~/.kube/cache") 46 | }) 47 | } 48 | -------------------------------------------------------------------------------- /pkg/ckecli/cmd/ca_get.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | "os" 7 | 8 | "github.com/cybozu-go/well" 9 | "github.com/spf13/cobra" 10 | ) 11 | 12 | // caGetCmd represents the "ca get" command 13 | var caGetCmd = &cobra.Command{ 14 | Use: "get NAME", 15 | Short: "dump stored CA certificate to stdout", 16 | Long: 
`Dump stored CA certificate to stdout. 17 | 18 | NAME is one of: 19 | server 20 | etcd-peer 21 | etcd-client 22 | kubernetes`, 23 | 24 | Args: func(cmd *cobra.Command, args []string) error { 25 | if len(args) != 1 { 26 | return errors.New("wrong number of arguments") 27 | } 28 | 29 | if !isValidCAName(args[0]) { 30 | return errors.New("wrong CA name: " + args[0]) 31 | } 32 | 33 | return nil 34 | }, 35 | RunE: func(cmd *cobra.Command, args []string) error { 36 | well.Go(func(ctx context.Context) error { 37 | pem, err := storage.GetCACertificate(ctx, args[0]) 38 | if err != nil { 39 | return err 40 | } 41 | 42 | _, err = os.Stdout.WriteString(pem) 43 | return err 44 | }) 45 | well.Stop() 46 | return well.Wait() 47 | }, 48 | } 49 | 50 | func init() { 51 | caCmd.AddCommand(caGetCmd) 52 | } 53 | -------------------------------------------------------------------------------- /pkg/ckecli/cmd/reboot_queue_reset_backoff.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "context" 5 | "time" 6 | 7 | "github.com/cybozu-go/cke" 8 | "github.com/cybozu-go/well" 9 | "github.com/spf13/cobra" 10 | ) 11 | 12 | var rebootQueueResetBackoffCmd = &cobra.Command{ 13 | Use: "reset-backoff", 14 | Short: "Reset drain backoff of the entries in reboot queue", 15 | Long: `Reset drain_backoff_count and drain_backoff_expire of the entries in reboot queue`, 16 | RunE: func(cmd *cobra.Command, args []string) error { 17 | well.Go(func(ctx context.Context) error { 18 | entries, err := storage.GetRebootsEntries(ctx) 19 | if err != nil { 20 | return err 21 | } 22 | for _, entry := range entries { 23 | entry.DrainBackOffCount = 0 24 | entry.DrainBackOffExpire = time.Time{} 25 | err := storage.UpdateRebootsEntry(ctx, entry) 26 | if err == cke.ErrNotFound { 27 | // The entry has just finished 28 | continue 29 | } 30 | if err != nil { 31 | return err 32 | } 33 | } 34 | return nil 35 | }) 36 | well.Stop() 37 | return well.Wait() 38 | 
}, 39 | } 40 | 41 | func init() { 42 | rebootQueueCmd.AddCommand(rebootQueueResetBackoffCmd) 43 | } 44 | -------------------------------------------------------------------------------- /pkg/ckecli/cmd/reboot_queue_cancel_all.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "context" 5 | 6 | "github.com/cybozu-go/cke" 7 | "github.com/cybozu-go/well" 8 | "github.com/spf13/cobra" 9 | ) 10 | 11 | var rebootQueueCancelAllCmd = &cobra.Command{ 12 | Use: "cancel-all", 13 | Short: "cancel all the reboot queue entries", 14 | Long: `Cancel all the reboot queue entries.`, 15 | Args: cobra.NoArgs, 16 | RunE: func(cmd *cobra.Command, args []string) error { 17 | well.Go(func(ctx context.Context) error { 18 | entries, err := storage.GetRebootsEntries(ctx) 19 | if err != nil { 20 | return err 21 | } 22 | 23 | for _, entry := range entries { 24 | if entry.Status == cke.RebootStatusCancelled { 25 | continue 26 | } 27 | 28 | entry.Status = cke.RebootStatusCancelled 29 | err := storage.UpdateRebootsEntry(ctx, entry) 30 | if err == cke.ErrNotFound { 31 | // The entry has just finished 32 | continue 33 | } 34 | if err != nil { 35 | return err 36 | } 37 | } 38 | return nil 39 | }) 40 | well.Stop() 41 | return well.Wait() 42 | }, 43 | } 44 | 45 | func init() { 46 | rebootQueueCmd.AddCommand(rebootQueueCancelAllCmd) 47 | } 48 | -------------------------------------------------------------------------------- /pkg/ckecli/cmd/repair_queue_reset_backoff.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "context" 5 | "time" 6 | 7 | "github.com/cybozu-go/cke" 8 | "github.com/cybozu-go/well" 9 | "github.com/spf13/cobra" 10 | ) 11 | 12 | var repairQueueResetBackoffCmd = &cobra.Command{ 13 | Use: "reset-backoff", 14 | Short: "Reset drain backoff of the entries in repair queue", 15 | Long: `Reset drain_backoff_count and drain_backoff_expire of the entries 
package localproxy

import (
	"bytes"
	"fmt"

	"github.com/cybozu-go/cke"
	"github.com/cybozu-go/cke/op/k8s"
)

// decideOps compares the observed status st against the desired state and
// returns the API server address to use (newAP) along with the operations
// needed to converge kube-proxy and unbound on the local host.
// It returns an empty newAP and no ops when no API server is available.
func decideOps(c *cke.Cluster, currentAP string, st *status) (newAP string, ops []cke.Operator) {
	// No healthy API server: nothing can be decided this cycle.
	if len(st.apiServers) == 0 {
		return
	}

	// Stick with the current API server if it is still listed as healthy;
	// otherwise fall back to the first healthy one. This avoids restarting
	// kube-proxy on every reconciliation just because the list order changed.
	newAP = st.apiServers[0]
	for _, n := range st.apiServers {
		if n == currentAP {
			newAP = currentAP
			break
		}
	}

	apURL := fmt.Sprintf("https://%s:6443", newAP)

	if !st.proxyRunning {
		ops = append(ops, k8s.KubeProxyBootOp(ckeNodes, c.Name, apURL, c.Options.Proxy))
	} else {
		// Restart kube-proxy when the chosen API server changed or the
		// running image differs from the desired Kubernetes image.
		if newAP != currentAP || st.proxyImage != cke.KubernetesImage.Name() {
			ops = append(ops, k8s.KubeProxyRestartOp(ckeNodes, c.Name, apURL, c.Options.Proxy))
		}
	}

	// Boot unbound if it is not running; booting supersedes any restart
	// decision, hence the early return.
	if !st.unboundRunning {
		ops = append(ops, &unboundBootOp{conf: st.desiredUnboundConf})
		return
	}

	// Restart unbound when its configuration or image is out of date.
	if !bytes.Equal(st.unboundConf, st.desiredUnboundConf) || st.unboundImage != cke.UnboundImage.Name() {
		ops = append(ops, &unboundRestartOp{conf: st.desiredUnboundConf})
	}

	return
}
20 | id: extract-tag 21 | run: echo "tag=${GITHUB_REF#refs/tags/tools-}" >> $GITHUB_OUTPUT 22 | - name: Log in to ghcr.io 23 | uses: docker/login-action@v3 24 | with: 25 | registry: ghcr.io 26 | username: ${{ github.actor }} 27 | password: ${{ secrets.GITHUB_TOKEN }} 28 | - name: Build and Push cke-tools 29 | uses: docker/build-push-action@v6 30 | with: 31 | context: ./tools 32 | push: true 33 | platforms: linux/amd64 34 | tags: ghcr.io/${{ github.repository_owner }}/cke-tools:${{ steps.extract-tag.outputs.tag }} 35 | labels: org.opencontainers.image.source=https://github.com/${{ github.repository }} 36 | -------------------------------------------------------------------------------- /docs/cke-localproxy.md: -------------------------------------------------------------------------------- 1 | # cke-localproxy command reference 2 | 3 | `cke-localproxy` is an optional service that runs on the same host as CKE. 4 | 5 | It runs `kube-proxy` and a node local DNS service to allow programs running 6 | on the same host to access Kubernetes Services. 7 | 8 | ## Prerequisites 9 | 10 | `cke-localproxy` depends on Docker. 11 | The user account that runs `cke-localproxy` therefore should be granted to use Docker. 12 | 13 | In order to run the local DNS service, you may need to disable `systemd-resolved.service`. 14 | 15 | To access Kubernetes Services, the host needs to be able to communicate with Kubernetes Pods. 
16 | 17 | ## Configuration 18 | 19 | To resolve Service DNS names, configure `/etc/resolv.conf` like this: 20 | 21 | ``` 22 | nameserver 127.0.0.1 23 | search cluster.local 24 | options ndots:3 25 | ``` 26 | 27 | ## Synopsis 28 | 29 | ``` 30 | Usage of cke-localproxy: 31 | --config string configuration file path (default "/etc/cke/config.yml") 32 | --interval duration check interval (default 1m0s) 33 | --logfile string Log filename 34 | --logformat string Log format [plain,logfmt,json] 35 | --loglevel string Log level [critical,error,warning,info,debug] 36 | ``` 37 | -------------------------------------------------------------------------------- /constraints.go: -------------------------------------------------------------------------------- 1 | package cke 2 | 3 | import "errors" 4 | 5 | // Constraints is a set of conditions that a cluster must satisfy 6 | type Constraints struct { 7 | ControlPlaneCount int `json:"control-plane-count"` 8 | MinimumWorkersRate int `json:"minimum-workers-rate"` 9 | RebootMaximumUnreachable int `json:"maximum-unreachable-nodes-for-reboot"` 10 | MaximumRepairs int `json:"maximum-repair-queue-entries"` 11 | RepairRebootingSeconds int `json:"wait-seconds-to-repair-rebooting"` 12 | } 13 | 14 | // Check checks the cluster satisfies the constraints 15 | func (c *Constraints) Check(cluster *Cluster) error { 16 | cpCount := 0 17 | 18 | for _, n := range cluster.Nodes { 19 | if n.ControlPlane { 20 | cpCount++ 21 | } 22 | } 23 | 24 | if cpCount != c.ControlPlaneCount { 25 | return errors.New("number of control planes is not equal to the constraint") 26 | } 27 | 28 | return nil 29 | } 30 | 31 | // DefaultConstraints returns the default constraints 32 | func DefaultConstraints() *Constraints { 33 | return &Constraints{ 34 | ControlPlaneCount: 1, 35 | MinimumWorkersRate: 80, 36 | RebootMaximumUnreachable: 0, 37 | MaximumRepairs: 0, 38 | RepairRebootingSeconds: 0, 39 | } 40 | } 41 | 
package main

import (
	"bytes"
	"strings"
	"testing"
	"time"

	"github.com/cybozu-go/log"
)

// TestHealthChecker verifies that HealthChecker transitions an upstream
// between healthy and unhealthy states and logs each transition.
func TestHealthChecker(t *testing.T) {
	// A single fake upstream; testDialer presumably succeeds until
	// SetErrorAddress is called below — TODO confirm against its definition.
	upstreams := []*Upstream{{
		address: "0",
	}}
	dialer := &testDialer{}
	logger := log.NewLogger()
	buf := &bytes.Buffer{}
	logger.SetOutput(buf)
	cfg := HealthCheckerConfig{
		CheckInterval: time.Millisecond * 100,
		Dialer:        dialer,
		Logger:        logger,
	}
	hc := NewHealthChecker(upstreams, cfg)
	hc.Start()

	// Wait two check intervals so at least one check has completed.
	time.Sleep(time.Millisecond * 200)
	if !upstreams[0].IsHealthy() {
		t.Errorf("HealthChecker did not change upstream healthy\n")
	}
	if !strings.Contains(buf.String(), "becomes healthy") {
		t.Errorf("HealthChecker did not output status change log")
	}

	// Make dialing to "0" fail and wait for the checker to notice.
	buf = &bytes.Buffer{}
	logger.SetOutput(buf)
	dialer.SetErrorAddress("0")
	time.Sleep(time.Millisecond * 300)
	if upstreams[0].IsHealthy() {
		t.Errorf("HealthChecker did not change upstream unhealthy\n")
	}
	if !strings.Contains(buf.String(), "becomes unhealthy") {
		t.Errorf("HealthChecker did not output status change log")
	}
}
0.1).Should(Succeed()) 44 | }) 45 | } 46 | -------------------------------------------------------------------------------- /tools/make_directories/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "path/filepath" 7 | "strconv" 8 | 9 | "github.com/spf13/cobra" 10 | ) 11 | 12 | var options struct { 13 | mode string 14 | } 15 | 16 | var rootCmd = &cobra.Command{ 17 | Use: "make_directories DIR [DIR ...]", 18 | Short: "make directories", 19 | Long: `make directories with given permission flags`, 20 | Args: cobra.MinimumNArgs(1), 21 | RunE: func(cmd *cobra.Command, args []string) error { 22 | cmd.SilenceUsage = true 23 | return subMain(args) 24 | }, 25 | } 26 | 27 | func main() { 28 | if err := rootCmd.Execute(); err != nil { 29 | fmt.Fprintf(os.Stderr, "%v\n", err) 30 | os.Exit(1) 31 | } 32 | } 33 | 34 | func subMain(dirs []string) error { 35 | modeBits, err := strconv.ParseInt(options.mode, 8, 64) 36 | if err != nil { 37 | return fmt.Errorf("invalid mode %s: %w", options.mode, err) 38 | } 39 | 40 | for _, d := range dirs { 41 | if !filepath.IsAbs(d) { 42 | return fmt.Errorf("non-absolute path: %s", d) 43 | } 44 | 45 | if err := os.MkdirAll(d, os.FileMode(modeBits)); err != nil { 46 | return fmt.Errorf("failed to create %s: %w", d, err) 47 | } 48 | } 49 | return nil 50 | } 51 | 52 | func init() { 53 | rootCmd.Flags().StringVar(&options.mode, "mode", "755", "permission bits for directories") 54 | } 55 | -------------------------------------------------------------------------------- /example/etcd.conf.yml: -------------------------------------------------------------------------------- 1 | # -*- mode: yaml -*- 2 | # This is the configuration file for the etcd server. 3 | 4 | # Human-readable name for this member. 5 | name: 'etcd0' 6 | 7 | # Path to the data directory. 8 | data-dir: '/data/etcd' 9 | 10 | # List of comma separated URLs to listen on for peer traffic. 
11 | listen-peer-urls: http://0.0.0.0:2380 12 | 13 | # List of comma separated URLs to listen on for client traffic. 14 | listen-client-urls: http://0.0.0.0:2379 15 | 16 | # List of this member's peer URLs to advertise to the rest of the cluster. 17 | # The URLs needed to be a comma-separated list. 18 | initial-advertise-peer-urls: http://172.30.0.14:2380 19 | 20 | # List of this member's client URLs to advertise to the public. 21 | # The URLs needed to be a comma-separated list. 22 | advertise-client-urls: http://172.30.0.14:2379 23 | 24 | # Initial cluster configuration for bootstrapping. 25 | initial-cluster: etcd0=http://172.30.0.14:2380 26 | 27 | # Initial cluster token for the etcd cluster during bootstrap. 28 | initial-cluster-token: 'etcd-cluster-1' 29 | 30 | # Initial cluster state ('new' or 'existing'). 31 | initial-cluster-state: 'new' 32 | 33 | # Enable runtime profiling data via HTTP server 34 | enable-pprof: true 35 | 36 | # Specify 'stdout' or 'stderr' to skip journald logging even when running under systemd. 37 | log-outputs: [stderr] 38 | -------------------------------------------------------------------------------- /tools/rivers/README.md: -------------------------------------------------------------------------------- 1 | rivers 2 | ====== 3 | 4 | Rivers is a simple TCP reverse proxy written in GO. 5 | 6 | Usage 7 | ----- 8 | 9 | Launch rivers by docker as follows: 10 | 11 | ```console 12 | $ ./rivers 13 | --listen localhost:6443 \ 14 | --upstreams 10.0.0.100:6443,10.0.0.101:6443,10.0.0.102:6443 15 | ``` 16 | 17 | Rivers starts and waits for TCP connections on address and port specified by `--listen`. 18 | Rivers receives TCP packets, and forwards them to upstream servers specified by `--upstreams`. 19 | The upstream server is selected at random. 
package main

import (
	"os"
	"time"

	"github.com/cybozu-go/cke"
	"github.com/cybozu-go/cke/localproxy"
	"github.com/cybozu-go/etcdutil"
	"github.com/cybozu-go/log"
	"github.com/cybozu-go/well"
	"github.com/spf13/pflag"
	"sigs.k8s.io/yaml"
)

var (
	// flgConfigPath points at the etcd client configuration file.
	flgConfigPath = pflag.String("config", "/etc/cke/config.yml", "configuration file path")
	// flgInterval controls how often the controller reconciles.
	flgInterval = pflag.Duration("interval", 1*time.Minute, "check interval")
)

// loadConfig reads the YAML/JSON file at p and unmarshals it over the
// default CKE etcd configuration, so unspecified fields keep defaults.
func loadConfig(p string) (*etcdutil.Config, error) {
	b, err := os.ReadFile(p)
	if err != nil {
		return nil, err
	}
	cfg := cke.NewEtcdConfig()
	err = yaml.Unmarshal(b, cfg)
	if err != nil {
		return nil, err
	}

	return cfg, nil
}

// main loads the configuration, connects to etcd, and runs the local-proxy
// controller until it stops or receives a signal.
func main() {
	pflag.Parse()
	well.LogConfig{}.Apply()

	cfg, err := loadConfig(*flgConfigPath)
	if err != nil {
		log.ErrorExit(err)
	}

	etcd, err := etcdutil.NewClient(cfg)
	if err != nil {
		log.ErrorExit(err)
	}
	defer etcd.Close()

	// Controller
	controller := localproxy.LocalProxy{Interval: *flgInterval, Storage: cke.Storage{Client: etcd}}
	well.Go(controller.Run)

	// Exit silently when terminated by a signal; report other errors.
	err = well.Wait()
	if err != nil && !well.IsSignaled(err) {
		log.ErrorExit(err)
	}
}
36 | continue 37 | } 38 | if err != nil { 39 | return err 40 | } 41 | 42 | fmt.Println(entry.Index) 43 | } 44 | 45 | return nil 46 | }) 47 | well.Stop() 48 | return well.Wait() 49 | }, 50 | } 51 | 52 | func init() { 53 | repairQueueCmd.AddCommand(repairQueueDeleteFinishedCmd) 54 | } 55 | -------------------------------------------------------------------------------- /pkg/ckecli/cmd/sabakan_set_template.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "context" 5 | "os" 6 | 7 | "github.com/cybozu-go/cke" 8 | "github.com/cybozu-go/cke/sabakan" 9 | "github.com/cybozu-go/well" 10 | "github.com/spf13/cobra" 11 | "sigs.k8s.io/yaml" 12 | ) 13 | 14 | // sabakanSetTemplateCmd represents the "sabakan set-template" command 15 | var sabakanSetTemplateCmd = &cobra.Command{ 16 | Use: "set-template FILE", 17 | Short: "set the cluster configuration template", 18 | Long: `Set the cluster configuration template. 19 | 20 | FILE should contain a YAML/JSON template of the cluster configuration. 
21 | The format is the same as the cluster configuration, but must contain 22 | just one control-plane node and one non contorl-plane node.`, 23 | 24 | Args: cobra.ExactArgs(1), 25 | RunE: func(cmd *cobra.Command, args []string) error { 26 | b, err := os.ReadFile(args[0]) 27 | if err != nil { 28 | return err 29 | } 30 | 31 | tmpl := cke.NewCluster() 32 | err = yaml.Unmarshal(b, tmpl) 33 | if err != nil { 34 | return err 35 | } 36 | err = sabakan.ValidateTemplate(tmpl) 37 | if err != nil { 38 | return err 39 | } 40 | 41 | well.Go(func(ctx context.Context) error { 42 | return storage.SetSabakanTemplate(ctx, tmpl) 43 | }) 44 | well.Stop() 45 | return well.Wait() 46 | }, 47 | } 48 | 49 | func init() { 50 | sabakanCmd.AddCommand(sabakanSetTemplateCmd) 51 | } 52 | -------------------------------------------------------------------------------- /pkg/ckecli/cmd/repair_queue_delete_unfinished.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | 7 | "github.com/cybozu-go/cke" 8 | "github.com/cybozu-go/well" 9 | "github.com/spf13/cobra" 10 | ) 11 | 12 | var repairQueueDeleteUnfinishedCmd = &cobra.Command{ 13 | Use: "delete-unfinished", 14 | Short: "delete all unfinished repair queue entries", 15 | Long: `Delete all unfinished repair queue entries. 16 | 17 | Entries not in "succeeded" or "failed" status are deleted. 18 | This displays the index numbers of deleted entries, one per line.`, 19 | Args: cobra.NoArgs, 20 | RunE: func(cmd *cobra.Command, args []string) error { 21 | well.Go(func(ctx context.Context) error { 22 | entries, err := storage.GetRepairsEntries(ctx) 23 | if err != nil { 24 | return err 25 | } 26 | 27 | for _, entry := range entries { 28 | if entry.Deleted || entry.HasFinished() { 29 | continue 30 | } 31 | 32 | entry.Deleted = true 33 | err := storage.UpdateRepairsEntry(ctx, entry) 34 | if err == cke.ErrNotFound { 35 | // The entry has just been dequeued. 
36 | continue 37 | } 38 | if err != nil { 39 | return err 40 | } 41 | 42 | fmt.Println(entry.Index) 43 | } 44 | 45 | return nil 46 | }) 47 | well.Stop() 48 | return well.Wait() 49 | }, 50 | } 51 | 52 | func init() { 53 | repairQueueCmd.AddCommand(repairQueueDeleteUnfinishedCmd) 54 | } 55 | -------------------------------------------------------------------------------- /docs/faq.md: -------------------------------------------------------------------------------- 1 | Frequently Asked Questions 2 | ========================== 3 | 4 | ## kubelet dies saying: failed to get device for dir "/var/lib/kubelet": could not find device ... in cached partitions map 5 | 6 | This error happens when the filesystem of `/var/lib/kubelet` is `tmpfs`. 7 | 8 | [kubelet][] uses `/var/lib/kubelet` directory to prepare Pod volumes and [local ephemeral storages][les]. 9 | 10 | To limit and request usage of local ephemeral storages, kubelet has a feature gate called [`LocalStorageCapacityIsolation`][LSI]. When `LocalStorageCapacityIsolation` is enabled (default since Kubernetes 1.10), kubelet tries to identify the underlying block device of `/var/lib/kubelet`. If `tmpfs` is used for `/var/lib/kubelet`, kubelet dies because there is no underlying block device. 11 | 12 | You have several options to workaround the problem: 13 | 14 | 1. Use filesystem other than `tmpfs`. 15 | 2. Specify another directory by `--root-dir` option. 16 | 3. Disable the feature gate by adding the following `extra_args` to kubelet. 
17 | 18 | ```console 19 | --feature-gates=LocalStorageCapacityIsolation=disable 20 | ``` 21 | 22 | [kubelet]: https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/#options 23 | [les]: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/#local-ephemeral-storage 24 | [LSI]: https://kubernetes.io/docs/reference/command-line-tools-reference/feature-gates/ 25 | -------------------------------------------------------------------------------- /docs/logging.md: -------------------------------------------------------------------------------- 1 | Logging 2 | ======= 3 | 4 | CKE logs 5 | -------- 6 | 7 | `cke` is built on [github.com/cybozu-go/well][well] framework that provides [standard logging options](https://github.com/cybozu-go/well#command-line-options). 8 | 9 | In addition, CKE records recent important operations in etcd. Use [`ckecli history`](ckecli.md) to view them. 10 | 11 | Kubernetes programs 12 | ------------------- 13 | 14 | CKE runs Kubernetes programs such as `apiserver` or `kubelet` by `docker run --log-driver=journald` 15 | to send their logs to `journald`. 16 | 17 | To view logs of a program, use `journalctl` as follows: 18 | 19 | ```console 20 | $ sudo journalctl CONTAINER_NAME=kubelet 21 | ``` 22 | 23 | As kube-apiserver uses both stderr and stdout, filter its logs as follows. 24 | 25 | To view general logs of a apiserver, use `journalctl` as follows: 26 | 27 | ```console 28 | $ sudo journalctl CONTAINER_NAME=kube-apiserver -p 3 29 | ``` 30 | 31 | You can send the audit log of a apiserver to either of journal log or file. 32 | 33 | If the log is sent to journal log, you can see it using `journalctl` as follows: 34 | 35 | ```console 36 | $ sudo journalctl CONTAINER_NAME=kube-apiserver -p 6..6 37 | ``` 38 | 39 | Container names are defined in [op/constants.go](../op/constants.go). 
40 | 41 | Ref: https://docs.docker.com/config/containers/logging/journald/#retrieve-log-messages-with-journalctl 42 | 43 | [well]: https://github.com/cybozu-go/well 44 | -------------------------------------------------------------------------------- /pkg/ckecli/cmd/etcd_useradd.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | 7 | "github.com/cybozu-go/cke" 8 | "github.com/cybozu-go/well" 9 | "github.com/spf13/cobra" 10 | ) 11 | 12 | // etcdUserAddCmd represents the "etcd user-add" command 13 | var etcdUserAddCmd = &cobra.Command{ 14 | Use: "user-add NAME PREFIX", 15 | Short: "add a user to CKE managed etcd", 16 | Long: `Add a user to etcd managed by CKE (not the one used by CKE). 17 | 18 | NAME must not be "root" or "backup". 19 | PREFIX limits the user's privilege to keys having the prefix.`, 20 | 21 | Args: func(cmd *cobra.Command, args []string) error { 22 | if len(args) != 2 { 23 | return errors.New("wrong number of arguments") 24 | } 25 | 26 | switch args[0] { 27 | case "", "root", "backup": 28 | return errors.New("bad etcd username: " + args[0]) 29 | } 30 | 31 | if args[1] == "" { 32 | return errors.New("bad etcd prefix: " + args[1]) 33 | } 34 | 35 | return nil 36 | }, 37 | RunE: func(cmd *cobra.Command, args []string) error { 38 | username := args[0] 39 | prefix := args[1] 40 | 41 | well.Go(func(ctx context.Context) error { 42 | etcd, err := inf.NewEtcdClient(ctx, nil) 43 | if err != nil { 44 | return err 45 | } 46 | return cke.AddUserRole(ctx, etcd, username, prefix) 47 | }) 48 | well.Stop() 49 | return well.Wait() 50 | }, 51 | } 52 | 53 | func init() { 54 | etcdCmd.AddCommand(etcdUserAddCmd) 55 | } 56 | -------------------------------------------------------------------------------- /kubeconfig.go: -------------------------------------------------------------------------------- 1 | package cke 2 | 3 | import ( 4 | "k8s.io/client-go/tools/clientcmd/api" 5 | 
) 6 | 7 | // Kubeconfig creates *api.Config that will be rendered as "kubeconfig" file. 8 | func Kubeconfig(cluster, user, ca, clientCrt, clientKey string) *api.Config { 9 | cfg := api.NewConfig() 10 | c := api.NewCluster() 11 | c.Server = "https://localhost:16443" 12 | c.CertificateAuthorityData = []byte(ca) 13 | cfg.Clusters[cluster] = c 14 | 15 | auth := api.NewAuthInfo() 16 | auth.ClientCertificateData = []byte(clientCrt) 17 | auth.ClientKeyData = []byte(clientKey) 18 | cfg.AuthInfos[user] = auth 19 | 20 | ctx := api.NewContext() 21 | ctx.AuthInfo = user 22 | ctx.Cluster = cluster 23 | cfg.Contexts["default"] = ctx 24 | cfg.CurrentContext = "default" 25 | 26 | return cfg 27 | } 28 | 29 | // UserKubeconfig makes kubeconfig for users 30 | func UserKubeconfig(cluster, userName, ca, clientCrt, clientKey, server string) *api.Config { 31 | cfg := api.NewConfig() 32 | c := api.NewCluster() 33 | c.Server = server 34 | c.CertificateAuthorityData = []byte(ca) 35 | cfg.Clusters[cluster] = c 36 | 37 | auth := api.NewAuthInfo() 38 | auth.ClientCertificateData = []byte(clientCrt) 39 | auth.ClientKeyData = []byte(clientKey) 40 | cfg.AuthInfos[userName] = auth 41 | 42 | ctx := api.NewContext() 43 | ctx.AuthInfo = userName 44 | ctx.Cluster = cluster 45 | cfg.Contexts["default"] = ctx 46 | cfg.CurrentContext = "default" 47 | 48 | return cfg 49 | } 50 | -------------------------------------------------------------------------------- /etcd_util.go: -------------------------------------------------------------------------------- 1 | package cke 2 | 3 | import ( 4 | "context" 5 | "crypto/rand" 6 | "encoding/hex" 7 | 8 | "github.com/cybozu-go/etcdutil" 9 | clientv3 "go.etcd.io/etcd/client/v3" 10 | ) 11 | 12 | const ( 13 | defaultEtcdPrefix = "/cke/" 14 | ) 15 | 16 | // NewEtcdConfig creates Config with default prefix. 17 | func NewEtcdConfig() *etcdutil.Config { 18 | return etcdutil.NewConfig(defaultEtcdPrefix) 19 | } 20 | 21 | // AddUserRole create etcd user and role. 
22 | func AddUserRole(ctx context.Context, cli *clientv3.Client, name, prefix string) error { 23 | r := make([]byte, 32) 24 | _, err := rand.Read(r) 25 | if err != nil { 26 | return err 27 | } 28 | 29 | _, err = cli.UserAdd(ctx, name, hex.EncodeToString(r)) 30 | if err != nil { 31 | return err 32 | } 33 | 34 | if prefix == "" { 35 | return nil 36 | } 37 | 38 | _, err = cli.RoleAdd(ctx, name) 39 | if err != nil { 40 | return err 41 | } 42 | 43 | _, err = cli.RoleGrantPermission(ctx, name, prefix, clientv3.GetPrefixRangeEnd(prefix), clientv3.PermissionType(clientv3.PermReadWrite)) 44 | if err != nil { 45 | return err 46 | } 47 | 48 | _, err = cli.UserGrantRole(ctx, name, name) 49 | if err != nil { 50 | return err 51 | } 52 | 53 | return nil 54 | } 55 | 56 | // GetUserRoles get roles of target user. 57 | func GetUserRoles(ctx context.Context, cli *clientv3.Client, user string) ([]string, error) { 58 | resp, err := cli.UserGet(ctx, user) 59 | return resp.Roles, err 60 | } 61 | -------------------------------------------------------------------------------- /pkg/ckecli/cmd/repair_queue_add.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "context" 5 | 6 | "github.com/cybozu-go/cke" 7 | "github.com/cybozu-go/well" 8 | "github.com/spf13/cobra" 9 | ) 10 | 11 | var repairQueueAddCmd = &cobra.Command{ 12 | Use: "add OPERATION MACHINE_TYPE ADDRESS [SERIAL]", 13 | Short: "append a repair request to the repair queue", 14 | Long: `Append a repair request to the repair queue. 15 | 16 | The repair target is a machine with an IP address ADDRESS and a machine type MACHINE_TYPE. 17 | The machine should be processed with an operation OPERATION. 
18 | Optionally, you can specify the machine's serial number as the fourth argument.`, 19 | Args: cobra.RangeArgs(3, 4), 20 | RunE: func(cmd *cobra.Command, args []string) error { 21 | operation := args[0] 22 | machineType := args[1] 23 | address := args[2] 24 | serial := "" 25 | if len(args) > 3 { 26 | serial = args[3] 27 | } 28 | 29 | well.Go(func(ctx context.Context) error { 30 | entry := cke.NewRepairQueueEntry(operation, machineType, address, serial) 31 | cluster, err := storage.GetCluster(ctx) 32 | if err != nil { 33 | return err 34 | } 35 | if _, err := entry.GetMatchingRepairOperation(cluster); err != nil { 36 | return err 37 | } 38 | 39 | return storage.RegisterRepairsEntry(ctx, entry) 40 | }) 41 | well.Stop() 42 | return well.Wait() 43 | }, 44 | } 45 | 46 | func init() { 47 | repairQueueCmd.AddCommand(repairQueueAddCmd) 48 | } 49 | -------------------------------------------------------------------------------- /pkg/ckecli/cmd/cluster_set.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "context" 5 | "os" 6 | 7 | "github.com/cybozu-go/cke" 8 | "github.com/cybozu-go/well" 9 | "github.com/spf13/cobra" 10 | "sigs.k8s.io/yaml" 11 | ) 12 | 13 | // clusterSetCmd represents the "cluster set" command 14 | var clusterSetCmd = &cobra.Command{ 15 | Use: "set FILE", 16 | Short: "load cluster configuration", 17 | Long: `Load cluster configuration from FILE and store it in etcd. 
18 | 19 | The file must be either YAML or JSON.`, 20 | 21 | Args: cobra.ExactArgs(1), 22 | RunE: func(cmd *cobra.Command, args []string) error { 23 | b, err := os.ReadFile(args[0]) 24 | if err != nil { 25 | return err 26 | } 27 | 28 | cfg := cke.NewCluster() 29 | err = yaml.Unmarshal(b, cfg) 30 | if err != nil { 31 | return err 32 | } 33 | err = cfg.Validate(false) 34 | if err != nil { 35 | return err 36 | } 37 | 38 | well.Go(func(ctx context.Context) error { 39 | constraints, err := storage.GetConstraints(ctx) 40 | switch err { 41 | case cke.ErrNotFound: 42 | constraints = cke.DefaultConstraints() 43 | fallthrough 44 | case nil: 45 | err = constraints.Check(cfg) 46 | if err != nil { 47 | return err 48 | } 49 | default: 50 | return err 51 | } 52 | 53 | return storage.PutCluster(ctx, cfg) 54 | }) 55 | well.Stop() 56 | return well.Wait() 57 | }, 58 | } 59 | 60 | func init() { 61 | clusterCmd.AddCommand(clusterSetCmd) 62 | } 63 | -------------------------------------------------------------------------------- /tools/install-cni/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "io" 6 | "os" 7 | "path/filepath" 8 | ) 9 | 10 | const ( 11 | hostBinDir = "/host/bin" 12 | cniDir = "/cni_plugins/bin" 13 | ) 14 | 15 | func main() { 16 | err := subMain() 17 | if err != nil { 18 | fmt.Fprintf(os.Stderr, "%v\n", err) 19 | os.Exit(1) 20 | } 21 | } 22 | 23 | func subMain() error { 24 | files, err := os.ReadDir(cniDir) 25 | if err != nil { 26 | return fmt.Errorf("failed to read %s: %w", cniDir, err) 27 | } 28 | 29 | for _, f := range files { 30 | if err := copyFile(f.Name()); err != nil { 31 | return err 32 | } 33 | } 34 | 35 | return nil 36 | } 37 | 38 | func copyFile(name string) error { 39 | src := filepath.Join(cniDir, name) 40 | f, err := os.Open(src) 41 | if err != nil { 42 | return fmt.Errorf("failed to open %s: %w", src, err) 43 | } 44 | defer f.Close() 45 | 46 | dest := 
filepath.Join(hostBinDir, name) 47 | err = os.Remove(dest) 48 | if err != nil && !os.IsNotExist(err) { 49 | return fmt.Errorf("failed to remove %s: %w", dest, err) 50 | } 51 | 52 | o, err := os.Create(dest) 53 | if err != nil { 54 | return fmt.Errorf("failed to create %s: %w", dest, err) 55 | } 56 | defer o.Close() 57 | 58 | _, err = io.Copy(o, f) 59 | if err != nil { 60 | return fmt.Errorf("failed to copy %s: %w", name, err) 61 | } 62 | 63 | if err := o.Sync(); err != nil { 64 | return fmt.Errorf("failed to fsync %s: %w", dest, err) 65 | } 66 | 67 | return o.Chmod(0755) 68 | } 69 | -------------------------------------------------------------------------------- /op/common/image_pull.go: -------------------------------------------------------------------------------- 1 | package common 2 | 3 | import ( 4 | "context" 5 | "time" 6 | 7 | "github.com/cybozu-go/cke" 8 | "github.com/cybozu-go/log" 9 | "github.com/cybozu-go/well" 10 | ) 11 | 12 | type imagePullCommand struct { 13 | nodes []*cke.Node 14 | img cke.Image 15 | } 16 | 17 | const ( 18 | pullMaxRetry = 3 19 | pullWaitDuration = 10 * time.Second 20 | ) 21 | 22 | // ImagePullCommand returns a Commander to pull an image on nodes. 
23 | func ImagePullCommand(nodes []*cke.Node, img cke.Image) cke.Commander { 24 | return imagePullCommand{nodes, img} 25 | } 26 | 27 | func (c imagePullCommand) Run(ctx context.Context, inf cke.Infrastructure, _ string) error { 28 | env := well.NewEnvironment(ctx) 29 | for _, n := range c.nodes { 30 | ce := inf.Engine(n.Address) 31 | env.Go(func(ctx context.Context) error { 32 | var err error 33 | for i := 0; i < pullMaxRetry; i++ { 34 | err = ce.PullImage(c.img) 35 | if err == nil { 36 | return nil 37 | } 38 | 39 | log.Warn("failed to pull image", map[string]interface{}{ 40 | "image": c.img.Name(), 41 | log.FnError: err, 42 | }) 43 | select { 44 | case <-ctx.Done(): 45 | return ctx.Err() 46 | case <-time.After(pullWaitDuration): 47 | } 48 | } 49 | return err 50 | }) 51 | } 52 | env.Stop() 53 | return env.Wait() 54 | } 55 | 56 | func (c imagePullCommand) Command() cke.Command { 57 | return cke.Command{ 58 | Name: "image-pull", 59 | Target: c.img.Name(), 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /op/etcd/destroy.go: -------------------------------------------------------------------------------- 1 | package etcd 2 | 3 | import ( 4 | "github.com/cybozu-go/cke" 5 | "github.com/cybozu-go/cke/op" 6 | "github.com/cybozu-go/cke/op/common" 7 | ) 8 | 9 | type destroyMemberOp struct { 10 | endpoints []string 11 | targets []*cke.Node 12 | ids []uint64 13 | params cke.EtcdParams 14 | step int 15 | } 16 | 17 | // DestroyMemberOp returns an Operator to remove and destroy a member. 
18 | func DestroyMemberOp(cp []*cke.Node, targets []*cke.Node, ids []uint64) cke.Operator { 19 | return &destroyMemberOp{ 20 | endpoints: etcdEndpoints(cp), 21 | targets: targets, 22 | ids: ids, 23 | } 24 | } 25 | 26 | func (o *destroyMemberOp) Name() string { 27 | return "etcd-destroy-member" 28 | } 29 | 30 | func (o *destroyMemberOp) NextCommand() cke.Commander { 31 | switch o.step { 32 | case 0: 33 | o.step++ 34 | return common.VolumeRemoveCommand(o.targets, op.EtcdAddedMemberVolumeName) 35 | case 1: 36 | o.step++ 37 | return removeMemberCommand{o.endpoints, o.ids} 38 | case 2: 39 | o.step++ 40 | return common.KillContainersCommand(o.targets, op.EtcdContainerName) 41 | case 3: 42 | o.step++ 43 | return common.VolumeRemoveCommand(o.targets, op.EtcdVolumeName(o.params)) 44 | case 4: 45 | o.step++ 46 | return waitEtcdSyncCommand{o.endpoints, false} 47 | } 48 | return nil 49 | } 50 | 51 | func (o *destroyMemberOp) Targets() []string { 52 | ips := make([]string, len(o.targets)) 53 | for i, n := range o.targets { 54 | ips[i] = n.Address 55 | } 56 | return ips 57 | } 58 | -------------------------------------------------------------------------------- /server/apierror.go: -------------------------------------------------------------------------------- 1 | package server 2 | 3 | import "net/http" 4 | 5 | // APIError is to define REST API errors. 6 | type APIError struct { 7 | Status int 8 | Message string 9 | Err error 10 | } 11 | 12 | // Error implements error interface. 13 | func (e APIError) Error() string { 14 | if e.Err == nil { 15 | return e.Message 16 | } 17 | 18 | return e.Err.Error() + ": " + e.Message 19 | } 20 | 21 | // InternalServerError creates an APIError. 22 | func InternalServerError(e error) APIError { 23 | return APIError{ 24 | http.StatusInternalServerError, 25 | http.StatusText(http.StatusInternalServerError), 26 | e, 27 | } 28 | } 29 | 30 | // BadRequest creates an APIError that describes what was bad in the request. 
31 | func BadRequest(reason string) APIError { 32 | return APIError{http.StatusBadRequest, "invalid request: " + reason, nil} 33 | } 34 | 35 | // Common API errors 36 | var ( 37 | APIErrBadRequest = APIError{http.StatusBadRequest, "invalid request", nil} 38 | APIErrForbidden = APIError{http.StatusForbidden, "forbidden", nil} 39 | APIErrNotFound = APIError{http.StatusNotFound, "requested resource is not found", nil} 40 | APIErrBadMethod = APIError{http.StatusMethodNotAllowed, "method not allowed", nil} 41 | APIErrConflict = APIError{http.StatusConflict, "conflicted", nil} 42 | APIErrLengthRequired = APIError{http.StatusLengthRequired, "content-length is required", nil} 43 | APIErrTooLargeAsset = APIError{http.StatusRequestEntityTooLarge, "too large asset", nil} 44 | ) 45 | -------------------------------------------------------------------------------- /server/server.go: -------------------------------------------------------------------------------- 1 | package server 2 | 3 | import ( 4 | "context" 5 | "net/http" 6 | "time" 7 | 8 | "github.com/cybozu-go/cke" 9 | "go.etcd.io/etcd/api/v3/v3rpc/rpctypes" 10 | clientv3 "go.etcd.io/etcd/client/v3" 11 | ) 12 | 13 | // Server is the cke server. 
14 | type Server struct { 15 | EtcdClient *clientv3.Client 16 | Timeout time.Duration 17 | } 18 | 19 | type version struct { 20 | Version string `json:"version"` 21 | } 22 | 23 | type health struct { 24 | Health string `json:"health"` 25 | } 26 | 27 | func (s Server) ServeHTTP(w http.ResponseWriter, r *http.Request) { 28 | if r.Method == http.MethodGet && r.URL.Path == "/version" { 29 | s.handleVersion(w, r) 30 | } else if r.Method == http.MethodGet && r.URL.Path == "/health" { 31 | s.handleHealth(w, r) 32 | } else { 33 | renderError(r.Context(), w, APIErrNotFound) 34 | } 35 | } 36 | 37 | func (s Server) handleVersion(w http.ResponseWriter, r *http.Request) { 38 | renderJSON(w, version{ 39 | Version: cke.Version, 40 | }, http.StatusOK) 41 | } 42 | 43 | func (s Server) handleHealth(w http.ResponseWriter, r *http.Request) { 44 | ctxWithTimeout, cancel := context.WithTimeout(r.Context(), s.Timeout) 45 | defer cancel() 46 | 47 | _, err := s.EtcdClient.Get(ctxWithTimeout, "health") 48 | if err == nil || err == rpctypes.ErrPermissionDenied { 49 | renderJSON(w, health{ 50 | Health: "healthy", 51 | }, http.StatusOK) 52 | } else { 53 | renderJSON(w, health{ 54 | Health: "unhealthy", 55 | }, http.StatusInternalServerError) 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /sabakan/score.go: -------------------------------------------------------------------------------- 1 | package sabakan 2 | 3 | import ( 4 | "time" 5 | ) 6 | 7 | const ( 8 | // maxCountPerRack should be more than max machine num per rack + 1 9 | maxCountPerRack = 100 10 | // healthyScore is added when the machine status is healthy. 
11 | healthyScore = 1000 12 | ) 13 | 14 | func scoreByDays(days int) int { 15 | var score int 16 | if days > 250 { 17 | score++ 18 | } 19 | if days > 500 { 20 | score++ 21 | } 22 | if days > 1000 { 23 | score++ 24 | } 25 | if days < -250 { 26 | score-- 27 | } 28 | if days < -500 { 29 | score-- 30 | } 31 | if days < -1000 { 32 | score-- 33 | } 34 | return score 35 | } 36 | 37 | func scoreMachine(m *Machine, rackCount map[int]int, ts time.Time) int { 38 | rackScore := maxCountPerRack - rackCount[m.Spec.Rack] 39 | 40 | days := int(m.Spec.RetireDate.Sub(ts).Hours() / 24) 41 | daysScore := scoreByDays(days) 42 | 43 | return rackScore*10 + daysScore 44 | } 45 | 46 | func scoreMachineWithHealthStatus(m *Machine, rackCount map[int]int, ts time.Time) int { 47 | score := scoreMachine(m, rackCount, ts) 48 | if m.Status.State != StateHealthy { 49 | return score 50 | } 51 | return healthyScore + score 52 | } 53 | 54 | func filterHealthyMachinesByRole(ms []*Machine, role string) []*Machine { 55 | var filtered []*Machine 56 | for _, m := range ms { 57 | if m.Status.State != StateHealthy { 58 | continue 59 | } 60 | if role != "" && m.Spec.Role != role { 61 | continue 62 | } 63 | filtered = append(filtered, m) 64 | } 65 | 66 | return filtered 67 | } 68 | -------------------------------------------------------------------------------- /op/clusterdns/clusterdns.go: -------------------------------------------------------------------------------- 1 | package clusterdns 2 | 3 | import ( 4 | "bytes" 5 | "strings" 6 | "text/template" 7 | 8 | "github.com/cybozu-go/cke/op" 9 | corev1 "k8s.io/api/core/v1" 10 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 11 | ) 12 | 13 | // CoreDNSTemplateVersion is the version of CoreDNS template 14 | const CoreDNSTemplateVersion = "3" 15 | 16 | var clusterDNSTemplate = template.Must(template.New("").Parse(`.:1053 { 17 | errors 18 | health 19 | ready 20 | log . 
{combined} { 21 | class denial error 22 | } 23 | kubernetes {{ .Domain }} in-addr.arpa ip6.arpa { 24 | pods verified 25 | {{- if .Upstreams }} 26 | fallthrough in-addr.arpa ip6.arpa 27 | {{- end }} 28 | } 29 | {{- if .Upstreams }} 30 | forward . {{ .Upstreams }} 31 | {{- end }} 32 | prometheus :9153 33 | cache 30 34 | reload 35 | loadbalance 36 | } 37 | `)) 38 | 39 | // ConfigMap returns ConfigMap for CoreDNS 40 | func ConfigMap(domain string, dnsServers []string) *corev1.ConfigMap { 41 | buf := new(bytes.Buffer) 42 | err := clusterDNSTemplate.Execute(buf, struct { 43 | Domain string 44 | Upstreams string 45 | }{ 46 | Domain: domain, 47 | Upstreams: strings.Join(dnsServers, " "), 48 | }) 49 | if err != nil { 50 | panic(err) 51 | } 52 | 53 | return &corev1.ConfigMap{ 54 | ObjectMeta: metav1.ObjectMeta{ 55 | Name: op.ClusterDNSAppName, 56 | Namespace: "kube-system", 57 | }, 58 | Data: map[string]string{ 59 | "Corefile": buf.String(), 60 | }, 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /pkg/ckecli/cmd/resource_delete.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "bufio" 5 | "context" 6 | "io" 7 | "os" 8 | 9 | "github.com/cybozu-go/cke" 10 | "github.com/cybozu-go/well" 11 | "github.com/spf13/cobra" 12 | 13 | k8sYaml "k8s.io/apimachinery/pkg/util/yaml" 14 | ) 15 | 16 | var resourceDeleteCmd = &cobra.Command{ 17 | Use: "delete FILE", 18 | Short: "remove user-defined resources.", 19 | Long: `Remove user-defined resources. 20 | 21 | FILE should contain multiple Kubernetes resources in YAML or JSON format. 22 | If FILE is "-", then data is read from stdin. 
23 | 24 | Note that resources in Kubernetes will not be removed automatically.`, 25 | 26 | Args: cobra.ExactArgs(1), 27 | RunE: func(cmd *cobra.Command, args []string) error { 28 | r := os.Stdin 29 | if args[0] != "-" { 30 | f, err := os.Open(args[0]) 31 | if err != nil { 32 | return err 33 | } 34 | defer f.Close() 35 | r = f 36 | } 37 | 38 | well.Go(func(ctx context.Context) error { 39 | y := k8sYaml.NewYAMLReader(bufio.NewReader(r)) 40 | for { 41 | data, err := y.Read() 42 | if err == io.EOF { 43 | break 44 | } else if err != nil { 45 | return err 46 | } 47 | key, err := cke.ParseResource(data) 48 | if err != nil { 49 | return err 50 | } 51 | err = storage.DeleteResource(ctx, key) 52 | if err != nil { 53 | return err 54 | } 55 | } 56 | return nil 57 | }) 58 | well.Stop() 59 | return well.Wait() 60 | }, 61 | } 62 | 63 | func init() { 64 | resourceCmd.AddCommand(resourceDeleteCmd) 65 | } 66 | -------------------------------------------------------------------------------- /op/rivers_restart.go: -------------------------------------------------------------------------------- 1 | package op 2 | 3 | import ( 4 | "github.com/cybozu-go/cke" 5 | "github.com/cybozu-go/cke/op/common" 6 | ) 7 | 8 | type riversRestartOp struct { 9 | nodes []*cke.Node 10 | upstreams []*cke.Node 11 | params cke.ServiceParams 12 | name string 13 | upstreamPort int 14 | listenPort int 15 | 16 | pulled bool 17 | finished bool 18 | } 19 | 20 | // RiversRestartOp returns an Operator to restart rivers. 
21 | func RiversRestartOp(nodes, upstreams []*cke.Node, params cke.ServiceParams, name string, upstreamPort, listenPort int) cke.Operator { 22 | return &riversRestartOp{ 23 | nodes: nodes, 24 | upstreams: upstreams, 25 | params: params, 26 | name: name, 27 | upstreamPort: upstreamPort, 28 | listenPort: listenPort, 29 | } 30 | } 31 | 32 | func (o *riversRestartOp) Name() string { 33 | return o.name + "-restart" 34 | } 35 | 36 | func (o *riversRestartOp) NextCommand() cke.Commander { 37 | if !o.pulled { 38 | o.pulled = true 39 | return common.ImagePullCommand(o.nodes, cke.ToolsImage) 40 | } 41 | 42 | if !o.finished { 43 | o.finished = true 44 | return common.RunContainerCommand(o.nodes, o.name, cke.ToolsImage, 45 | common.WithParams(RiversParams(o.upstreams, o.upstreamPort, o.listenPort)), 46 | common.WithExtra(o.params), 47 | common.WithRestart()) 48 | } 49 | return nil 50 | } 51 | 52 | func (o *riversRestartOp) Targets() []string { 53 | ips := make([]string, len(o.nodes)) 54 | for i, n := range o.nodes { 55 | ips[i] = n.Address 56 | } 57 | return ips 58 | } 59 | -------------------------------------------------------------------------------- /pkg/ckecli/cmd/vault_config.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "context" 5 | "encoding/json" 6 | "os" 7 | 8 | "github.com/cybozu-go/cke" 9 | "github.com/cybozu-go/well" 10 | "github.com/spf13/cobra" 11 | ) 12 | 13 | // vaultConfigCmd represents the "vault config" command 14 | var vaultConfigCmd = &cobra.Command{ 15 | Use: "config FILE|-", 16 | Short: "store parameters to connect Vault", 17 | Long: `Load parameters to connect Vault from a FILE or stdin, 18 | and stores it in etcd. 19 | 20 | The parameters are given by a JSON object having these fields: 21 | 22 | endpoint: Vault URL. 23 | ca-cert: PEM encoded CA certificate to verify server certificate. 24 | role-id: AppRole ID to login to Vault. 
25 | secret-id: AppRole secret to login to Vault. 26 | 27 | If the argument is "-", the JSON is read from stdin.`, 28 | 29 | Args: cobra.ExactArgs(1), 30 | RunE: func(cmd *cobra.Command, args []string) error { 31 | f := os.Stdin 32 | if args[0] != "-" { 33 | var err error 34 | f, err = os.Open(args[0]) 35 | if err != nil { 36 | return err 37 | } 38 | defer f.Close() 39 | } 40 | 41 | cfg := new(cke.VaultConfig) 42 | err := json.NewDecoder(f).Decode(cfg) 43 | if err != nil { 44 | return err 45 | } 46 | err = cfg.Validate() 47 | if err != nil { 48 | return err 49 | } 50 | 51 | well.Go(func(ctx context.Context) error { 52 | return storage.PutVaultConfig(ctx, cfg) 53 | }) 54 | well.Stop() 55 | return well.Wait() 56 | }, 57 | } 58 | 59 | func init() { 60 | vaultCmd.AddCommand(vaultConfigCmd) 61 | } 62 | -------------------------------------------------------------------------------- /server/watch.go: -------------------------------------------------------------------------------- 1 | package server 2 | 3 | import ( 4 | "context" 5 | "strings" 6 | 7 | "github.com/cybozu-go/cke" 8 | clientv3 "go.etcd.io/etcd/client/v3" 9 | ) 10 | 11 | func initStateless(ctx context.Context, etcd *clientv3.Client, ch chan<- struct{}) (int64, error) { 12 | defer func() { 13 | // notify the caller of the readiness 14 | ch <- struct{}{} 15 | }() 16 | 17 | resp, err := etcd.Get(ctx, cke.KeyVault) 18 | if err != nil { 19 | return 0, err 20 | } 21 | rev := resp.Header.Revision 22 | 23 | if len(resp.Kvs) == 1 { 24 | err = cke.ConnectVault(ctx, resp.Kvs[0].Value) 25 | if err != nil { 26 | return 0, err 27 | } 28 | } 29 | 30 | return rev, nil 31 | } 32 | 33 | func startWatcher(ctx context.Context, etcd *clientv3.Client, ch chan<- struct{}) error { 34 | rev, err := initStateless(ctx, etcd, ch) 35 | if err != nil { 36 | return err 37 | } 38 | 39 | wch := etcd.Watch(ctx, "", clientv3.WithPrefix(), clientv3.WithRev(rev+1)) 40 | for resp := range wch { 41 | if err := resp.Err(); err != nil { 42 | return 
err 43 | } 44 | 45 | for _, ev := range resp.Events { 46 | if ev.Type != clientv3.EventTypePut { 47 | continue 48 | } 49 | 50 | key := string(ev.Kv.Key) 51 | switch { 52 | case key == cke.KeyCluster || strings.HasPrefix(key, cke.KeyResourcePrefix): 53 | select { 54 | case ch <- struct{}{}: 55 | default: 56 | } 57 | case key == cke.KeyVault: 58 | err = cke.ConnectVault(ctx, ev.Kv.Value) 59 | if err != nil { 60 | return err 61 | } 62 | } 63 | } 64 | } 65 | return nil 66 | } 67 | -------------------------------------------------------------------------------- /pkg/ckecli/cmd/resource_set.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "bufio" 5 | "context" 6 | "io" 7 | "os" 8 | 9 | "github.com/cybozu-go/cke" 10 | "github.com/cybozu-go/well" 11 | "github.com/spf13/cobra" 12 | 13 | k8sYaml "k8s.io/apimachinery/pkg/util/yaml" 14 | ) 15 | 16 | func updateResource(ctx context.Context, data []byte) error { 17 | key, err := cke.ParseResource(data) 18 | if err != nil { 19 | return err 20 | } 21 | 22 | return storage.SetResource(ctx, key, string(data)) 23 | } 24 | 25 | var resourceSetCmd = &cobra.Command{ 26 | Use: "set FILE", 27 | Short: "register user-defined resources.", 28 | Long: `Register user-defined resources. 29 | 30 | FILE should contain multiple Kubernetes resources in YAML or JSON format. 
31 | If FILE is "-", then data is read from stdin.`, 32 | 33 | Args: cobra.ExactArgs(1), 34 | RunE: func(cmd *cobra.Command, args []string) error { 35 | r := os.Stdin 36 | if args[0] != "-" { 37 | f, err := os.Open(args[0]) 38 | if err != nil { 39 | return err 40 | } 41 | defer f.Close() 42 | r = f 43 | } 44 | 45 | well.Go(func(ctx context.Context) error { 46 | y := k8sYaml.NewYAMLReader(bufio.NewReader(r)) 47 | for { 48 | data, err := y.Read() 49 | if err == io.EOF { 50 | return nil 51 | } 52 | if err != nil { 53 | return err 54 | } 55 | 56 | err = updateResource(ctx, data) 57 | if err != nil { 58 | return err 59 | } 60 | } 61 | }) 62 | well.Stop() 63 | return well.Wait() 64 | }, 65 | } 66 | 67 | func init() { 68 | resourceCmd.AddCommand(resourceSetCmd) 69 | } 70 | -------------------------------------------------------------------------------- /pkg/ckecli/cmd/ca_set.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "context" 5 | "crypto/x509" 6 | "encoding/pem" 7 | "errors" 8 | "os" 9 | 10 | "github.com/cybozu-go/well" 11 | "github.com/spf13/cobra" 12 | ) 13 | 14 | var caData []byte 15 | 16 | // caSetCmd represents the "ca set" command 17 | var caSetCmd = &cobra.Command{ 18 | Use: "set NAME FILE", 19 | Short: "store CA certificate in etcd", 20 | Long: `Load PEM encoded x509 CA certificate from FILE, and stores it in etcd. 
21 | 22 | NAME is one of: 23 | server 24 | etcd-peer 25 | etcd-client 26 | kubernetes 27 | 28 | In fact, these CA should be created in Vault.`, 29 | 30 | Args: func(cmd *cobra.Command, args []string) error { 31 | if len(args) != 2 { 32 | return errors.New("wrong number of arguments") 33 | } 34 | 35 | if !isValidCAName(args[0]) { 36 | return errors.New("wrong CA name: " + args[0]) 37 | } 38 | 39 | var err error 40 | caData, err = os.ReadFile(args[1]) 41 | if err != nil { 42 | return err 43 | } 44 | 45 | block, _ := pem.Decode(caData) 46 | if block == nil { 47 | return errors.New("invalid PEM data") 48 | } 49 | _, err = x509.ParseCertificate(block.Bytes) 50 | if err != nil { 51 | return errors.New("invalid certificate") 52 | } 53 | 54 | return nil 55 | }, 56 | RunE: func(cmd *cobra.Command, args []string) error { 57 | well.Go(func(ctx context.Context) error { 58 | return storage.PutCACertificate(ctx, args[0], string(caData)) 59 | }) 60 | well.Stop() 61 | return well.Wait() 62 | }, 63 | } 64 | 65 | func init() { 66 | caCmd.AddCommand(caSetCmd) 67 | } 68 | -------------------------------------------------------------------------------- /pkg/ckecli/cmd/history.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "context" 5 | "encoding/json" 6 | "os" 7 | 8 | "github.com/cybozu-go/well" 9 | "github.com/spf13/cobra" 10 | ) 11 | 12 | var historyCount int 13 | var followMode bool 14 | 15 | // historyCmd represents the history command 16 | var historyCmd = &cobra.Command{ 17 | Use: "history", 18 | Short: "show the hostname of the current history process", 19 | Long: `Show the hostname of the current history process.`, 20 | 21 | RunE: func(cmd *cobra.Command, args []string) error { 22 | well.Go(func(ctx context.Context) error { 23 | enc := json.NewEncoder(os.Stdout) 24 | enc.SetIndent("", " ") 25 | 26 | if followMode { 27 | recordCh, err := storage.WatchRecords(ctx, int64(historyCount)) 28 | if err != nil { 
29 | return err 30 | } 31 | 32 | for r := range recordCh { 33 | err := enc.Encode(r) 34 | if err != nil { 35 | return err 36 | } 37 | } 38 | return nil 39 | } 40 | 41 | records, err := storage.GetRecords(ctx, int64(historyCount)) 42 | if err != nil { 43 | return err 44 | } 45 | 46 | for _, r := range records { 47 | err = enc.Encode(r) 48 | if err != nil { 49 | return err 50 | } 51 | } 52 | return nil 53 | }) 54 | well.Stop() 55 | return well.Wait() 56 | }, 57 | } 58 | 59 | func init() { 60 | historyCmd.Flags().IntVarP(&historyCount, "count", "n", 0, "limit the number of operations to show") 61 | historyCmd.Flags().BoolVarP(&followMode, "follow", "f", false, "show operations continuously") 62 | rootCmd.AddCommand(historyCmd) 63 | } 64 | -------------------------------------------------------------------------------- /pkg/ckecli/cmd/sabakan_set_variables.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "context" 5 | "encoding/json" 6 | "os" 7 | 8 | "github.com/cybozu-go/cke/sabakan" 9 | "github.com/cybozu-go/well" 10 | "github.com/spf13/cobra" 11 | ) 12 | 13 | // sabakanSetVariablesCmd represents the "sabakan set-variables" command 14 | var sabakanSetVariablesCmd = &cobra.Command{ 15 | Use: "set-variables FILE", 16 | Short: "set the query variables to search available machines in sabakan", 17 | Long: `Set the query variables to search available machines in sabakan. 
18 | 19 | FILE should contain a JSON object like this: 20 | 21 | { 22 | "having": { 23 | "labels": [{"name": "foo", "value": "bar"}], 24 | "racks": [0, 1, 2], 25 | "roles": ["worker"], 26 | "states": ["HEALTHY"], 27 | "minDaysBeforeRetire": 90 28 | }, 29 | "notHaving": { 30 | } 31 | } 32 | `, 33 | 34 | Args: cobra.ExactArgs(1), 35 | RunE: func(cmd *cobra.Command, args []string) error { 36 | data, err := os.ReadFile(args[0]) 37 | if err != nil { 38 | return err 39 | } 40 | 41 | vars := new(sabakan.QueryVariables) 42 | err = json.Unmarshal(data, vars) 43 | if err != nil { 44 | return err 45 | } 46 | err = vars.IsValid() 47 | if err != nil { 48 | return err 49 | } 50 | 51 | well.Go(func(ctx context.Context) error { 52 | return storage.SetSabakanQueryVariables(ctx, string(data)) 53 | }) 54 | well.Stop() 55 | return well.Wait() 56 | }, 57 | } 58 | 59 | func init() { 60 | sabakanCmd.AddCommand(sabakanSetVariablesCmd) 61 | } 62 | -------------------------------------------------------------------------------- /op/k8s/scheduler_restart.go: -------------------------------------------------------------------------------- 1 | package k8s 2 | 3 | import ( 4 | "github.com/cybozu-go/cke" 5 | "github.com/cybozu-go/cke/op" 6 | "github.com/cybozu-go/cke/op/common" 7 | ) 8 | 9 | type schedulerRestartOp struct { 10 | nodes []*cke.Node 11 | 12 | cluster string 13 | params cke.SchedulerParams 14 | 15 | step int 16 | files *common.FilesBuilder 17 | } 18 | 19 | // SchedulerRestartOp returns an Operator to restart kube-scheduler 20 | func SchedulerRestartOp(nodes []*cke.Node, cluster string, params cke.SchedulerParams) cke.Operator { 21 | return &schedulerRestartOp{ 22 | nodes: nodes, 23 | cluster: cluster, 24 | params: params, 25 | files: common.NewFilesBuilder(nodes), 26 | } 27 | } 28 | 29 | func (o *schedulerRestartOp) Name() string { 30 | return "kube-scheduler-restart" 31 | } 32 | 33 | func (o *schedulerRestartOp) NextCommand() cke.Commander { 34 | switch o.step { 35 | case 0: 36 | 
o.step++ 37 | return common.ImagePullCommand(o.nodes, cke.KubernetesImage) 38 | case 1: 39 | o.step++ 40 | return prepareSchedulerFilesCommand{o.cluster, o.files, o.params} 41 | case 2: 42 | o.step++ 43 | return o.files 44 | case 3: 45 | o.step++ 46 | return common.RunContainerCommand(o.nodes, op.KubeSchedulerContainerName, cke.KubernetesImage, 47 | common.WithParams(SchedulerParams()), 48 | common.WithExtra(o.params.ServiceParams), 49 | common.WithRestart()) 50 | default: 51 | return nil 52 | } 53 | } 54 | 55 | func (o *schedulerRestartOp) Targets() []string { 56 | ips := make([]string, len(o.nodes)) 57 | for i, n := range o.nodes { 58 | ips[i] = n.Address 59 | } 60 | return ips 61 | } 62 | -------------------------------------------------------------------------------- /pkg/ckecli/cmd/auto_repair_set_variables.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "context" 5 | "encoding/json" 6 | "os" 7 | 8 | "github.com/cybozu-go/cke/sabakan" 9 | "github.com/cybozu-go/well" 10 | "github.com/spf13/cobra" 11 | ) 12 | 13 | // autoRepairSetVariablesCmd represents the "auto-repair set-variables" command 14 | var autoRepairSetVariablesCmd = &cobra.Command{ 15 | Use: "set-variables FILE", 16 | Short: "set the query variables to search non-healthy machines in sabakan", 17 | Long: `Set the query variables to search non-healthy machines in sabakan. 
18 | 19 | FILE should contain a JSON object like this: 20 | 21 | { 22 | "having": { 23 | "labels": [{"name": "foo", "value": "bar"}], 24 | "racks": [0, 1, 2], 25 | "roles": ["worker"], 26 | "states": ["UNREACHABLE"], 27 | "minDaysBeforeRetire": 90 28 | }, 29 | "notHaving": { 30 | } 31 | } 32 | `, 33 | 34 | Args: cobra.ExactArgs(1), 35 | RunE: func(cmd *cobra.Command, args []string) error { 36 | data, err := os.ReadFile(args[0]) 37 | if err != nil { 38 | return err 39 | } 40 | 41 | vars := new(sabakan.QueryVariables) 42 | err = json.Unmarshal(data, vars) 43 | if err != nil { 44 | return err 45 | } 46 | err = vars.IsValid() 47 | if err != nil { 48 | return err 49 | } 50 | 51 | well.Go(func(ctx context.Context) error { 52 | return storage.SetAutoRepairQueryVariables(ctx, string(data)) 53 | }) 54 | well.Stop() 55 | return well.Wait() 56 | }, 57 | } 58 | 59 | func init() { 60 | autoRepairCmd.AddCommand(autoRepairSetVariablesCmd) 61 | } 62 | -------------------------------------------------------------------------------- /op/k8s/controller_manager_restart.go: -------------------------------------------------------------------------------- 1 | package k8s 2 | 3 | import ( 4 | "github.com/cybozu-go/cke" 5 | "github.com/cybozu-go/cke/op" 6 | "github.com/cybozu-go/cke/op/common" 7 | ) 8 | 9 | type controllerManagerRestartOp struct { 10 | nodes []*cke.Node 11 | 12 | cluster string 13 | serviceSubnet string 14 | params cke.ServiceParams 15 | 16 | pulled bool 17 | finished bool 18 | } 19 | 20 | // ControllerManagerRestartOp returns an Operator to restart kube-controller-manager 21 | func ControllerManagerRestartOp(nodes []*cke.Node, cluster, serviceSubnet string, params cke.ServiceParams) cke.Operator { 22 | return &controllerManagerRestartOp{ 23 | nodes: nodes, 24 | cluster: cluster, 25 | serviceSubnet: serviceSubnet, 26 | params: params, 27 | } 28 | } 29 | 30 | func (o *controllerManagerRestartOp) Name() string { 31 | return "kube-controller-manager-restart" 32 | } 33 | 34 | 
func (o *controllerManagerRestartOp) NextCommand() cke.Commander { 35 | if !o.pulled { 36 | o.pulled = true 37 | return common.ImagePullCommand(o.nodes, cke.KubernetesImage) 38 | } 39 | 40 | if !o.finished { 41 | o.finished = true 42 | return common.RunContainerCommand(o.nodes, op.KubeControllerManagerContainerName, cke.KubernetesImage, 43 | common.WithParams(ControllerManagerParams(o.cluster, o.serviceSubnet)), 44 | common.WithExtra(o.params), 45 | common.WithRestart()) 46 | } 47 | return nil 48 | } 49 | 50 | func (o *controllerManagerRestartOp) Targets() []string { 51 | ips := make([]string, len(o.nodes)) 52 | for i, n := range o.nodes { 53 | ips[i] = n.Address 54 | } 55 | return ips 56 | } 57 | -------------------------------------------------------------------------------- /tools/RELEASE.md: -------------------------------------------------------------------------------- 1 | Release procedure 2 | ================= 3 | 4 | This document describes how to release a new version of `cke-tools`. 5 | 6 | ## Versioning 7 | 8 | Given a version number MAJOR.MINOR.PATCH. 9 | The MAJOR and MINOR version matches that of Kubernetes. 10 | The patch version is increased with `cke-tools` update. 11 | 12 | ## Bump version 13 | 14 | 1. Determine a new version number. Then set `VERSION` variable. 15 | 16 | ```console 17 | # Set VERSION and confirm it. It should not have "v" prefix. 18 | $ VERSION=x.y.z 19 | $ echo $VERSION 20 | ``` 21 | 22 | 2. Make a branch to release 23 | 24 | ```console 25 | $ git checkout main 26 | $ git pull 27 | $ git checkout -b "bump-tools-$VERSION" 28 | ``` 29 | 30 | 3. Edit `CHANGELOG.md` in this directory. 31 | 4. Commit the change and create a pull request. 32 | 33 | ```console 34 | $ git commit -a -m "Bump cke-tools version to $VERSION" 35 | $ git push -u origin HEAD 36 | $ gh pr create -f 37 | ``` 38 | 39 | 5. Merge the pull request. 40 | 6. Add a git tag to the main HEAD, then push it. 41 | 42 | ```console 43 | # Set VERSION again. 
44 | $ VERSION=x.y.z 45 | $ echo $VERSION 46 | 47 | $ git checkout main 48 | $ git pull 49 | $ git tag -a -m "Release tools-$VERSION" "tools-$VERSION" 50 | 51 | # Make sure the release tag exists. 52 | $ git tag -ln | grep "tools-$VERSION" 53 | 54 | $ git push origin "tools-$VERSION" 55 | ``` 56 | 57 | GitHub Actions will build and push the new image as `ghcr.io/cybozu-go/cke-tools:X.Y.Z`. 58 | 59 | [semver]: https://semver.org/spec/v2.0.0.html 60 | -------------------------------------------------------------------------------- /op/k8s/encryption.go: -------------------------------------------------------------------------------- 1 | package k8s 2 | 3 | import ( 4 | "context" 5 | "encoding/json" 6 | "errors" 7 | 8 | "github.com/cybozu-go/cke" 9 | apiserverv1 "k8s.io/apiserver/pkg/apis/apiserver/v1" 10 | ) 11 | 12 | const ( 13 | encryptionConfigDir = "/etc/kubernetes/apiserver" 14 | encryptionConfigFile = encryptionConfigDir + "/encryption.yml" 15 | ) 16 | 17 | func getEncryptionSecret(ctx context.Context, inf cke.Infrastructure, key string) (string, error) { 18 | vc, err := inf.Vault() 19 | if err != nil { 20 | return "", err 21 | } 22 | 23 | secret, err := vc.Logical().Read(cke.K8sSecret) 24 | if err != nil { 25 | return "", err 26 | } 27 | if secret == nil { 28 | return "", errors.New("no encryption secrets for API server") 29 | } 30 | 31 | data, ok := secret.Data[key] 32 | if !ok { 33 | return "", errors.New("no secret data for " + key) 34 | } 35 | return data.(string), nil 36 | } 37 | 38 | func getEncryptionConfiguration(ctx context.Context, inf cke.Infrastructure) (*apiserverv1.EncryptionConfiguration, error) { 39 | data, err := getEncryptionSecret(ctx, inf, "aescbc") 40 | if err != nil { 41 | return nil, err 42 | } 43 | 44 | aescfg := new(apiserverv1.AESConfiguration) 45 | err = json.Unmarshal([]byte(data), aescfg) 46 | if err != nil { 47 | return nil, err 48 | } 49 | 50 | return &apiserverv1.EncryptionConfiguration{ 51 | Resources: 
[]apiserverv1.ResourceConfiguration{ 52 | { 53 | Resources: []string{"secrets"}, 54 | Providers: []apiserverv1.ProviderConfiguration{ 55 | {AESCBC: aescfg}, 56 | {Identity: &apiserverv1.IdentityConfiguration{}}, 57 | }, 58 | }, 59 | }, 60 | }, nil 61 | } 62 | -------------------------------------------------------------------------------- /docs/cke.md: -------------------------------------------------------------------------------- 1 | cke command reference 2 | ===================== 3 | 4 | `cke` installs and maintains a Kubernetes cluster. 5 | It uses etcd as data storage and to elect a leader instance. 6 | 7 | Usage 8 | ----- 9 | 10 | `cke [OPTIONS]` 11 | 12 | ```console 13 | Usage of ./cke: 14 | --certs-gc-interval string tidy interval for expired certificates (default "1h") 15 | --config string configuration file path (default "/etc/cke/config.yml") 16 | --debug-sabakan debug sabakan integration 17 | --http string : (default "0.0.0.0:10180") 18 | --interval string check interval (default "1m") 19 | --logfile string Log filename 20 | --logformat string Log format [plain,logfmt,json] 21 | --loglevel string Log level [critical,error,warning,info,debug] 22 | --max-concurrent-updates int the maximum number of components that can be updated simultaneously (default 10) 23 | --session-ttl string leader session's TTL (default "60s") 24 | ``` 25 | 26 | Configuration file 27 | ------------------ 28 | 29 | CKE read etcd configurations from a YAML file. 30 | Parameters are defined by [cybozu-go/etcdutil](https://github.com/cybozu-go/etcdutil), and not shown below will use default values of the etcdutil. 31 | 32 | | Name | Type | Required | Description | 33 | | -------- | ------ | -------- | ------------------------------------------------ | 34 | | `prefix` | string | No | Key prefix of etcd objects. Default is `/cke/`. 
| 35 | -------------------------------------------------------------------------------- /main_test.go: -------------------------------------------------------------------------------- 1 | package cke 2 | 3 | import ( 4 | "os" 5 | "os/exec" 6 | "testing" 7 | 8 | "github.com/cybozu-go/etcdutil" 9 | "github.com/cybozu-go/log" 10 | clientv3 "go.etcd.io/etcd/client/v3" 11 | ) 12 | 13 | const ( 14 | etcdClientURL = "http://localhost:12379" 15 | etcdPeerURL = "http://localhost:12380" 16 | ) 17 | 18 | func testMain(m *testing.M) int { 19 | ci := os.Getenv("CI") == "true" 20 | if ci { 21 | code := m.Run() 22 | os.Exit(code) 23 | } 24 | 25 | etcdPath, err := os.MkdirTemp("", "cke-test") 26 | if err != nil { 27 | log.ErrorExit(err) 28 | } 29 | 30 | cmd := exec.Command("etcd", 31 | "--data-dir", etcdPath, 32 | "--initial-cluster", "default="+etcdPeerURL, 33 | "--listen-peer-urls", etcdPeerURL, 34 | "--initial-advertise-peer-urls", etcdPeerURL, 35 | "--listen-client-urls", etcdClientURL, 36 | "--advertise-client-urls", etcdClientURL) 37 | cmd.Stdout = os.Stdout 38 | cmd.Stderr = os.Stderr 39 | err = cmd.Start() 40 | if err != nil { 41 | log.ErrorExit(err) 42 | } 43 | defer func() { 44 | cmd.Process.Kill() 45 | cmd.Wait() 46 | os.RemoveAll(etcdPath) 47 | }() 48 | 49 | return m.Run() 50 | } 51 | 52 | func TestMain(m *testing.M) { 53 | os.Exit(testMain(m)) 54 | } 55 | 56 | func newEtcdClient(t *testing.T) *clientv3.Client { 57 | var clientURL string 58 | ci := os.Getenv("CI") == "true" 59 | if ci { 60 | clientURL = "http://localhost:2379" 61 | } else { 62 | clientURL = etcdClientURL 63 | } 64 | 65 | cfg := etcdutil.NewConfig(t.Name() + "/") 66 | cfg.Endpoints = []string{clientURL} 67 | 68 | etcd, err := etcdutil.NewClient(cfg) 69 | if err != nil { 70 | t.Fatal(err) 71 | } 72 | return etcd 73 | } 74 | -------------------------------------------------------------------------------- /op/nodedns/update_configmap.go: 
-------------------------------------------------------------------------------- 1 | package nodedns 2 | 3 | import ( 4 | "context" 5 | 6 | "github.com/cybozu-go/cke" 7 | corev1 "k8s.io/api/core/v1" 8 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 9 | ) 10 | 11 | type updateConfigMapOp struct { 12 | apiserver *cke.Node 13 | configMap *corev1.ConfigMap 14 | finished bool 15 | } 16 | 17 | // UpdateConfigMapOp returns an Operator to update unbound as Node local resolver. 18 | func UpdateConfigMapOp(apiserver *cke.Node, configMap *corev1.ConfigMap) cke.Operator { 19 | return &updateConfigMapOp{ 20 | apiserver: apiserver, 21 | configMap: configMap, 22 | } 23 | } 24 | 25 | func (o *updateConfigMapOp) Name() string { 26 | return "update-node-dns-configmap" 27 | } 28 | 29 | func (o *updateConfigMapOp) NextCommand() cke.Commander { 30 | if o.finished { 31 | return nil 32 | } 33 | o.finished = true 34 | return updateConfigMapCommand{o.apiserver, o.configMap} 35 | } 36 | 37 | func (o *updateConfigMapOp) Targets() []string { 38 | return []string{ 39 | o.apiserver.Address, 40 | } 41 | } 42 | 43 | type updateConfigMapCommand struct { 44 | apiserver *cke.Node 45 | configMap *corev1.ConfigMap 46 | } 47 | 48 | func (c updateConfigMapCommand) Run(ctx context.Context, inf cke.Infrastructure, _ string) error { 49 | cs, err := inf.K8sClient(ctx, c.apiserver) 50 | if err != nil { 51 | return err 52 | } 53 | 54 | configs := cs.CoreV1().ConfigMaps("kube-system") 55 | _, err = configs.Update(ctx, c.configMap, metav1.UpdateOptions{}) 56 | return err 57 | } 58 | 59 | func (c updateConfigMapCommand) Command() cke.Command { 60 | return cke.Command{ 61 | Name: "updateConfigMapCommand", 62 | Target: "kube-system", 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /docs/user-resources.md: -------------------------------------------------------------------------------- 1 | User-defined resources 2 | ====================== 3 | 4 | CKE can 
automatically create or update user-defined resources on Kubernetes. 5 | This can be considered as `kubectl apply --server-side=true --field-manager=cke` automatically executed by CKE. 6 | 7 | ## Supported resources 8 | 9 | All the standard Kubernetes resources, including `CustomResourceDefinition`, are supported. 10 | 11 | Custom resources (not `CustomResourceDefinition`s) are not supported. 12 | 13 | ## Order of application 14 | 15 | The resources are applied in the following order according to their kind. 16 | 17 | - Namespace 18 | - ServiceAccount 19 | - CustomResourceDefinition 20 | - ClusterRole 21 | - ClusterRoleBinding 22 | - (Other cluster-scope resources) 23 | - Role 24 | - RoleBinding 25 | - NetworkPolicy 26 | - Secret 27 | - ConfigMap 28 | - (Other namespace-scoped resources) 29 | 30 | ## Annotations 31 | 32 | User-defined resources are automatically annotated as follows: 33 | 34 | - `cke.cybozu.com/revision`: The last applied revision of this resource. 35 | 36 | ### Annotations for admission webhooks 37 | 38 | By annotating ValidatingWebhookConfiguration or MutatingWebhookConfiguration 39 | with `cke.cybozu.com/inject-cacert=true`, CKE automatically fill it with CA 40 | certificates. 41 | 42 | By annotating Secret with `cke.cybozu.com/issue-cert=`, CKE 43 | automatically issues a new certificate for the named `Service` resource and 44 | sets the certificate and private key in Secret data. 45 | 46 | Read [k8s.md](k8s.md#certificates-for-admission-webhooks) for more details. 47 | 48 | ## Usage 49 | 50 | Use `ckecli resource` subcommand to set, list, or delete user-defined resources. 
51 | -------------------------------------------------------------------------------- /server/integrator.go: -------------------------------------------------------------------------------- 1 | package server 2 | 3 | import ( 4 | "context" 5 | "time" 6 | 7 | "github.com/cybozu-go/cke" 8 | "github.com/cybozu-go/well" 9 | ) 10 | 11 | // Integrator defines interface to integrate external addon into CKE server. 12 | type Integrator interface { 13 | // StartWatch starts watching etcd until the context is canceled. 14 | // 15 | // It should send an empty struct to the channel when some event occurs. 16 | // To avoid blocking, use select when sending. 17 | // 18 | // If the integrator does not implement StartWatch, simply return nil. 19 | StartWatch(context.Context, chan<- struct{}) error 20 | 21 | // Init is called just once when the server becomes a new leader. 22 | Init(ctx context.Context, leaderKey string) error 23 | 24 | // Do does something for CKE. leaderKey is an etcd object key that 25 | // exists as long as the current process is the leader. 26 | Do(ctx context.Context, leaderKey string, clusterStatus *cke.ClusterStatus) error 27 | } 28 | 29 | // RunIntegrator simply executes Integrator until ctx is canceled. 30 | // This is for debugging. 
31 | func RunIntegrator(ctx context.Context, it Integrator) error { 32 | ch := make(chan struct{}, 1) 33 | env := well.NewEnvironment(ctx) 34 | 35 | env.Go(func(ctx context.Context) error { 36 | return it.StartWatch(ctx, ch) 37 | }) 38 | env.Go(func(ctx context.Context) error { 39 | for { 40 | select { 41 | case <-ctx.Done(): 42 | return nil 43 | case <-ch: 44 | case <-time.After(5 * time.Second): 45 | } 46 | 47 | err := it.Do(ctx, cke.KeySabakanTemplate, nil) 48 | if err != nil { 49 | return err 50 | } 51 | } 52 | }) 53 | env.Stop() 54 | 55 | return env.Wait() 56 | } 57 | -------------------------------------------------------------------------------- /op/clusterdns/update_config_map.go: -------------------------------------------------------------------------------- 1 | package clusterdns 2 | 3 | import ( 4 | "context" 5 | 6 | "github.com/cybozu-go/cke" 7 | corev1 "k8s.io/api/core/v1" 8 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 9 | ) 10 | 11 | type updateConfigMapOp struct { 12 | apiserver *cke.Node 13 | configmap *corev1.ConfigMap 14 | finished bool 15 | } 16 | 17 | // UpdateConfigMapOp returns an Operator to update ConfigMap for CoreDNS. 
18 | func UpdateConfigMapOp(apiserver *cke.Node, configmap *corev1.ConfigMap) cke.Operator { 19 | return &updateConfigMapOp{ 20 | apiserver: apiserver, 21 | configmap: configmap, 22 | } 23 | } 24 | 25 | func (o *updateConfigMapOp) Name() string { 26 | return "update-cluster-dns-configmap" 27 | } 28 | 29 | func (o *updateConfigMapOp) NextCommand() cke.Commander { 30 | if o.finished { 31 | return nil 32 | } 33 | o.finished = true 34 | return updateConfigMapCommand{o.apiserver, o.configmap} 35 | } 36 | 37 | func (o *updateConfigMapOp) Targets() []string { 38 | return []string{ 39 | o.apiserver.Address, 40 | } 41 | } 42 | 43 | type updateConfigMapCommand struct { 44 | apiserver *cke.Node 45 | configmap *corev1.ConfigMap 46 | } 47 | 48 | func (c updateConfigMapCommand) Run(ctx context.Context, inf cke.Infrastructure, _ string) error { 49 | cs, err := inf.K8sClient(ctx, c.apiserver) 50 | if err != nil { 51 | return err 52 | } 53 | 54 | // ConfigMap 55 | configs := cs.CoreV1().ConfigMaps("kube-system") 56 | _, err = configs.Update(ctx, c.configmap, metav1.UpdateOptions{}) 57 | return err 58 | } 59 | 60 | func (c updateConfigMapCommand) Command() cke.Command { 61 | return cke.Command{ 62 | Name: "updateConfigMapCommand", 63 | Target: "kube-system", 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /op/etcd/start.go: -------------------------------------------------------------------------------- 1 | package etcd 2 | 3 | import ( 4 | "github.com/cybozu-go/cke" 5 | "github.com/cybozu-go/cke/op" 6 | "github.com/cybozu-go/cke/op/common" 7 | ) 8 | 9 | type etcdStartOp struct { 10 | nodes []*cke.Node 11 | params cke.EtcdParams 12 | step int 13 | files *common.FilesBuilder 14 | } 15 | 16 | // StartOp returns an Operator to start etcd containers. 
17 | func StartOp(nodes []*cke.Node, params cke.EtcdParams) cke.Operator { 18 | return &etcdStartOp{ 19 | nodes: nodes, 20 | params: params, 21 | files: common.NewFilesBuilder(nodes), 22 | } 23 | } 24 | 25 | func (o *etcdStartOp) Name() string { 26 | return "etcd-start" 27 | } 28 | 29 | func (o *etcdStartOp) NextCommand() cke.Commander { 30 | switch o.step { 31 | case 0: 32 | o.step++ 33 | return prepareEtcdCertificatesCommand{o.files} 34 | case 1: 35 | o.step++ 36 | return o.files 37 | case 2: 38 | o.step++ 39 | opts := []string{ 40 | "--mount", 41 | "type=volume,src=" + op.EtcdVolumeName(o.params) + ",dst=/var/lib/etcd", 42 | } 43 | paramsMap := make(map[string]cke.ServiceParams) 44 | for _, n := range o.nodes { 45 | paramsMap[n.Address] = BuiltInParams(n, nil, "") 46 | } 47 | return common.RunContainerCommand(o.nodes, op.EtcdContainerName, cke.EtcdImage, 48 | common.WithOpts(opts), 49 | common.WithParamsMap(paramsMap), 50 | common.WithExtra(o.params.ServiceParams)) 51 | case 3: 52 | o.step++ 53 | return waitEtcdSyncCommand{etcdEndpoints(o.nodes), false} 54 | default: 55 | return nil 56 | } 57 | } 58 | 59 | func (o *etcdStartOp) Targets() []string { 60 | ips := make([]string, len(o.nodes)) 61 | for i, n := range o.nodes { 62 | ips[i] = n.Address 63 | } 64 | return ips 65 | } 66 | -------------------------------------------------------------------------------- /pkg/ckecli/cmd/vault_ssh_privkey.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "io" 5 | "os" 6 | 7 | "github.com/cybozu-go/cke" 8 | "github.com/spf13/cobra" 9 | ) 10 | 11 | var vaultSSHPrivKeyHost string 12 | 13 | // vaultSSHPrivKeyCmd represents the "vault ssh-privkey" command 14 | var vaultSSHPrivKeyCmd = &cobra.Command{ 15 | Use: "ssh-privkey FILE|-", 16 | Short: "store SSH private key into Vault", 17 | Long: `Store SSH private key for a host into Vault. 
18 | 19 | If --host is not specified, the key will be used as the default key. 20 | 21 | FILE should be a SSH private key file. 22 | If FILE is -, the contents are read from stdin.`, 23 | 24 | Args: cobra.ExactArgs(1), 25 | RunE: func(cmd *cobra.Command, args []string) error { 26 | f := os.Stdin 27 | if args[0] != "-" { 28 | var err error 29 | f, err = os.Open(args[0]) 30 | if err != nil { 31 | return err 32 | } 33 | defer f.Close() 34 | } 35 | 36 | data, err := io.ReadAll(f) 37 | if err != nil { 38 | return err 39 | } 40 | 41 | vc, err := inf.Vault() 42 | if err != nil { 43 | return err 44 | } 45 | 46 | secret, err := vc.Logical().Read(cke.SSHSecret) 47 | if err != nil { 48 | return err 49 | } 50 | 51 | var privkeys map[string]interface{} 52 | if secret != nil && secret.Data != nil { 53 | privkeys = secret.Data 54 | } else { 55 | privkeys = make(map[string]interface{}) 56 | } 57 | privkeys[vaultSSHPrivKeyHost] = string(data) 58 | 59 | _, err = vc.Logical().Write(cke.SSHSecret, privkeys) 60 | return err 61 | }, 62 | } 63 | 64 | func init() { 65 | vaultSSHPrivKeyCmd.Flags().StringVar(&vaultSSHPrivKeyHost, "host", "", "target host of SSH key") 66 | vaultCmd.AddCommand(vaultSSHPrivKeyCmd) 67 | } 68 | -------------------------------------------------------------------------------- /phase.go: -------------------------------------------------------------------------------- 1 | package cke 2 | 3 | import "time" 4 | 5 | // OperationPhase represents the processing status of CKE server. 6 | type OperationPhase string 7 | 8 | // Processing statuses of CKE server. 
9 | const ( 10 | PhaseUpgradeAborted = OperationPhase("upgrade-aborted") 11 | PhaseUpgrade = OperationPhase("upgrade") 12 | PhaseRivers = OperationPhase("rivers") 13 | PhaseEtcdBootAborted = OperationPhase("etcd-boot-aborted") 14 | PhaseEtcdBoot = OperationPhase("etcd-boot") 15 | PhaseEtcdStart = OperationPhase("etcd-start") 16 | PhaseEtcdWait = OperationPhase("etcd-wait") 17 | PhaseK8sStart = OperationPhase("k8s-start") 18 | PhaseEtcdMaintain = OperationPhase("etcd-maintain") 19 | PhaseK8sMaintain = OperationPhase("k8s-maintain") 20 | PhaseStopCP = OperationPhase("stop-control-plane") 21 | PhaseRepairMachines = OperationPhase("repair-machines") 22 | PhaseUncordonNodes = OperationPhase("uncordon-nodes") 23 | PhaseRebootNodes = OperationPhase("reboot-nodes") 24 | PhaseCompleted = OperationPhase("completed") 25 | ) 26 | 27 | // AllOperationPhases contains all kinds of OperationPhases. 28 | var AllOperationPhases = []OperationPhase{ 29 | PhaseUpgradeAborted, 30 | PhaseUpgrade, 31 | PhaseRivers, 32 | PhaseEtcdBootAborted, 33 | PhaseEtcdBoot, 34 | PhaseEtcdStart, 35 | PhaseEtcdWait, 36 | PhaseK8sStart, 37 | PhaseEtcdMaintain, 38 | PhaseK8sMaintain, 39 | PhaseStopCP, 40 | PhaseRepairMachines, 41 | PhaseUncordonNodes, 42 | PhaseRebootNodes, 43 | PhaseCompleted, 44 | } 45 | 46 | // ServerStatus represents the current server status. 
47 | type ServerStatus struct { 48 | Phase OperationPhase `json:"phase"` 49 | Timestamp time.Time `json:"timestamp"` 50 | } 51 | -------------------------------------------------------------------------------- /op/kube_node_update.go: -------------------------------------------------------------------------------- 1 | package op 2 | 3 | import ( 4 | "context" 5 | 6 | "github.com/cybozu-go/cke" 7 | corev1 "k8s.io/api/core/v1" 8 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 9 | ) 10 | 11 | type kubeNodeUpdate struct { 12 | apiserver *cke.Node 13 | nodes []*corev1.Node 14 | done bool 15 | } 16 | 17 | // KubeNodeUpdateOp updates k8s Node resources. 18 | func KubeNodeUpdateOp(apiserver *cke.Node, nodes []*corev1.Node) cke.Operator { 19 | return &kubeNodeUpdate{apiserver: apiserver, nodes: nodes} 20 | } 21 | 22 | func (o *kubeNodeUpdate) Name() string { 23 | return "update-node" 24 | } 25 | 26 | func (o *kubeNodeUpdate) NextCommand() cke.Commander { 27 | if o.done { 28 | return nil 29 | } 30 | 31 | o.done = true 32 | return nodeUpdateCommand{o.apiserver, o.nodes} 33 | } 34 | 35 | func (o *kubeNodeUpdate) Targets() []string { 36 | ips := make([]string, len(o.nodes)) 37 | for i, n := range o.nodes { 38 | ips[i] = n.Name 39 | } 40 | return ips 41 | } 42 | 43 | type nodeUpdateCommand struct { 44 | apiserver *cke.Node 45 | nodes []*corev1.Node 46 | } 47 | 48 | func (c nodeUpdateCommand) Run(ctx context.Context, inf cke.Infrastructure, _ string) error { 49 | cs, err := inf.K8sClient(ctx, c.apiserver) 50 | if err != nil { 51 | return err 52 | } 53 | 54 | nodesAPI := cs.CoreV1().Nodes() 55 | for _, n := range c.nodes { 56 | _, err := nodesAPI.Update(ctx, n, metav1.UpdateOptions{}) 57 | if err != nil { 58 | return err 59 | } 60 | } 61 | 62 | return nil 63 | } 64 | 65 | func (c nodeUpdateCommand) Command() cke.Command { 66 | names := make([]string, len(c.nodes)) 67 | for i, n := range c.nodes { 68 | names[i] = n.Name 69 | } 70 | return cke.Command{ 71 | Name: "updateNode", 72 | 
} 73 | } 74 | -------------------------------------------------------------------------------- /mtest/mtest_key: -------------------------------------------------------------------------------- 1 | -----BEGIN RSA PRIVATE KEY----- 2 | MIIEowIBAAKCAQEAqGDTa8Fw0qWuQ/7xvsTdFfP0oqgpXUxl8FpOFWpJWTUM9ZhJ 3 | NidBfglbnAAQ14/7JfvTUHAQgQ7X0ih8IJ2Z6VDGGZ9kr1N42iN5/17yMCZwgGgR 4 | bwtvj+/X4lFUa0zVHe/dnLxkBQyV2hOvlLO/E7fUeVy70Mt/1NdlZtI15l1JX/+p 5 | JJVSW+YaL7QF4X4aYad+PSQU5jRQB5KMfhTSayuAMDJc/FCYcArqhHvC/eh6lbyu 6 | Qy3WokI1p+XaRb1giybfFW35ymfVT3EsGBZjkpsgZGCMfnQ5H+xr9JiItvLE7yAd 7 | nBZevx5DyKmrC95tvjKiDGSULd8j4IiAvlsALwIDAQABAoIBACQJJPZo3gaXIua2 8 | h3J2m4J5RaASMVggY6i/CvsWVkBbVDyzrOeEG0YoJo0KjpAz5mJItP8AHOgiDxqR 9 | Q4+Pa0M94EfXjyreyHyXHyMCZP7dGzLAEwsa/XNmt2NeWJzmQq43icxjnVxfRyr3 10 | D5rZpUlJDJY0vJWBGAirWK5ayuJUN9SFfsJWqEk4CDNQvONWNK1gvxazbppdCu93 11 | FuuQvNkutosx8tmyl9eCev6sIugB6pp/YRf57JLRKJ0BwG7qn3gRNpyQOhGrF1MX 12 | +0I9Ldi42OluLKP1X7n6MOux7Alxh5KuIq28d4mrE0iKUGU3yBt9R61UUGgynWc/ 13 | 98QUQ/ECgYEA11Oj2fizzNnvEWn8nO1apYohtG+fjga8Tt472EpDjwwvhFVAX59f 14 | 2VoTJZct/oCkgffeut+bTB9FIYMRPoO1OH7Vd5lqsa+GCO+vTDM2mezFdfItxPoe 15 | 8h8u4brBy+x0aPyiNLEuYIjUh0ymUoviFGB4jP/J2QNzJvhM1nu12BsCgYEAyC7w 16 | nHiMmkfPEslG1DyKsD2rmPiVHLldjVzYSOgBcL8bPGU2SYQedRdQBpzK6OF9TqXv 17 | QsvO6HVgq8bmZVr2e0zhZhCak+NyxczObOdP2i+M2QUIXGBXG7ivCBexSiUH0DUd 18 | xV2LEWkXA+3WuJ9gKY9GBBBdTOD+jqssiLZvIX0CgYEAtlHgo9g8TZCeJy2Jskoa 19 | /Z2nCkOVYsl7OoBbRbkj2QRlW3RfzFeC7eOh4KtQS3UbVdzN34cj1GGJxGVY/YjB 20 | sfNaxijFuWu4XuqrkCaw7cYYL9T+QhHSkAotRP4/x24P5zE6GsmHTj+tTF5vWeeN 21 | ZtmEWUbf3vtXzkBhtx4Ki88CgYAaliFepqQF2YOm+xRtG51PyuD/cARdzECghbQz 22 | +pw2XStA2jBbkzB4XKBEQI6yX0BFMcSVGnxgYzZzmfb/fxU9SviklY/yFEMqAglo 23 | bVAtqiMKr6BspF7tT5nveTYSothmzqclj0bpCQwFeZEK9B/RZTXnVEUP8NHeIN3J 24 | SnF4AQKBgCXupLs3AqbEWg2iUs+Eqeru0rEWopuTUiLJOvoT6X5NQlUIlpv5Ye+Z 25 | tsChz55NjCxNEpn4NvGyeGgJrBEGwAPbx/X2v2BWFxWPNWh6byHi9ZxELa0Utlc8 26 | B29lX8k9dqD0HitCL6ibsw0DqsU6FC3fd179rH8Bik83FuukuxvD 27 | -----END RSA PRIVATE KEY----- 28 | 
-------------------------------------------------------------------------------- /op/etcd/restart.go: -------------------------------------------------------------------------------- 1 | package etcd 2 | 3 | import ( 4 | "github.com/cybozu-go/cke" 5 | "github.com/cybozu-go/cke/op" 6 | "github.com/cybozu-go/cke/op/common" 7 | ) 8 | 9 | type etcdRestartOp struct { 10 | cpNodes []*cke.Node 11 | target *cke.Node 12 | params cke.EtcdParams 13 | step int 14 | } 15 | 16 | // RestartOp returns an Operator to restart an etcd member. 17 | func RestartOp(cpNodes []*cke.Node, target *cke.Node, params cke.EtcdParams) cke.Operator { 18 | return &etcdRestartOp{ 19 | cpNodes: cpNodes, 20 | target: target, 21 | params: params, 22 | } 23 | } 24 | 25 | func (o *etcdRestartOp) Name() string { 26 | return "etcd-restart" 27 | } 28 | 29 | func (o *etcdRestartOp) NextCommand() cke.Commander { 30 | switch o.step { 31 | case 0: 32 | o.step++ 33 | return waitEtcdSyncCommand{etcdEndpoints(o.cpNodes), true} 34 | case 1: 35 | o.step++ 36 | return common.ImagePullCommand([]*cke.Node{o.target}, cke.EtcdImage) 37 | case 2: 38 | o.step++ 39 | return common.StopContainerCommand(o.target, op.EtcdContainerName) 40 | case 3: 41 | o.step++ 42 | opts := []string{ 43 | "--mount", 44 | "type=volume,src=" + op.EtcdVolumeName(o.params) + ",dst=/var/lib/etcd", 45 | } 46 | var initialCluster []string 47 | for _, n := range o.cpNodes { 48 | initialCluster = append(initialCluster, n.Address+"=https://"+n.Address+":2380") 49 | } 50 | return common.RunContainerCommand([]*cke.Node{o.target}, op.EtcdContainerName, cke.EtcdImage, 51 | common.WithOpts(opts), 52 | common.WithParams(BuiltInParams(o.target, initialCluster, "new")), 53 | common.WithExtra(o.params.ServiceParams)) 54 | } 55 | return nil 56 | } 57 | 58 | func (o *etcdRestartOp) Targets() []string { 59 | return []string{ 60 | o.target.Address, 61 | } 62 | } 63 | -------------------------------------------------------------------------------- 
/tools/rivers/upstream_test.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "net" 5 | "testing" 6 | ) 7 | 8 | func TestUpstream(t *testing.T) { 9 | var upstream Upstream 10 | 11 | if upstream.IsHealthy() { 12 | t.Errorf("new upstream should be unhealthy\n") 13 | } 14 | 15 | upstream.SetHealthy(true) 16 | if !upstream.IsHealthy() { 17 | t.Errorf("upstream should become healthy by SetHealthy(true)\n") 18 | } 19 | 20 | upstream.SetHealthy(false) 21 | if upstream.IsHealthy() { 22 | t.Errorf("upstream should become unhealthy by SetHealthy(false)\n") 23 | } 24 | 25 | conn1, conn2 := net.Pipe() 26 | defer conn1.Close() 27 | defer conn2.Close() 28 | called1 := 0 29 | called2 := 0 30 | 31 | cancelFunc := func(x *int) func() { 32 | return func() { 33 | *x++ 34 | } 35 | } 36 | 37 | upstream.AddConn(conn1, cancelFunc(&called1)) 38 | upstream.SetHealthy(false) 39 | if called1 != 1 { 40 | t.Errorf("a cancel function should be called by SetHealthy(false): called1=%d\n", called1) 41 | } 42 | upstream.SetHealthy(false) 43 | if called1 != 1 { 44 | t.Errorf("all connections are removed by SetHealthy(false): called1=%d\n", called1) 45 | } 46 | 47 | upstream.AddConn(conn1, cancelFunc(&called1)) 48 | upstream.AddConn(conn2, cancelFunc(&called2)) 49 | upstream.SetHealthy(false) 50 | if called1 != 2 || called2 != 1 { 51 | t.Errorf("all cancel functions should be called by SetHealthy(false): called1=%d called2=%d\n", called1, called2) 52 | } 53 | 54 | upstream.AddConn(conn1, cancelFunc(&called1)) 55 | upstream.AddConn(conn2, cancelFunc(&called2)) 56 | upstream.RemoveConn(conn1) 57 | upstream.SetHealthy(false) 58 | if called1 != 2 || called2 != 2 { 59 | t.Errorf("the cancel function for removed conn should not be called by setHealthy(false): called1=%d called2=%d\n", called1, called2) 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /bin/run-mtest.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/sh -ex 2 | 3 | SUITE=$1 4 | CLUSTER=$2 5 | 6 | . $(dirname $0)/env 7 | 8 | # Create GCE instance 9 | $GCLOUD compute instances delete ${INSTANCE_NAME} --zone ${ZONE} || true 10 | $GCLOUD compute instances create ${INSTANCE_NAME} \ 11 | --zone ${ZONE} \ 12 | --machine-type ${MACHINE_TYPE} \ 13 | --image vmx-enabled \ 14 | --boot-disk-type ${DISK_TYPE} \ 15 | --boot-disk-size ${BOOT_DISK_SIZE} \ 16 | --local-ssd interface=nvme \ 17 | --local-ssd interface=nvme \ 18 | --local-ssd interface=nvme \ 19 | --local-ssd interface=nvme 20 | 21 | # Run multi-host test 22 | for i in $(seq 300); do 23 | if $GCLOUD compute ssh --zone=${ZONE} cybozu@${INSTANCE_NAME} --command=date 2>/dev/null; then 24 | break 25 | fi 26 | sleep 1 27 | done 28 | 29 | cat >run.sh </suite_test.go`. 26 | 27 | Synopsis 28 | -------- 29 | 30 | [`Makefile`](../mtest/Makefile) setup virtual machine environment and runs mtest. 31 | 32 | * `make setup` 33 | 34 | Install mtest required components. 35 | 36 | * `make clean` 37 | 38 | Delete generated files in `output/` directory. 39 | 40 | * `make placemat` 41 | 42 | Run `placemat` in background by systemd-run to start virtual machines. 43 | 44 | * `make stop` 45 | 46 | Stop `placemat`. 47 | 48 | * `make test` 49 | 50 | Run mtest on a running `placemat`. 51 | 52 | * `make bootstrap` 53 | 54 | Create the kubernetes cluster on a running `placemat` using a part of `functions` suite. 55 | 56 | Options 57 | ------- 58 | 59 | ### `SUITE` 60 | 61 | You can choose the type of test suite by specifying `SUITE` make variable. 62 | The value can be `functions` (default), `operators`, or `robustness`. 63 | 64 | `make test` accepts this variable. 65 | 66 | The value of `SUITE` is interpreted as a Go package name. You can write 67 | a new test suite and specify its package name by `SUITE`. 
As a side note, 68 | the forms of `./functions`, `./operators`, and `./robustness` are more proper. 69 | -------------------------------------------------------------------------------- /metrics/updater.go: -------------------------------------------------------------------------------- 1 | package metrics 2 | 3 | import ( 4 | "context" 5 | "time" 6 | 7 | "github.com/cybozu-go/cke" 8 | ) 9 | 10 | func alwaysAvailable(_ context.Context, _ storage) (bool, error) { 11 | return true, nil 12 | } 13 | 14 | var isLeader bool 15 | 16 | // UpdateLeader updates "leader". 17 | func UpdateLeader(flag bool) { 18 | if flag { 19 | leader.Set(1) 20 | } else { 21 | leader.Set(0) 22 | } 23 | isLeader = flag 24 | } 25 | 26 | // UpdateOperationPhase updates "operation_phase" and its timestamp. 27 | func UpdateOperationPhase(phase cke.OperationPhase, ts time.Time) { 28 | for _, labelPhase := range cke.AllOperationPhases { 29 | if labelPhase == phase { 30 | operationPhase.WithLabelValues(string(labelPhase)).Set(1) 31 | } else { 32 | operationPhase.WithLabelValues(string(labelPhase)).Set(0) 33 | } 34 | } 35 | operationPhaseTimestampSeconds.Set(float64(ts.Unix())) 36 | } 37 | 38 | func isOperationPhaseAvailable(_ context.Context, _ storage) (bool, error) { 39 | return isLeader, nil 40 | } 41 | 42 | func isNodeAvailable(_ context.Context, _ storage) (bool, error) { 43 | return isLeader, nil 44 | } 45 | 46 | // UpdateSabakanIntegration updates Sabakan integration metrics. 
47 | func UpdateSabakanIntegration(isSuccessful bool, workersByRole map[string]int, unusedMachines int, ts time.Time) { // records the integration timestamp, then either a failure flag or success plus per-role worker counts and the unused-machine gauge
48 | 	sabakanIntegrationTimestampSeconds.Set(float64(ts.Unix()))
49 | 	if !isSuccessful {
50 | 		sabakanIntegrationSuccessful.Set(0)
51 | 		return
52 | 	}
53 | 
54 | 	sabakanIntegrationSuccessful.Set(1)
55 | 	for role, num := range workersByRole {
56 | 		sabakanWorkers.WithLabelValues(role).Set(float64(num))
57 | 	}
58 | 	sabakanUnusedMachines.Set(float64(unusedMachines))
59 | }
60 | 
61 | func isSabakanIntegrationAvailable(ctx context.Context, st storage) (bool, error) { // leader-only: reports whether sabakan integration is enabled in storage
62 | 	if !isLeader {
63 | 		return false, nil
64 | 	}
65 | 
66 | 	disabled, err := st.IsSabakanDisabled(ctx)
67 | 	if err != nil {
68 | 		return false, err
69 | 	}
70 | 	return !disabled, nil
71 | }
72 | 
--------------------------------------------------------------------------------
/sabakan/mock/schema.graphql:
--------------------------------------------------------------------------------
 1 | type Query {
 2 |   machine(serial: ID!): Machine!
 3 |   searchMachines(having: MachineParams, notHaving: MachineParams): [Machine!]!
 4 | }
 5 | 
 6 | """
 7 | MachineParams is a set of input parameters to search machines.
 8 | """
 9 | input MachineParams {
10 |   labels: [LabelInput!] = null
11 |   racks: [Int!] = null
12 |   roles: [String!] = null
13 |   states: [MachineState!] = null
14 |   minDaysBeforeRetire: Int = null
15 | }
16 | 
17 | """
18 | LabelInput represents a label to search machines.
19 | """
20 | input LabelInput {
21 |   name: String!
22 |   value: String!
23 | }
24 | 
25 | """
26 | Machine represents a physical server in a datacenter rack.
27 | """
28 | type Machine {
29 |   spec: MachineSpec!
30 |   status: MachineStatus!
31 | }
32 | 
33 | """
34 | MachineSpec represents specifications of a machine.
35 | """
36 | type MachineSpec {
37 |   serial: ID!
38 |   labels: [Label!]
39 |   rack: Int!
40 |   indexInRack: Int!
41 |   role: String!
42 |   ipv4: [IPAddress!]!
43 |   registerDate: DateTime!
44 |   retireDate: DateTime!
45 |   bmc: BMC!
46 | }
47 | 
48 | """
49 | Label represents arbitrary key-value pairs.
50 | """
51 | type Label {
52 |   name: String!
53 |   value: String!
54 | }
55 | 
56 | """
57 | IPAddress represents an IPv4 or IPv6 address.
58 | """
59 | scalar IPAddress
60 | 
61 | """
62 | DateTime represents a date and time value.
63 | """
64 | scalar DateTime
65 | 
66 | """
67 | BMC represents a Baseboard Management Controller.
68 | """
69 | type BMC {
70 |   bmcType: String!
71 |   ipv4: IPAddress!
72 | }
73 | 
74 | """
75 | MachineStatus represents status of a Machine.
76 | """
77 | type MachineStatus {
78 |   state: MachineState!
79 |   timestamp: DateTime!
80 |   duration: Float!
81 | }
82 | 
83 | """
84 | MachineState enumerates machine states.
85 | """
86 | enum MachineState {
87 |   UNINITIALIZED
88 |   HEALTHY
89 |   UNHEALTHY
90 |   UNREACHABLE
91 |   UPDATING
92 |   RETIRING
93 |   RETIRED
94 | }
--------------------------------------------------------------------------------
/op/rivers_boot.go:
--------------------------------------------------------------------------------
 1 | package op
 2 | 
 3 | import (
 4 | 	"fmt"
 5 | 	"strings"
 6 | 
 7 | 	"github.com/cybozu-go/cke"
 8 | 	"github.com/cybozu-go/cke/op/common"
 9 | )
10 | 
11 | type riversBootOp struct {
12 | 	nodes        []*cke.Node
13 | 	upstreams    []*cke.Node
14 | 	params       cke.ServiceParams
15 | 	step         int // index of the next step returned by NextCommand
16 | 	name         string
17 | 	upstreamPort int
18 | 	listenPort   int
19 | }
20 | 
21 | // RiversBootOp returns an Operator to bootstrap rivers.
22 | func RiversBootOp(nodes, upstreams []*cke.Node, params cke.ServiceParams, name string, upstreamPort, listenPort int) cke.Operator { // two-step boot: pull the tools image, then run the rivers container
23 | 	return &riversBootOp{
24 | 		nodes:        nodes,
25 | 		upstreams:    upstreams,
26 | 		params:       params,
27 | 		name:         name,
28 | 		upstreamPort: upstreamPort,
29 | 		listenPort:   listenPort,
30 | 	}
31 | }
32 | 
33 | func (o *riversBootOp) Name() string {
34 | 	return o.name + "-bootstrap"
35 | }
36 | 
37 | func (o *riversBootOp) NextCommand() cke.Commander { // step 0: image pull; step 1: container start; then done
38 | 	switch o.step {
39 | 	case 0:
40 | 		o.step++
41 | 		return common.ImagePullCommand(o.nodes, cke.ToolsImage)
42 | 	case 1:
43 | 		o.step++
44 | 		return common.RunContainerCommand(o.nodes, o.name, cke.ToolsImage,
45 | 			common.WithParams(RiversParams(o.upstreams, o.upstreamPort, o.listenPort)),
46 | 			common.WithExtra(o.params))
47 | 	default:
48 | 		return nil
49 | 	}
50 | }
51 | 
52 | // RiversParams returns parameters for rivers.
53 | func RiversParams(upstreams []*cke.Node, upstreamPort, listenPort int) cke.ServiceParams { // builds --upstreams=<addr:port,...> and a loopback --listen argument
54 | 	var ups []string
55 | 	for _, n := range upstreams {
56 | 		ups = append(ups, fmt.Sprintf("%s:%d", n.Address, upstreamPort))
57 | 	}
58 | 	args := []string{
59 | 		"rivers",
60 | 		"--upstreams=" + strings.Join(ups, ","),
61 | 		"--listen=" + fmt.Sprintf("127.0.0.1:%d", listenPort),
62 | 	}
63 | 	return cke.ServiceParams{ExtraArguments: args}
64 | }
65 | 
66 | func (o *riversBootOp) Targets() []string {
67 | 	ips := make([]string, len(o.nodes))
68 | 	for i, n := range o.nodes {
69 | 		ips[i] = n.Address
70 | 	}
71 | 	return ips
72 | }
--------------------------------------------------------------------------------
/op/kube_wait.go:
--------------------------------------------------------------------------------
 1 | package op
 2 | 
 3 | import (
 4 | 	"context"
 5 | 	"time"
 6 | 
 7 | 	"github.com/cybozu-go/cke"
 8 | 	"github.com/cybozu-go/log"
 9 | 	"k8s.io/apimachinery/pkg/api/errors"
10 | 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
11 | )
12 | 
13 | type kubeWaitOp struct {
14 | 	apiserver *cke.Node
15 | 	finished  bool
16 | }
17 | 
18 | // KubeWaitOp returns an Operator to wait for Kubernetes resources to get initialized.
19 | func KubeWaitOp(apiserver *cke.Node) cke.Operator {
20 | 	return &kubeWaitOp{apiserver: apiserver}
21 | }
22 | 
23 | func (o *kubeWaitOp) Name() string {
24 | 	return "wait-kubernetes"
25 | }
26 | 
27 | func (o *kubeWaitOp) NextCommand() cke.Commander { // one-shot: returns waitKubeCommand exactly once
28 | 	if o.finished {
29 | 		return nil
30 | 	}
31 | 
32 | 	o.finished = true
33 | 	return waitKubeCommand{o.apiserver}
34 | }
35 | 
36 | func (o *kubeWaitOp) Targets() []string {
37 | 	return []string{
38 | 		o.apiserver.Address,
39 | 	}
40 | }
41 | 
42 | type waitKubeCommand struct {
43 | 	apiserver *cke.Node
44 | }
45 | 
46 | func (c waitKubeCommand) Run(ctx context.Context, inf cke.Infrastructure, _ string) error { // polls for the kube-system "default" ServiceAccount up to 100 times, ~1s apart
47 | 	cs, err := inf.K8sClient(ctx, c.apiserver)
48 | 	if err != nil {
49 | 		return err
50 | 	}
51 | 
52 | 	begin := time.Now()
53 | 	for i := 0; i < 100; i++ {
54 | 		_, err = cs.CoreV1().ServiceAccounts("kube-system").Get(ctx, "default", metav1.GetOptions{})
55 | 		switch {
56 | 		case err == nil:
57 | 			elapsed := time.Since(begin)
58 | 			log.Info("k8s gets initialized", map[string]interface{}{
59 | 				"elapsed": elapsed.Seconds(),
60 | 			})
61 | 			return nil
62 | 
63 | 		case errors.IsNotFound(err):
64 | 			select {
65 | 			case <-time.After(time.Second):
66 | 			case <-ctx.Done(): // NOTE(review): loop continues after cancellation instead of returning ctx.Err(); the next API call is expected to fail — confirm this is intentional
67 | 			}
68 | 
69 | 		default:
70 | 			return err
71 | 		}
72 | 	}
73 | 
74 | 	// Timed-out here is not an error because waitKubeCommand will be invoked
75 | 	// again by the controller.
76 | 	return nil
77 | }
78 | 
79 | func (c waitKubeCommand) Command() cke.Command {
80 | 	return cke.Command{
81 | 		Name:   "waitKubeCommand",
82 | 		Target: "kube-system sa/default",
83 | 	}
84 | }
--------------------------------------------------------------------------------
/op/upgrade.go:
--------------------------------------------------------------------------------
 1 | package op
 2 | 
 3 | import (
 4 | 	"context"
 5 | 
 6 | 	"github.com/cybozu-go/cke"
 7 | 	"github.com/cybozu-go/well"
 8 | )
 9 | 
10 | type upgradeOp struct {
11 | 	current string // current config version; mutated as steps are issued
12 | 	nodes   []*cke.Node
13 | }
14 | 
15 | // UpgradeOp returns an Operator to upgrade cluster configuration.
16 | func UpgradeOp(current string, nodes []*cke.Node) cke.Operator {
17 | 	return &upgradeOp{current: current, nodes: nodes}
18 | }
19 | 
20 | func (u *upgradeOp) Name() string {
21 | 	return "upgrade"
22 | }
23 | 
24 | func (u *upgradeOp) NextCommand() cke.Commander { // only migration implemented here: config version "1" -> "2"
25 | 	switch u.current {
26 | 	case "1":
27 | 		u.current = "2"
28 | 		return UpgradeToVersion2Command(u.nodes)
29 | 	default:
30 | 		return nil
31 | 	}
32 | }
33 | 
34 | func (u *upgradeOp) Targets() []string {
35 | 	targets := make([]string, len(u.nodes))
36 | 	for i, n := range u.nodes {
37 | 		targets[i] = n.Address
38 | 	}
39 | 	return targets
40 | }
41 | 
42 | type upgradeToVersion2Command struct {
43 | 	nodes []*cke.Node
44 | }
45 | 
46 | // UpgradeToVersion2Command returns a Commander to upgrade from version 1 to 2.
47 | func UpgradeToVersion2Command(nodes []*cke.Node) cke.Commander {
48 | 	return upgradeToVersion2Command{nodes}
49 | }
50 | 
51 | func (u upgradeToVersion2Command) Run(ctx context.Context, inf cke.Infrastructure, leaderKey string) error { // ensures EtcdAddedMemberVolumeName exists on every node concurrently, then persists the new config version
52 | 	env := well.NewEnvironment(ctx)
53 | 	for _, n := range u.nodes {
54 | 		ce := inf.Engine(n.Address) // captured per-iteration, before the closure
55 | 		env.Go(func(ctx context.Context) error {
56 | 			exists, err := ce.VolumeExists(EtcdAddedMemberVolumeName)
57 | 			if err != nil {
58 | 				return err
59 | 			}
60 | 			if !exists {
61 | 				return ce.VolumeCreate(EtcdAddedMemberVolumeName)
62 | 			}
63 | 			return nil
64 | 		})
65 | 	}
66 | 	env.Stop()
67 | 	if err := env.Wait(); err != nil {
68 | 		return err
69 | 	}
70 | 
71 | 	return inf.Storage().PutConfigVersion(ctx, leaderKey)
72 | }
73 | 
74 | func (u upgradeToVersion2Command) Command() cke.Command {
75 | 	return cke.Command{
76 | 		Name:   "upgrade-version-from-1-to-2",
77 | 		Target: EtcdAddedMemberVolumeName,
78 | 	}
79 | }
--------------------------------------------------------------------------------
/op/repair_drain_timeout.go:
--------------------------------------------------------------------------------
 1 | package op
 2 | 
 3 | import (
 4 | 	"context"
 5 | 	"fmt"
 6 | 	"math/rand"
 7 | 	"time"
 8 | 
 9 | 	"github.com/cybozu-go/cke"
10 | 	"github.com/cybozu-go/log"
11 | )
12 | 
13 | type repairDrainTimeoutOp struct {
14 | 	finished bool
15 | 
16 | 	entry *cke.RepairQueueEntry
17 | }
18 | 
19 | func RepairDrainTimeoutOp(entry *cke.RepairQueueEntry) cke.Operator { // one-shot Operator that records a drain-timeout back-off for the entry
20 | 	return &repairDrainTimeoutOp{
21 | 		entry: entry,
22 | 	}
23 | }
24 | 
25 | func (o *repairDrainTimeoutOp) Name() string {
26 | 	return "repair-drain-timeout"
27 | }
28 | 
29 | func (o *repairDrainTimeoutOp) NextCommand() cke.Commander {
30 | 	if o.finished {
31 | 		return nil
32 | 	}
33 | 	o.finished = true
34 | 
35 | 	return repairDrainTimeoutCommand{
36 | 		entry: o.entry,
37 | 	}
38 | }
39 | 
40 | func (o *repairDrainTimeoutOp) Targets() []string {
41 | 	return []string{o.entry.Address}
42 | }
43 | 
44 | type repairDrainTimeoutCommand struct {
45 | 	entry *cke.RepairQueueEntry
46 | }
47 | 
48 | func (c repairDrainTimeoutCommand) Run(ctx context.Context, inf cke.Infrastructure, _ string) error {
49 | 	return repairDrainBackOff(ctx, inf, c.entry, fmt.Errorf("drain timed out: %s", c.entry.Address))
50 | }
51 | 
52 | func (c repairDrainTimeoutCommand) Command() cke.Command {
53 | 	return cke.Command{
54 | 		Name:   "repairDrainTimeoutCommand",
55 | 		Target: c.entry.Address,
56 | 	}
57 | }
58 | 
59 | func repairDrainBackOff(ctx context.Context, inf cke.Infrastructure, entry *cke.RepairQueueEntry, err error) error { // logs the failure, resets the entry to processing/waiting, and schedules a randomized back-off
60 | 	log.Warn("failed to drain node for repair", map[string]interface{}{
61 | 		"address":   entry.Address,
62 | 		log.FnError: err,
63 | 	})
64 | 	entry.Status = cke.RepairStatusProcessing
65 | 	entry.StepStatus = cke.RepairStepStatusWaiting
66 | 	entry.LastTransitionTime = time.Now().Truncate(time.Second).UTC()
67 | 	entry.DrainBackOffCount++
68 | 	entry.DrainBackOffExpire = entry.LastTransitionTime.Add(time.Second * time.Duration(drainBackOffBaseSeconds+rand.Int63n(int64(drainBackOffBaseSeconds*entry.DrainBackOffCount)))) // base + random up to base*count seconds; window widens with each failure
69 | 	return inf.Storage().UpdateRepairsEntry(ctx, entry)
70 | }
--------------------------------------------------------------------------------
/mtest/reboot-eviction-dry-run.yaml:
--------------------------------------------------------------------------------
 1 | apiVersion: apps/v1
 2 | kind: Deployment
 3 | metadata:
 4 |   namespace: reboot-test
 5 |   name: 1-not-evictable
 6 | spec:
 7 |   replicas: 1
 8 |   selector:
 9 |     matchLabels:
10 |       reboot-app: 1-not-evictable
11 |   template:
12 |     metadata:
13 |       labels:
14 |         reboot-app: 1-not-evictable
15 |     spec:
16 |       containers:
17 |         - name: httpd
18 |           image: ghcr.io/cybozu/testhttpd:0
19 | ---
20 | apiVersion: policy/v1
21 | kind: PodDisruptionBudget
22 | metadata:
23 |   namespace: reboot-test
24 |   name: 1-not-evictable
25 | spec:
26 |   maxUnavailable: 0
27 |   selector:
28 |     matchLabels:
29 |       reboot-app: 1-not-evictable
30 | ---
31 | apiVersion: apps/v1
32 | kind: Deployment
33 | metadata:
34 |   namespace: reboot-test
35 |   name: 0-evictable
36 | spec:
37 |   replicas: 1
38 |   selector:
39 |     matchLabels:
40 |       reboot-app: 0-evictable
41 |   template:
42 |     metadata:
43 |       labels:
44 |         reboot-app: 0-evictable
45 |     spec:
46 |       containers:
47 |         - name: httpd
48 |           image: ghcr.io/cybozu/testhttpd:0
49 |       affinity:
50 |         podAffinity:
51 |           requiredDuringSchedulingIgnoredDuringExecution:
52 |             - labelSelector:
53 |                 matchLabels:
54 |                   reboot-app: 1-not-evictable
55 |               topologyKey: kubernetes.io/hostname
56 | ---
57 | apiVersion: apps/v1
58 | kind: Deployment
59 | metadata:
60 |   namespace: reboot-test
61 |   name: 2-evictable
62 | spec:
63 |   replicas: 1
64 |   selector:
65 |     matchLabels:
66 |       reboot-app: 2-evictable
67 |   template:
68 |     metadata:
69 |       labels:
70 |         reboot-app: 2-evictable
71 |     spec:
72 |       containers:
73 |         - name: httpd
74 |           image: ghcr.io/cybozu/testhttpd:0
75 |       affinity:
76 |         podAffinity:
77 |           requiredDuringSchedulingIgnoredDuringExecution:
78 |             - labelSelector:
79 |                 matchLabels:
80 |                   reboot-app: 1-not-evictable
81 |               topologyKey: kubernetes.io/hostname
82 | 
--------------------------------------------------------------------------------
/op/clusterdns/create_configmap.go:
--------------------------------------------------------------------------------
 1 | package clusterdns
 2 | 
 3 | import (
 4 | 	"context"
 5 | 
 6 | 	"github.com/cybozu-go/cke"
 7 | 	"github.com/cybozu-go/cke/op"
 8 | 	"k8s.io/apimachinery/pkg/api/errors"
 9 | 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
10 | )
11 | 
12 | type createConfigMapOp struct {
13 | 	apiserver  *cke.Node
14 | 	domain     string
15 | 	dnsServers []string
16 | 	finished   bool
17 | }
18 | 
19 | // CreateConfigMapOp returns an Operator to create ConfigMap for CoreDNS.
20 | func CreateConfigMapOp(apiserver *cke.Node, domain string, dnsServers []string) cke.Operator {
21 | 	return &createConfigMapOp{
22 | 		apiserver:  apiserver,
23 | 		domain:     domain,
24 | 		dnsServers: dnsServers,
25 | 	}
26 | }
27 | 
28 | func (o *createConfigMapOp) Name() string {
29 | 	return "create-cluster-dns-configmap"
30 | }
31 | 
32 | func (o *createConfigMapOp) NextCommand() cke.Commander { // one-shot: returns createConfigMapCommand exactly once
33 | 	if o.finished {
34 | 		return nil
35 | 	}
36 | 	o.finished = true
37 | 	return createConfigMapCommand{o.apiserver, o.domain, o.dnsServers}
38 | }
39 | 
40 | func (o *createConfigMapOp) Targets() []string {
41 | 	return []string{
42 | 		o.apiserver.Address,
43 | 	}
44 | }
45 | 
46 | func (c createConfigMapCommand) Command() cke.Command { // NOTE(review): method is declared above its receiver type below; legal Go but unusual ordering
47 | 	return cke.Command{
48 | 		Name:   "createConfigMapCommand",
49 | 		Target: "kube-system",
50 | 	}
51 | }
52 | 
53 | type createConfigMapCommand struct {
54 | 	apiserver  *cke.Node
55 | 	domain     string
56 | 	dnsServers []string
57 | }
58 | 
59 | func (c createConfigMapCommand) Run(ctx context.Context, inf cke.Infrastructure, _ string) error { // creates the cluster-DNS ConfigMap in kube-system only if it does not already exist
60 | 	cs, err := inf.K8sClient(ctx, c.apiserver)
61 | 	if err != nil {
62 | 		return err
63 | 	}
64 | 
65 | 	// ConfigMap
66 | 	configs := cs.CoreV1().ConfigMaps("kube-system")
67 | 	_, err = configs.Get(ctx, op.ClusterDNSAppName, metav1.GetOptions{})
68 | 	switch {
69 | 	case err == nil:
70 | 	case errors.IsNotFound(err):
71 | 		_, err = configs.Create(ctx, ConfigMap(c.domain, c.dnsServers), metav1.CreateOptions{})
72 | 		if err != nil {
73 | 			return err
74 | 		}
75 | 	default:
76 | 		return err
77 | 	}
78 | 
79 | 	return nil
80 | }
--------------------------------------------------------------------------------
/collections_test.go:
--------------------------------------------------------------------------------
 1 | package cke
 2 | 
 3 | import "testing"
 4 | 
 5 | func TestCompareStrings(t *testing.T) {
 6 | 	cases := []struct {
 7 | 		s1 []string
 8 | 		s2 []string
 9 | 		ok bool
10 | 	}{
11 | 		{[]string{}, nil, true},
12 | 		{nil, []string{}, true},
13 | 		{nil, nil, true},
14 | 		{[]string{}, []string{}, true},
15 | 		{[]string{"hello", "world"}, []string{"hello", "world"}, true},
16 | 
17 | 		{[]string{""}, []string{}, false},
18 | 		{[]string{"A"}, []string{"B"}, false},
19 | 	}
20 | 	for _, c := range cases {
21 | 		if compareStrings(c.s1, c.s2) != c.ok {
22 | 			t.Errorf("compareStrings(%#v, %#v) != %v", c.s1, c.s2, c.ok)
23 | 		}
24 | 	}
25 | }
26 | 
27 | func TestCompareStringMap(t *testing.T) {
28 | 	cases := []struct {
29 | 		m1 map[string]string
30 | 		m2 map[string]string
31 | 		ok bool
32 | 	}{
33 | 		{map[string]string{}, nil, true},
34 | 		{nil, map[string]string{}, true},
35 | 		{nil, nil, true},
36 | 		{map[string]string{}, map[string]string{}, true},
37 | 		{map[string]string{"hello": "world"}, map[string]string{"hello": "world"}, true},
38 | 
39 | 		{map[string]string{"hello": ""}, map[string]string{}, false},
40 | 		{map[string]string{"hello": "world"}, map[string]string{"good": "morning"}, false},
41 | 		{map[string]string{"hello": "world"}, map[string]string{"hello": "ola"}, false},
42 | 	}
43 | 	for _, c := range cases {
44 | 		if compareStringMap(c.m1, c.m2) != c.ok {
45 | 			t.Errorf("compareStringMap(%#v, %#v) != %v", c.m1, c.m2, c.ok)
46 | 		}
47 | 	}
48 | }
49 | 
50 | func TestCompareMounts(t *testing.T) {
51 | 	cases := []struct {
52 | 		m1 []Mount
53 | 		m2 []Mount
54 | 		ok bool
55 | 	}{
56 | 		{[]Mount{}, nil, true},
57 | 		{nil, []Mount{}, true},
58 | 		{nil, nil, true},
59 | 		{[]Mount{}, []Mount{}, true},
60 | 		{[]Mount{{"/var", "/var", true, "", ""}}, []Mount{{"/var", "/var", true, "", ""}}, true},
61 | 		{[]Mount{{"/tmp", "/tmp", true, "", ""}}, []Mount{{"/var", "/var", true, "", ""}}, false},
62 | 	}
63 | 	for _, c := range cases {
64 | 		if compareMounts(c.m1, c.m2) != c.ok {
65 | 			t.Errorf("compareMounts(%#v, %#v) != %v", c.m1, c.m2, c.ok)
66 | 		}
67 | 	}
68 | }
--------------------------------------------------------------------------------
/sonobuoy/worker-ign.yml:
--------------------------------------------------------------------------------
 1 | passwd:
 2 |   users:
 3 |     - name: cke
 4 |       ssh_authorized_keys:
 5 |         - "PUBLIC_KEY"
 6 |       groups:
 7 |         - docker
 8 |         - sudo
 9 | storage:
10 |   files:
11 |     - path: "/etc/sysctl.d/br_netfilter.conf"
12 |       filesystem: root
13 |       contents:
14 |         inline: |
15 |           net.bridge.bridge-nf-call-iptables = 1
16 |           net.ipv4.ip_forward = 1
17 |           net.bridge.bridge-nf-call-ip6tables = 1
18 |       mode: 0644
19 |     - path: "/opt/bin/setup-iptables-rules"
20 |       filesystem: root
21 |       contents:
22 |         inline: |
23 |           #!/bin/sh
24 |           iptables -w -A INPUT -p tcp -j ACCEPT
25 |           iptables -w -A INPUT -p udp -j ACCEPT
26 |       mode: 0755
27 |     - path: "/opt/bin/replace-resolv-conf"
28 |       # For some reason, if we let ignition replace resolv.conf directly, it results in an empty file.
29 |       filesystem: root
30 |       contents:
31 |         inline: |
32 |           #!/bin/sh
33 |           echo nameserver 8.8.8.8 > /etc/resolv.conf
34 |       mode: 0755
35 | systemd:
36 |   units:
37 |     - name: systemd-resolved.service
38 |       mask: true
39 |     - name: setup-iptables-rules.service
40 |       enabled: true
41 |       contents: |
42 |         [Unit]
43 |         Description=Setup iptables rules
44 |         After=network-online.target
45 |         Wants=network-online.target
46 | 
47 |         [Service]
48 |         Type=oneshot
49 |         ExecStart=/opt/bin/setup-iptables-rules
50 |         RemainAfterExit=yes
51 | 
52 |         [Install]
53 |         WantedBy=multi-user.target
54 |     - name: locksmithd.service
55 |       mask: true
56 |     - name: replace-resolv-conf.service
57 |       enabled: true
58 |       contents: |
59 |         [Unit]
60 |         Description=Replace resolv.conf
61 |         After=network-online.target
62 |         Wants=network-online.target
63 | 
64 |         [Service]
65 |         Type=oneshot
66 |         ExecStart=/opt/bin/replace-resolv-conf
67 |         RemainAfterExit=yes
68 | 
69 |         [Install]
70 |         WantedBy=multi-user.target
--------------------------------------------------------------------------------
/op/stop.go:
--------------------------------------------------------------------------------
 1 | package op
 2 | 
 3 | import (
 4 | 	"github.com/cybozu-go/cke"
 5 | 	"github.com/cybozu-go/cke/op/common"
 6 | )
 7 | 
 8 | type containerStopOp struct {
 9 | 	nodes    []*cke.Node
10 | 	name     string
11 | 	executed bool
12 | }
13 | 
14 | func (o *containerStopOp) Name() string {
15 | 	return "stop-" + o.name
16 | }
17 | 
18 | func (o *containerStopOp) NextCommand() cke.Commander { // one-shot: kills the named container on all nodes exactly once
19 | 	if o.executed {
20 | 		return nil
21 | 	}
22 | 	o.executed = true
23 | 	return common.KillContainersCommand(o.nodes, o.name)
24 | }
25 | 
26 | func (o *containerStopOp) Targets() []string {
27 | 	ips := make([]string, len(o.nodes))
28 | 	for i, n := range o.nodes {
29 | 		ips[i] = n.Address
30 | 	}
31 | 	return ips
32 | }
33 | 
34 | // APIServerStopOp returns an Operator to stop API server
35 | func APIServerStopOp(nodes []*cke.Node) cke.Operator {
36 | 	return &containerStopOp{
37 | 		nodes: nodes,
38 | 		name:  KubeAPIServerContainerName,
39 | 	}
40 | }
41 | 
42 | // ControllerManagerStopOp returns an Operator to stop kube-controller-manager
43 | func ControllerManagerStopOp(nodes []*cke.Node) cke.Operator {
44 | 	return &containerStopOp{
45 | 		nodes: nodes,
46 | 		name:  KubeControllerManagerContainerName,
47 | 	}
48 | }
49 | 
50 | // SchedulerStopOp returns an Operator to stop kube-scheduler
51 | func SchedulerStopOp(nodes []*cke.Node) cke.Operator {
52 | 	return &containerStopOp{
53 | 		nodes: nodes,
54 | 		name:  KubeSchedulerContainerName,
55 | 	}
56 | }
57 | 
58 | // EtcdStopOp returns an Operator to stop etcd
59 | func EtcdStopOp(nodes []*cke.Node) cke.Operator {
60 | 	return &containerStopOp{
61 | 		nodes: nodes,
62 | 		name:  EtcdContainerName,
63 | 	}
64 | }
65 | 
66 | // EtcdRiversStopOp returns an Operator to stop etcd-rivers
67 | func EtcdRiversStopOp(nodes []*cke.Node) cke.Operator {
68 | 	return &containerStopOp{
69 | 		nodes: nodes,
70 | 		name:  EtcdRiversContainerName,
71 | 	}
72 | }
73 | 
74 | // ProxyStopOp returns an Operator to stop kube-proxy
75 | func ProxyStopOp(nodes []*cke.Node) cke.Operator {
76 | 	return &containerStopOp{
77 | 		nodes: nodes,
78 | 		name:  KubeProxyContainerName,
79 | 	}
80 | }
81 | 
--------------------------------------------------------------------------------
/pkg/ckecli/cmd/scp.go:
--------------------------------------------------------------------------------
 1 | package cmd
 2 | 
 3 | import (
 4 | 	"context"
 5 | 	"errors"
 6 | 	"fmt"
 7 | 	"os"
 8 | 	"os/exec"
 9 | 	"strings"
10 | 
11 | 	"github.com/cybozu-go/well"
12 | 	"github.com/spf13/cobra"
13 | )
14 | 
15 | func detectSCPNode(args []string) (string, error) { // resolves the node name from the first "host:path" argument via detectSSHNode
16 | 	var nodeName string
17 | 	for _, arg := range args {
18 | 		if strings.Contains(arg, ":") {
19 | 			nodeName = detectSSHNode(arg[:strings.Index(arg, ":")])
20 | 			break
21 | 		}
22 | 	}
23 | 
24 | 	if len(nodeName) == 0 {
25 | 		return "", errors.New("node name is not specified")
26 | 	}
27 | 
28 | 	return nodeName, nil
29 | }
30 | 
31 | func scp(ctx context.Context, args []string) error { // fetches the node's SSH key into a FIFO, then execs the system scp with host-key checks disabled
32 | 	node, err := detectSCPNode(args)
33 | 	if err != nil {
34 | 		return err
35 | 	}
36 | 	fifo, err := sshPrivateKey(node)
37 | 	if err != nil {
38 | 		return err
39 | 	}
40 | 	defer os.Remove(fifo)
41 | 
42 | 	scpArgs := []string{
43 | 		"-i", fifo,
44 | 		"-o", "UserKnownHostsFile=/dev/null",
45 | 		"-o", "StrictHostKeyChecking=no",
46 | 		"-o", "ConnectTimeout=60",
47 | 	}
48 | 	if scpParams.recursive {
49 | 		scpArgs = append(scpArgs, "-r")
50 | 	}
51 | 
52 | 	scpArgs = append(scpArgs, args...)
53 | 
54 | 	fmt.Println(scpArgs) // NOTE(review): looks like a leftover debug print of the scp argument list to stdout — confirm it is intentional
55 | 	c := exec.CommandContext(ctx, "scp", scpArgs...)
56 | 	c.Stdin = os.Stdin
57 | 	c.Stdout = os.Stdout
58 | 	c.Stderr = os.Stderr
59 | 	return c.Run()
60 | }
61 | 
62 | var scpParams struct {
63 | 	recursive bool
64 | }
65 | 
66 | // scpCmd represents the scp command
67 | var scpCmd = &cobra.Command{
68 | 	Use:   "scp [[user@]NODE1:]FILE1 ... [[user@]NODE2:]FILE2",
69 | 	Short: "copy files between hosts via scp",
70 | 	Long: `Copy files between hosts via scp.
71 | 
72 | NODE is IP address or hostname of the node.
73 | `,
74 | 
75 | 	Args: cobra.MinimumNArgs(2),
76 | 	RunE: func(cmd *cobra.Command, args []string) error {
77 | 		well.Go(func(ctx context.Context) error {
78 | 			return scp(ctx, args)
79 | 		})
80 | 		well.Stop()
81 | 		return well.Wait()
82 | 	},
83 | }
84 | 
85 | func init() {
86 | 	scpCmd.Flags().BoolVarP(&scpParams.recursive, "", "r", false, "recursively copy entire directories") // NOTE(review): long flag name is empty, so only -r is usable; presumably should be "recursive" — confirm against pflag semantics
87 | 	rootCmd.AddCommand(scpCmd)
88 | }
--------------------------------------------------------------------------------
/sonobuoy/docker-compose.yml:
--------------------------------------------------------------------------------
 1 | version: '2'
 2 | services:
 3 |   cke:
 4 |     container_name: cke
 5 |     image: ghcr.io/cybozu/ubuntu:24.04
 6 |     networks:
 7 |       app_net:
 8 |         ipv4_address: 172.30.0.11
 9 |     user: "${UID}:${GID}"
10 |     volumes:
11 |       - ./cke.config.yml:/etc/cke/config.yml
12 |       - ./bin:/usr/local/bin
13 |     depends_on:
14 |       - etcd
15 |       - vault
16 |     restart: always
17 |     entrypoint:
18 |       - /usr/local/bin/cke
19 |       - --loglevel=debug
20 |   setup:
21 |     container_name: setup
22 |     image: ghcr.io/cybozu/ubuntu-debug:24.04
23 |     networks:
24 |       app_net:
25 |         ipv4_address: 172.30.0.12
26 |     user: "${UID}:${GID}"
27 |     volumes:
28 |       - ./bin:/usr/local/bin
29 |       - ./setup:/opt/setup
30 |       - ./cke.config.yml:/etc/cke/config.yml
31 |     depends_on:
32 |       - vault
33 |       - etcd
34 |       - cke
35 |     command: /opt/setup/setup.sh
36 |   vault:
37 |     container_name: vault
38 |     image: ghcr.io/cybozu/vault:1.20
39 |     networks:
40 |       app_net:
41 |         ipv4_address: 172.30.0.13
42 |     user: "${UID}:${GID}"
43 |     cap_add:
44 |       - IPC_LOCK
45 |     depends_on:
46 |       - etcd
47 |     volumes:
48 |       - ./vault.hcl:/etc/vault/config.hcl
49 |       - ./bin:/host
50 |       - ./vault-entrypoint.sh:/entrypoint.sh
51 |     ports:
52 |       - "8200:8200"
53 |       - "8201:8201"
54 |     restart: always
55 |     entrypoint:
56 |       - /entrypoint.sh
57 |   etcd:
58 |     container_name: etcd
59 |     image: ghcr.io/cybozu/etcd:3.6
60 |     networks:
61 |       app_net:
62 |         ipv4_address: 172.30.0.14
63 |     user: "${UID}:${GID}"
64 |     volumes:
65 |       - ./etcd-data:/data/etcd
66 |       - ./etcd.conf.yml:/etc/etcd/etcd.conf.yml
67 |       - ./bin:/host
68 |       - ./etcd-entrypoint.sh:/entrypoint.sh
69 |     ports:
70 |       - "2379:2379"
71 |       - "2380:2380"
72 |     restart: always
73 |     entrypoint:
74 |       - /entrypoint.sh
75 | networks:
76 |   app_net:
77 |     driver: bridge
78 |     ipam:
79 |       driver: default
80 |       config:
81 |         - subnet: 172.30.0.0/24
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | # Makefile for cke
 2 | 
 3 | ETCD_VERSION = 3.6.5
 4 | 
 5 | .PHONY: all
 6 | all: test
 7 | 
 8 | .PHONY: setup
 9 | setup:
10 | 	curl -fsL https://github.com/etcd-io/etcd/releases/download/v$(ETCD_VERSION)/etcd-v$(ETCD_VERSION)-linux-amd64.tar.gz | sudo tar -xzf - --strip-components=1 -C /usr/local/bin etcd-v$(ETCD_VERSION)-linux-amd64/etcd etcd-v$(ETCD_VERSION)-linux-amd64/etcdctl
11 | 
12 | .PHONY: check-generate
13 | check-generate:
14 | 	# gqlgen needs additional dependencies that does not exist in go.mod.
15 | 	cd sabakan/mock; go run github.com/99designs/gqlgen@"$$(go list -f '{{.Version}}' -m github.com/99designs/gqlgen)" generate
16 | 	go mod tidy
17 | 	$(MAKE) static
18 | 	git diff --exit-code --name-only
19 | 
20 | .PHONY: test
21 | test: test-tools
22 | 	test -z "$$(gofmt -s -l . | tee /dev/stderr)"
23 | 	staticcheck ./...
24 | 	# temporarily disable nilerr due to a false positive
25 | 	# https://github.com/cybozu-go/cke/runs/4298557316?check_suite_focus=true
26 | 	#test -z "$$(nilerr ./... 2>&1 | tee /dev/stderr)"
27 | 	test -z "$$(custom-checker -restrictpkg.packages=html/template,log ./... 2>&1 | tee /dev/stderr)"
28 | 	go vet ./...
29 | 	go test -race -v ./...
30 | 
31 | .PHONY: install
32 | install:
33 | 	go install ./pkg/...
34 | 
35 | .PHONY: static
36 | static: goimports
37 | 	go generate ./static
38 | 
39 | .PHONY: test-tools
40 | test-tools: staticcheck nilerr goimports custom-checker
41 | 
42 | .PHONY: staticcheck
43 | staticcheck:
44 | 	if ! which staticcheck >/dev/null; then \
45 | 		env GOFLAGS= go install honnef.co/go/tools/cmd/staticcheck@latest; \
46 | 	fi
47 | 
48 | .PHONY: nilerr
49 | nilerr:
50 | 	if ! which nilerr >/dev/null; then \
51 | 		env GOFLAGS= go install github.com/gostaticanalysis/nilerr/cmd/nilerr@latest; \
52 | 	fi
53 | 
54 | .PHONY: goimports
55 | goimports:
56 | 	if ! which goimports >/dev/null; then \
57 | 		env GOFLAGS= go install golang.org/x/tools/cmd/goimports@latest; \
58 | 	fi
59 | 
60 | .PHONY: custom-checker
61 | custom-checker:
62 | 	if ! which custom-checker >/dev/null; then \
63 | 		env GOFLAGS= go install github.com/cybozu-go/golang-custom-analyzer/cmd/custom-checker@latest; \
64 | 	fi
65 | 
--------------------------------------------------------------------------------