├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md └── workflows │ ├── build-base.yml │ ├── offline-dev.yml │ ├── offline-master.yml │ └── sync-images-to-aliyun.yml ├── .gitignore ├── 00-kernel.yml ├── 01-base.yml ├── 02-container-engine.yml ├── 03-kubernetes-component.yml ├── 04-chrony.yml ├── 05-load-balancer.yml ├── 06-etcd.yml ├── 07-kubernetes-certificates.yml ├── 08-kubernetes-master.yml ├── 09-kubernetes-worker.yml ├── 10-post.yml ├── 21-network-plugin.yml ├── 22-ingress-controller.yml ├── 23-kubernetes-dashboard.yml ├── 24-metrics-server.yml ├── 25-cert-manager.yml ├── 31-docker-to-containerd.yml ├── 81-add-worker.yml ├── 82-add-master.yml ├── 83-add-etcd.yml ├── 84-remove-worker.yml ├── 85-remove-master.yml ├── 86-remove-etcd.yml ├── 87-remove-node.yml ├── 90-init-cluster.yml ├── 91-upgrade-cluster.yml ├── 92-certificates-renew.yml ├── 93-backup-cluster.yml ├── 94-restore-cluster.yml ├── 99-reset-cluster.yml ├── LICENSE ├── README.md ├── Vagrantfile ├── ansible.cfg ├── ansible ├── homebrew-core │ ├── ansible.rb │ └── sshpass.rb └── install.sh ├── docs ├── 00-安装须知.md ├── 01-集群安装.md ├── 02-节点管理.md ├── 02 │ ├── Docker切换为containerd.md │ ├── 删除 etcd 节点.md │ ├── 删除节点 master 角色.md │ ├── 删除节点 worker 角色.md │ ├── 删除集群节点.md │ ├── 增加或删除 lb 节点.md │ ├── 添加 etcd 节点.md │ ├── 添加 master 节点.md │ └── 添加 worker 节点.md ├── 03-证书轮换.md ├── 04-集群升级.md ├── 05-集群备份.md ├── 06-集群恢复.md ├── 07-集群重置.md ├── 08-离线安装.md ├── 09-扩展阅读.md └── 09 │ └── 如何选择运行时组件.md ├── example ├── hosts.allinone.hostname.ini ├── hosts.allinone.ip.ini ├── hosts.m-master.hostname.ini ├── hosts.m-master.ip.ini ├── hosts.s-master.hostname.ini ├── hosts.s-master.ip.ini └── variables.yaml ├── offline ├── Dockerfile ├── Dockerfile.base ├── default.conf ├── download-images-to-registry.sh ├── download-yum.sh └── sync-images.sh └── roles ├── backup └── tasks │ └── main.yml ├── chrony ├── tasks │ ├── deploy.yml │ └── main.yml └── templates │ ├── 20-kubelet-override.conf.j2 │ └── chrony.yaml.j2 ├── docker-to-containerd ├── files │ └── docker.gpg ├── tasks │ └── main.yml └── templates │ ├── containerd │ ├── config.toml.j2 │ └── crictl.yaml.j2 │ └── http-proxy.conf.j2 ├── etcd ├── certificates │ ├── tasks │ │ ├── certs_stat.yml │ │ ├── distribute.yml │ │ ├── generate.yml │ │ └── main.yml │ └── templates │ │ └── etcd-openssl.cnf.j2 └── install │ ├── tasks │ ├── containerd.yml │ ├── docker.yml │ └── main.yml │ └── templates │ ├── 20-kubelet-override.conf.j2 │ ├── etcd-external.yaml.j2 │ └── etcdtools.j2 ├── kube-certificates ├── tasks │ ├── certs_stat.yml │ ├── common.yml │ ├── distribute.yml │ ├── kubeconfig.yml │ └── main.yml └── templates │ └── kube-openssl.cnf.j2 ├── kube-master ├── tasks │ ├── kubeadm-config.yml │ ├── main.yml │ ├── master-init.yml │ └── master-join.yml └── templates │ ├── apiserver-audit-policy.yaml.j2 │ ├── kubeadm-controlplane-init.v1beta1.yaml.j2 │ ├── kubeadm-controlplane-init.v1beta2.yaml.j2 │ ├── kubeadm-controlplane-join.v1beta1.yaml.j2 │ ├── kubeadm-controlplane-join.v1beta2.yaml.j2 │ ├── kubelet-certificates-renewal.yaml.j2 │ ├── kubelet-config.v1beta1.yaml.j2 │ ├── pod-security-policy.yaml.j2 │ └── secrets-encryption.yaml.j2 ├── kube-worker ├── tasks │ └── main.yml └── templates │ ├── kubeadm-join.v1beta1.yaml.j2 │ ├── kubeadm-join.v1beta2.yaml.j2 │ └── kubelet-config.v1beta1.yaml.j2 ├── load-balancer ├── defaults │ └── main.yml ├── tasks │ ├── envoy.yml │ ├── external.yml │ ├── haproxy.yml │ ├── internal.yml │ ├── main.yml │ ├── nginx.yml │ └── openresty.yml └── templates │ ├── 
20-kubelet-override.conf.j2 │ ├── envoy │ ├── envoy.conf.yaml.j2 │ └── envoy.yaml.j2 │ ├── haproxy │ ├── haproxy.cfg.j2 │ └── haproxy.yaml.j2 │ ├── keepalived.yaml.j2 │ ├── nginx │ ├── nginx.conf.j2 │ └── nginx.yaml.j2 │ └── openresty │ ├── openresty.conf.j2 │ └── openresty.yaml.j2 ├── plugins ├── cert-manager │ ├── tasks │ │ └── main.yml │ └── templates │ │ └── cert-manager.yaml.j2 ├── ingress-controller │ ├── tasks │ │ ├── main.yml │ │ ├── nginx-ingress-controller.yml │ │ └── traefik-ingress-controller.yml │ └── templates │ │ ├── nginx-ingress-controller.yaml.j2 │ │ └── traefik-ingress-controller │ │ ├── crds.yaml.j2 │ │ └── traefik-ingress-controller.yaml.j2 ├── kubernetes-dashboard │ ├── tasks │ │ └── main.yml │ └── templates │ │ └── kubernetes-dashboard.yaml.j2 ├── metrics-server │ ├── tasks │ │ └── main.yml │ └── templates │ │ └── metrics-server.yaml.j2 └── network-plugins │ ├── tasks │ ├── calico.yml │ ├── flannel.yml │ └── main.yml │ └── templates │ ├── calico │ ├── calico-typha.yaml.j2 │ └── calicoctl-daemonset.yaml.j2 │ └── kube-flannel.yaml.j2 ├── post └── tasks │ └── main.yml ├── prepare ├── base │ ├── tasks │ │ ├── centos.yml │ │ ├── common.yml │ │ ├── debian.yml │ │ ├── main.yml │ │ ├── verify_node.yml │ │ └── verify_variables.yml │ └── templates │ │ ├── 10-k8s-modules.conf.j2 │ │ ├── 30-k8s-ulimits.conf.j2 │ │ ├── 95-k8s-sysctl.conf.j2 │ │ └── sunrpc.conf.j2 ├── container-engine │ ├── defaults │ │ └── main.yml │ ├── files │ │ └── docker.gpg │ ├── tasks │ │ ├── containerd │ │ │ ├── centos.yml │ │ │ ├── common.yml │ │ │ ├── debian.yml │ │ │ └── main.yml │ │ ├── docker │ │ │ ├── centos.yml │ │ │ ├── common.yml │ │ │ ├── debian.yml │ │ │ └── main.yml │ │ └── main.yml │ └── templates │ │ ├── containerd │ │ ├── config.toml.j2 │ │ ├── crictl.yaml.j2 │ │ └── hosts.toml.j2 │ │ ├── docker-daemon.json.j2 │ │ └── http-proxy.conf.j2 ├── kernel │ └── tasks │ │ ├── centos.yml │ │ ├── main.yml │ │ └── ubuntu.yml ├── kubernetes │ ├── files │ │ └── kubernetes.gpg │ └── tasks │ │ ├── centos.yml │ │ ├── debian.yml │ │ └── main.yml └── variables │ ├── defaults │ └── main.yml │ └── tasks │ └── main.yml ├── remove ├── etcd │ └── tasks │ │ └── main.yml ├── master │ ├── defaults │ │ └── main.yml │ └── tasks │ │ └── main.yml ├── node │ ├── defaults │ │ └── main.yml │ └── tasks │ │ └── main.yml └── worker │ ├── defaults │ └── main.yml │ └── tasks │ └── main.yml ├── reset └── tasks │ ├── centos.yml │ ├── debian.yml │ └── main.yml ├── restore └── tasks │ └── main.yml └── upgrade ├── files └── kubernetes.gpg └── tasks ├── centos.yml ├── common.yml ├── debian.yml └── main.yml /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: 缺陷报告 3 | about: 创建缺陷报告以帮助我们改进 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **缺陷描述** 11 | 12 | 清晰而简明的描述缺陷是什么。 13 | 14 | **环境 (请填写以下信息):** 15 | 16 | 执行下面括号中的命令,提交返回结果 17 | 18 | - **OS** (`printf "$(uname -srm)\n$(cat /etc/os-release)\n"`): 19 | 20 | - **Ansible版本** (`ansible --version`): 21 | 22 | - **Python版本** (`python --version`): 23 | 24 | - **Kubeadm-ha版本(commit)** (`git rev-parse --short HEAD`): 25 | 26 | **如何复现** 27 | 28 | 复现的步骤: 29 | 30 | 1. 第一步:编写 inventory.ini 文件,内容如下 31 | ```ini 32 | [all] 33 | 192.168.56.11 ansible_port=22 ansible_user="vagrant" ansible_ssh_pass="vagrant" 34 | ...... 35 | ``` 36 | 37 | 2. 第二步:编写 variables.yaml 文件,内容如下 38 | ```yaml 39 | skip_verify_node: false 40 | timezone: Asia/Shanghai 41 | ...... 42 | ``` 43 | 44 | 3. 
第三步:执行部署命令,命令如下 45 | ```yaml 46 | ansible-playbook -i inventory.ini -e @variables.yaml 90-init-cluster.yml 47 | ``` 48 | 49 | 4. 出现错误 50 | ``` 51 | 错误内容...... 52 | ``` 53 | 54 | **预期结果** 55 | 56 | 对你期望发生的结果清晰而简洁的描述。 57 | 58 | **屏幕截图** 59 | 60 | 如果可以的话,添加屏幕截图来帮助解释你的问题。 61 | 62 | **其他事项** 63 | 64 | 在此处添加有关该问题的任何其他事项。 65 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: 功能需求 3 | about: 为该项目提出一个想法 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **简要描述您所需的功能,功能与什么问题有关** 11 | 12 | 清晰而简明的描述所需的功能,功能与什么问题有关。例如。当……的时候,这个功能能够……。 13 | 14 | **描述您想要的解决方案** 15 | 16 | 对您想要解决方案简洁明了的描述。 17 | 18 | **描述您考虑过的替代方案** 19 | 20 | 对您考虑过的所有替代解决方案或功能的简洁明了的描述。 21 | 22 | **其他事项** 23 | 24 | 在此处添加有关该功能的任何其他事项。 25 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | .vagrant 3 | *.retry 4 | debug/ 5 | dev/ -------------------------------------------------------------------------------- /00-kernel.yml: -------------------------------------------------------------------------------- 1 | # 升级所有节点内核 2 | - hosts: all 3 | gather_facts: false 4 | tasks: 5 | - name: "设置代理服务器环境变量" 6 | set_fact: 7 | proxy_env: 8 | http_proxy: "{{ http_proxy | default ('') }}" 9 | HTTP_PROXY: "{{ http_proxy | default ('') }}" 10 | https_proxy: "{{ https_proxy | default ('') }}" 11 | HTTPS_PROXY: "{{ https_proxy | default ('') }}" 12 | no_proxy: "{{ no_proxy | default ('') }}" 13 | NO_PROXY: "{{ no_proxy | default ('') }}" 14 | no_log: true 15 | 16 | - hosts: 17 | - all 18 | roles: 19 | - prepare/variables 20 | - prepare/base 21 | - prepare/kernel 22 | environment: "{{ proxy_env }}" -------------------------------------------------------------------------------- /01-base.yml: -------------------------------------------------------------------------------- 1 | # 所有节点安装基础组件并加载所需内核模块 2 | - hosts: all 3 | gather_facts: false 4 | tasks: 5 | - name: "设置代理服务器环境变量" 6 | set_fact: 7 | proxy_env: 8 | http_proxy: "{{ http_proxy | default ('') }}" 9 | HTTP_PROXY: "{{ http_proxy | default ('') }}" 10 | https_proxy: "{{ https_proxy | default ('') }}" 11 | HTTPS_PROXY: "{{ https_proxy | default ('') }}" 12 | no_proxy: "{{ no_proxy | default ('') }}" 13 | NO_PROXY: "{{ no_proxy | default ('') }}" 14 | no_log: true 15 | 16 | - hosts: 17 | - all 18 | roles: 19 | - prepare/variables 20 | - prepare/base 21 | environment: "{{ proxy_env }}" -------------------------------------------------------------------------------- /02-container-engine.yml: -------------------------------------------------------------------------------- 1 | # 所有节点安装 Docker 2 | - hosts: all 3 | gather_facts: false 4 | tasks: 5 | - name: "设置代理服务器环境变量" 6 | set_fact: 7 | proxy_env: 8 | http_proxy: "{{ http_proxy | default ('') }}" 9 | HTTP_PROXY: "{{ http_proxy | default ('') }}" 10 | https_proxy: "{{ https_proxy | default ('') }}" 11 | HTTPS_PROXY: "{{ https_proxy | default ('') }}" 12 | no_proxy: "{{ no_proxy | default ('') }}" 13 | NO_PROXY: "{{ no_proxy | default ('') }}" 14 | no_log: true 15 | 16 | - hosts: 17 | - all 18 | roles: 19 | - prepare/variables 20 | - prepare/container-engine 21 | environment: "{{ proxy_env }}" -------------------------------------------------------------------------------- /03-kubernetes-component.yml: 
-------------------------------------------------------------------------------- 1 | # 所有节点安装 kubernetes 组件 2 | - hosts: all 3 | gather_facts: false 4 | tasks: 5 | - name: "设置代理服务器环境变量" 6 | set_fact: 7 | proxy_env: 8 | http_proxy: "{{ http_proxy | default ('') }}" 9 | HTTP_PROXY: "{{ http_proxy | default ('') }}" 10 | https_proxy: "{{ https_proxy | default ('') }}" 11 | HTTPS_PROXY: "{{ https_proxy | default ('') }}" 12 | no_proxy: "{{ no_proxy | default ('') }}" 13 | NO_PROXY: "{{ no_proxy | default ('') }}" 14 | no_log: true 15 | 16 | - hosts: 17 | - all 18 | roles: 19 | - prepare/variables 20 | - prepare/kubernetes 21 | environment: "{{ proxy_env }}" -------------------------------------------------------------------------------- /04-chrony.yml: -------------------------------------------------------------------------------- 1 | - hosts: 2 | - all 3 | roles: 4 | - prepare/variables 5 | - chrony -------------------------------------------------------------------------------- /05-load-balancer.yml: -------------------------------------------------------------------------------- 1 | # 所有节点安装 load-balancer 2 | # 进行apiserver负载均衡 3 | - hosts: 4 | - all 5 | roles: 6 | - prepare/variables 7 | - load-balancer -------------------------------------------------------------------------------- /06-etcd.yml: -------------------------------------------------------------------------------- 1 | # 安装etcd,并分发etcd证书到master节点 2 | - hosts: 3 | - etcd 4 | - new-etcd 5 | - kube-master 6 | - new-master 7 | roles: 8 | - prepare/variables 9 | - etcd/certificates 10 | - etcd/install -------------------------------------------------------------------------------- /07-kubernetes-certificates.yml: -------------------------------------------------------------------------------- 1 | # 生成kubernetes属性证书 2 | - hosts: 3 | - kube-master 4 | - new-master 5 | - kube-worker 6 | - new-worker 7 | roles: 8 | - prepare/variables 9 | - kube-certificates -------------------------------------------------------------------------------- /08-kubernetes-master.yml: -------------------------------------------------------------------------------- 1 | # 初始化master节点 2 | - hosts: 3 | - kube-master 4 | - new-master 5 | roles: 6 | - prepare/variables 7 | - kube-master -------------------------------------------------------------------------------- /09-kubernetes-worker.yml: -------------------------------------------------------------------------------- 1 | # 初始化worker节点 2 | - hosts: 3 | - kube-worker 4 | - new-worker 5 | roles: 6 | - prepare/variables 7 | - kube-worker -------------------------------------------------------------------------------- /10-post.yml: -------------------------------------------------------------------------------- 1 | # 标记各节点角色 2 | - hosts: 3 | - kube-master 4 | - kube-worker 5 | - new-master 6 | - new-worker 7 | roles: 8 | - prepare/variables 9 | - post -------------------------------------------------------------------------------- /21-network-plugin.yml: -------------------------------------------------------------------------------- 1 | # 安装基础插件 2 | - hosts: 3 | - kube-master[0] 4 | roles: 5 | - prepare/variables 6 | - plugins/network-plugins -------------------------------------------------------------------------------- /22-ingress-controller.yml: -------------------------------------------------------------------------------- 1 | # 安装基础插件 2 | - hosts: 3 | - kube-master[0] 4 | roles: 5 | - prepare/variables 6 | - plugins/ingress-controller 
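The numbered stage playbooks listed so far (00-kernel.yml through 22-ingress-controller.yml) can also be run one stage at a time against the same inventory used for 90-init-cluster.yml — the project docs further below do exactly this for 00-kernel.yml and 04-load-balancer.yml. A minimal sketch, assuming the sample inventory shipped under `example/` and default variables (both are assumptions; substitute your own files):

```
# Sketch only: run selected stages individually against your own inventory.
ansible-playbook -i example/hosts.m-master.ip.ini 01-base.yml
ansible-playbook -i example/hosts.m-master.ip.ini 02-container-engine.yml
ansible-playbook -i example/hosts.m-master.ip.ini 03-kubernetes-component.yml
# If the cluster was installed with the advanced variables file, append it to every run:
# ansible-playbook -i example/hosts.m-master.ip.ini -e @example/variables.yaml 01-base.yml
```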
-------------------------------------------------------------------------------- /23-kubernetes-dashboard.yml: -------------------------------------------------------------------------------- 1 | # 安装基础插件 2 | - hosts: 3 | - kube-master[0] 4 | roles: 5 | - prepare/variables 6 | - plugins/kubernetes-dashboard -------------------------------------------------------------------------------- /24-metrics-server.yml: -------------------------------------------------------------------------------- 1 | # 安装基础插件 2 | - hosts: 3 | - kube-master[0] 4 | roles: 5 | - prepare/variables 6 | - plugins/metrics-server -------------------------------------------------------------------------------- /25-cert-manager.yml: -------------------------------------------------------------------------------- 1 | # 安装基础插件 2 | - hosts: 3 | - kube-master[0] 4 | roles: 5 | - prepare/variables 6 | - plugins/cert-manager -------------------------------------------------------------------------------- /31-docker-to-containerd.yml: -------------------------------------------------------------------------------- 1 | # 校验节点软件系统及硬件系统、安装必要基础组件、docker、kubeadm、kubelet、kubectl 2 | - hosts: all 3 | vars_prompt: 4 | name: "restore_confirmation" 5 | prompt: "确认从 Docker 切换为 containerd ? 输入“yes”确认切换。" 6 | default: "no" 7 | private: no 8 | pre_tasks: 9 | - name: 确认切换 10 | fail: 11 | msg: "确认切换失败,取消切换操作。" 12 | when: restore_confirmation != "yes" 13 | gather_facts: false 14 | tasks: 15 | - name: "设置代理服务器环境变量" 16 | set_fact: 17 | proxy_env: 18 | http_proxy: "{{ http_proxy | default ('') }}" 19 | HTTP_PROXY: "{{ http_proxy | default ('') }}" 20 | https_proxy: "{{ https_proxy | default ('') }}" 21 | HTTPS_PROXY: "{{ https_proxy | default ('') }}" 22 | no_proxy: "{{ no_proxy | default ('') }}" 23 | NO_PROXY: "{{ no_proxy | default ('') }}" 24 | no_log: true 25 | 26 | - hosts: 27 | - all 28 | roles: 29 | - prepare/variables 30 | - docker-to-containerd 31 | environment: "{{ proxy_env }}" 32 | 33 | - hosts: 34 | - kube-master 35 | - new-master 36 | roles: 37 | - prepare/variables 38 | - kube-master 39 | 40 | - hosts: 41 | - kube-master 42 | - new-master 43 | - kube-worker 44 | - new-worker 45 | roles: 46 | - prepare/variables 47 | - kube-worker 48 | 49 | - hosts: 50 | - etcd 51 | - new-etcd 52 | - kube-master 53 | - new-master 54 | roles: 55 | - prepare/variables 56 | - etcd/certificates 57 | - etcd/install -------------------------------------------------------------------------------- /81-add-worker.yml: -------------------------------------------------------------------------------- 1 | # 校验节点软件系统及硬件系统、安装必要基础组件、docker、kubeadm、kubelet、kubectl 2 | - hosts: all 3 | gather_facts: false 4 | tasks: 5 | - name: "设置代理服务器环境变量" 6 | set_fact: 7 | proxy_env: 8 | http_proxy: "{{ http_proxy | default ('') }}" 9 | HTTP_PROXY: "{{ http_proxy | default ('') }}" 10 | https_proxy: "{{ https_proxy | default ('') }}" 11 | HTTPS_PROXY: "{{ https_proxy | default ('') }}" 12 | no_proxy: "{{ no_proxy | default ('') }}" 13 | NO_PROXY: "{{ no_proxy | default ('') }}" 14 | no_log: true 15 | 16 | - hosts: 17 | - new-worker 18 | roles: 19 | - prepare/variables 20 | - prepare/base 21 | - prepare/container-engine 22 | - prepare/kubernetes 23 | environment: "{{ proxy_env }}" 24 | 25 | # 进行apiserver负载均衡 26 | - hosts: 27 | - new-worker 28 | roles: 29 | - prepare/variables 30 | - load-balancer 31 | 32 | # 生成master节点证书 33 | - hosts: 34 | - kube-master 35 | - new-master 36 | - kube-worker 37 | - new-worker 38 | roles: 39 | - prepare/variables 40 | - kube-certificates 41 | 42 | # 
初始化worker节点 43 | - hosts: 44 | - new-worker 45 | roles: 46 | - prepare/variables 47 | - kube-worker 48 | 49 | # 标记各节点角色 50 | - hosts: 51 | - new-worker 52 | roles: 53 | - prepare/variables 54 | - post -------------------------------------------------------------------------------- /82-add-master.yml: -------------------------------------------------------------------------------- 1 | # 校验节点软件系统及硬件系统、安装必要基础组件、docker、kubeadm、kubelet、kubectl 2 | - hosts: all 3 | gather_facts: false 4 | tasks: 5 | - name: "设置代理服务器环境变量" 6 | set_fact: 7 | proxy_env: 8 | http_proxy: "{{ http_proxy | default ('') }}" 9 | HTTP_PROXY: "{{ http_proxy | default ('') }}" 10 | https_proxy: "{{ https_proxy | default ('') }}" 11 | HTTPS_PROXY: "{{ https_proxy | default ('') }}" 12 | no_proxy: "{{ no_proxy | default ('') }}" 13 | NO_PROXY: "{{ no_proxy | default ('') }}" 14 | no_log: true 15 | 16 | - hosts: 17 | - new-master 18 | roles: 19 | - prepare/variables 20 | - prepare/base 21 | - prepare/container-engine 22 | - prepare/kubernetes 23 | environment: "{{ proxy_env }}" 24 | 25 | # 进行apiserver负载均衡 26 | - hosts: 27 | - all 28 | roles: 29 | - prepare/variables 30 | - load-balancer 31 | 32 | # 分发 etcd client 证书 33 | - hosts: 34 | - etcd 35 | - kube-master 36 | - new-master 37 | roles: 38 | - prepare/variables 39 | - etcd/certificates 40 | 41 | # 生成master节点证书 42 | - hosts: 43 | - kube-master 44 | - new-master 45 | - kube-worker 46 | - new-worker 47 | roles: 48 | - prepare/variables 49 | - kube-certificates 50 | 51 | # 初始化master节点 52 | - hosts: 53 | - kube-master 54 | - new-master 55 | roles: 56 | - prepare/variables 57 | - kube-master 58 | - kube-worker 59 | 60 | # 标记各节点角色 61 | - hosts: 62 | - new-master 63 | roles: 64 | - prepare/variables 65 | - post -------------------------------------------------------------------------------- /83-add-etcd.yml: -------------------------------------------------------------------------------- 1 | - hosts: all 2 | gather_facts: false 3 | tasks: 4 | - name: "设置代理服务器环境变量" 5 | set_fact: 6 | proxy_env: 7 | http_proxy: "{{ http_proxy | default ('') }}" 8 | HTTP_PROXY: "{{ http_proxy | default ('') }}" 9 | https_proxy: "{{ https_proxy | default ('') }}" 10 | HTTPS_PROXY: "{{ https_proxy | default ('') }}" 11 | no_proxy: "{{ no_proxy | default ('') }}" 12 | NO_PROXY: "{{ no_proxy | default ('') }}" 13 | no_log: true 14 | 15 | - hosts: new-etcd[0] 16 | tasks: 17 | - name: 校验 new-etcd 节点组数量 18 | assert: 19 | that: groups['new-etcd']|length <= 1 20 | msg: "同时只能添加一个 etcd 节点。" 21 | 22 | - hosts: 23 | - new-etcd 24 | roles: 25 | - prepare/variables 26 | - prepare/base 27 | - prepare/container-engine 28 | - prepare/kubernetes 29 | environment: "{{ proxy_env }}" 30 | 31 | - hosts: 32 | - etcd 33 | - new-etcd 34 | roles: 35 | - prepare/variables 36 | - etcd/certificates 37 | - etcd/install 38 | 39 | # 添加etcd节点后刷新master节点kubeadm配置 40 | - hosts: 41 | - kube-master 42 | roles: 43 | - prepare/variables 44 | - kube-master 45 | 46 | # 标记各节点角色 47 | - hosts: 48 | - new-etcd 49 | roles: 50 | - prepare/variables 51 | - post 52 | 53 | - hosts: 54 | - kube-master 55 | roles: 56 | - prepare/variables 57 | tasks: 58 | - name: "更新第一个 master 节点:{{ inventory_hostname }} 的配置" 59 | shell: > 60 | kubeadm upgrade apply --config=/etc/kubernetes/kubeadm-config.yaml --force --ignore-preflight-errors=ImagePull 61 | when: inventory_hostname == groups['kube-master'][0] 62 | 63 | - name: "更新 master 节点:{{ inventory_hostname }} 的配置" 64 | shell: > 65 | kubeadm upgrade node 66 | {% if kube_version.split('.')[1]|int == 13 %} 67 | 
experimental-control-plane 68 | {% endif %} 69 | when: 70 | - inventory_hostname != groups['kube-master'][0] 71 | - inventory_hostname in groups['kube-master'] -------------------------------------------------------------------------------- /84-remove-worker.yml: -------------------------------------------------------------------------------- 1 | - hosts: 2 | - del-worker 3 | vars_prompt: 4 | name: "restore_confirmation" 5 | prompt: "确认要移除 worker 节点? 输入“yes”确认移除。" 6 | default: "no" 7 | private: no 8 | pre_tasks: 9 | - name: 移除确认 10 | fail: 11 | msg: "移除确认失败,取消移除 worker 节点操作。" 12 | when: restore_confirmation != "yes" 13 | roles: 14 | - prepare/variables 15 | - remove/worker -------------------------------------------------------------------------------- /85-remove-master.yml: -------------------------------------------------------------------------------- 1 | - hosts: 2 | - del-master 3 | vars_prompt: 4 | name: "restore_confirmation" 5 | prompt: "确认要移除 master 节点? 输入“yes”确认移除。" 6 | default: "no" 7 | private: no 8 | pre_tasks: 9 | - name: 移除确认 10 | fail: 11 | msg: "移除确认失败,取消移除 master 节点操作。" 12 | when: restore_confirmation != "yes" 13 | roles: 14 | - prepare/variables 15 | - remove/master -------------------------------------------------------------------------------- /86-remove-etcd.yml: -------------------------------------------------------------------------------- 1 | - hosts: 2 | - del-etcd 3 | vars_prompt: 4 | name: "restore_confirmation" 5 | prompt: "确认要移除 etcd 节点? 输入“yes”确认移除。" 6 | default: "no" 7 | private: no 8 | pre_tasks: 9 | - name: 移除确认 10 | fail: 11 | msg: "移除确认失败,取消移除 etcd 节点操作。" 12 | when: restore_confirmation != "yes" 13 | roles: 14 | - prepare/variables 15 | - remove/etcd 16 | 17 | # 删除etcd节点后刷新master节点kubeadm配置 18 | - hosts: 19 | - kube-master 20 | roles: 21 | - prepare/variables 22 | - kube-master 23 | 24 | - hosts: 25 | - kube-master 26 | roles: 27 | - prepare/variables 28 | tasks: 29 | - name: "更新第一个 master 节点:{{ inventory_hostname }} 的配置" 30 | shell: > 31 | kubeadm upgrade apply --config=/etc/kubernetes/kubeadm-config.yaml --force --ignore-preflight-errors=ImagePull 32 | when: inventory_hostname == groups['kube-master'][0] 33 | 34 | - name: "更新 master 节点:{{ inventory_hostname }} 的配置" 35 | shell: > 36 | kubeadm upgrade node 37 | {% if kube_version.split('.')[1]|int == 13 %} 38 | experimental-control-plane 39 | {% endif %} 40 | when: 41 | - inventory_hostname != groups['kube-master'][0] 42 | - inventory_hostname in groups['kube-master'] -------------------------------------------------------------------------------- /87-remove-node.yml: -------------------------------------------------------------------------------- 1 | - hosts: 2 | - del-node 3 | vars_prompt: 4 | name: "restore_confirmation" 5 | prompt: "确认要移除节点? 
输入“yes”确认移除。" 6 | default: "no" 7 | private: no 8 | pre_tasks: 9 | - name: 移除确认 10 | fail: 11 | msg: "移除确认失败,取消移除节点操作。" 12 | when: restore_confirmation != "yes" 13 | roles: 14 | - prepare/variables 15 | - remove/node 16 | - reset -------------------------------------------------------------------------------- /90-init-cluster.yml: -------------------------------------------------------------------------------- 1 | # 校验节点软件系统及硬件系统、安装必要基础组件、docker、kubeadm、kubelet、kubectl 2 | - hosts: 3 | - all 4 | gather_facts: false 5 | tasks: 6 | - name: "设置代理服务器环境变量" 7 | set_fact: 8 | proxy_env: 9 | http_proxy: "{{ http_proxy | default ('') }}" 10 | HTTP_PROXY: "{{ http_proxy | default ('') }}" 11 | https_proxy: "{{ https_proxy | default ('') }}" 12 | HTTPS_PROXY: "{{ https_proxy | default ('') }}" 13 | no_proxy: "{{ no_proxy | default ('') }}" 14 | NO_PROXY: "{{ no_proxy | default ('') }}" 15 | no_log: true 16 | 17 | - hosts: 18 | - all 19 | roles: 20 | - prepare/variables 21 | - prepare/base 22 | - prepare/container-engine 23 | - prepare/kubernetes 24 | environment: "{{ proxy_env }}" 25 | 26 | # 进行 apiserver 负载均衡 27 | - hosts: 28 | - all 29 | roles: 30 | - prepare/variables 31 | - load-balancer 32 | 33 | - hosts: 34 | - all 35 | roles: 36 | - prepare/variables 37 | - chrony 38 | 39 | # 安装 etcd,并分发etcd证书到master节点 40 | - hosts: 41 | - etcd 42 | - new-etcd 43 | - kube-master 44 | - new-master 45 | roles: 46 | - prepare/variables 47 | - etcd/certificates 48 | - etcd/install 49 | 50 | # 生成 kubernetes 所需证书证书 51 | - hosts: 52 | - kube-master 53 | - new-master 54 | - kube-worker 55 | - new-worker 56 | roles: 57 | - prepare/variables 58 | - kube-certificates 59 | 60 | # 初始化master节点 61 | - hosts: 62 | - kube-master 63 | - new-master 64 | roles: 65 | - prepare/variables 66 | - kube-master 67 | 68 | # 初始化worker节点 69 | - hosts: 70 | - kube-worker 71 | - new-worker 72 | roles: 73 | - prepare/variables 74 | - kube-worker 75 | 76 | # 标记各节点角色 77 | - hosts: 78 | - kube-master 79 | - kube-worker 80 | - new-master 81 | - new-worker 82 | roles: 83 | - prepare/variables 84 | - post 85 | 86 | # 安装基础插件 87 | - hosts: 88 | - kube-master[0] 89 | roles: 90 | - prepare/variables 91 | - plugins/network-plugins 92 | - plugins/ingress-controller 93 | - plugins/metrics-server 94 | - plugins/kubernetes-dashboard 95 | - plugins/cert-manager -------------------------------------------------------------------------------- /91-upgrade-cluster.yml: -------------------------------------------------------------------------------- 1 | - hosts: kube-master[0] 2 | tasks: 3 | - name: 校验 kube_upgrade_version 是否设置 4 | assert: 5 | that: kube_upgrade_version is defined 6 | msg: "请设置变量 kube_upgrade_version。" 7 | 8 | - name: 获取当前 kubernetes 实际版本 9 | shell: "kubeadm version -o short" 10 | register: kubeadm_version_output 11 | 12 | - name: 校验 kube_upgrade_version 与当前 kubernetes 版本差异 13 | assert: 14 | that: 15 | - kubeadm_version_output.stdout is version('v{{ kube_upgrade_version }}', '<=') 16 | - "{{ kube_upgrade_version.split('.')[1]|int - kubeadm_version_output.stdout.split('.')[1]|int }} <= 1" 17 | msg: "请设置正确的升级版本号,次版本号只能升一个版本,不能跨版本升级(比如:1.13版本只能升级到1.14,不能直接升级1.15)。" 18 | 19 | - hosts: 20 | - all 21 | roles: 22 | - prepare/variables 23 | - backup 24 | 25 | - hosts: 26 | - all 27 | gather_facts: false 28 | tasks: 29 | - name: "设置代理服务器环境变量" 30 | set_fact: 31 | proxy_env: 32 | http_proxy: "{{ http_proxy | default ('') }}" 33 | HTTP_PROXY: "{{ http_proxy | default ('') }}" 34 | https_proxy: "{{ https_proxy | default ('') }}" 35 | HTTPS_PROXY: "{{ 
https_proxy | default ('') }}" 36 | no_proxy: "{{ no_proxy | default ('') }}" 37 | NO_PROXY: "{{ no_proxy | default ('') }}" 38 | no_log: true 39 | 40 | - hosts: 41 | - all 42 | serial: 1 43 | roles: 44 | - prepare/variables 45 | - upgrade 46 | environment: "{{ proxy_env }}" 47 | 48 | - hosts: 49 | - etcd 50 | - new-etcd 51 | - kube-master 52 | - new-master 53 | roles: 54 | - prepare/variables 55 | - etcd/certificates 56 | - kube-certificates -------------------------------------------------------------------------------- /92-certificates-renew.yml: -------------------------------------------------------------------------------- 1 | - hosts: 2 | - etcd 3 | - new-etcd 4 | - kube-master 5 | - new-master 6 | - kube-worker 7 | - new-worker 8 | roles: 9 | - prepare/variables 10 | - etcd/certificates 11 | - kube-certificates -------------------------------------------------------------------------------- /93-backup-cluster.yml: -------------------------------------------------------------------------------- 1 | - hosts: localhost 2 | become: no 3 | tasks: 4 | - name: "校验 Ansible 版本" 5 | assert: 6 | msg: "Ansible 版本最低要求 2.8.0,当前版本为 {{ ansible_version.string }},请升级 Ansible 版本。" 7 | that: 8 | - ansible_version.string is version("2.8.0", ">=") 9 | vars: 10 | ansible_connection: local 11 | 12 | # 备份各节点相关数据及文件 13 | - hosts: 14 | - all 15 | roles: 16 | - prepare/variables 17 | - backup -------------------------------------------------------------------------------- /94-restore-cluster.yml: -------------------------------------------------------------------------------- 1 | - hosts: localhost 2 | become: no 3 | tasks: 4 | - name: "校验 Ansible 版本" 5 | assert: 6 | msg: "Ansible 版本最低要求 2.8.0,当前版本为 {{ ansible_version.string }},请升级 Ansible 版本。" 7 | that: 8 | - ansible_version.string is version("2.8.0", ">=") 9 | vars: 10 | ansible_connection: local 11 | 12 | - hosts: 13 | - all 14 | vars_prompt: 15 | name: "restore_confirmation" 16 | prompt: "确认要恢复群集吗? 输入“yes”确认恢复群集。" 17 | default: "no" 18 | private: no 19 | pre_tasks: 20 | - name: 恢复确认 21 | fail: 22 | msg: "恢复确认失败,取消恢复集群操作。" 23 | when: restore_confirmation != "yes" 24 | gather_facts: false 25 | tasks: 26 | - name: "设置代理服务器环境变量" 27 | set_fact: 28 | proxy_env: 29 | http_proxy: "{{ http_proxy | default ('') }}" 30 | HTTP_PROXY: "{{ http_proxy | default ('') }}" 31 | https_proxy: "{{ https_proxy | default ('') }}" 32 | HTTPS_PROXY: "{{ https_proxy | default ('') }}" 33 | no_proxy: "{{ no_proxy | default ('') }}" 34 | NO_PROXY: "{{ no_proxy | default ('') }}" 35 | no_log: true 36 | 37 | - hosts: 38 | - all 39 | roles: 40 | - prepare/variables 41 | - prepare/base 42 | - prepare/container-engine 43 | - prepare/kubernetes 44 | - restore 45 | environment: "{{ proxy_env }}" -------------------------------------------------------------------------------- /99-reset-cluster.yml: -------------------------------------------------------------------------------- 1 | # 所有节点卸载安装 2 | - hosts: 3 | - all 4 | vars_prompt: 5 | name: "reset_confirmation" 6 | prompt: "确认要重置群集吗? 
输入“yes”确认重置群集。" 7 | default: "no" 8 | private: no 9 | pre_tasks: 10 | - name: 重置确认 11 | fail: 12 | msg: "重置确认失败,取消重置集群操作。" 13 | when: reset_confirmation != "yes" 14 | roles: 15 | - prepare/variables 16 | - reset -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2019 setzero 2 | 3 | "Anti 996" License Version 1.0 (Draft) 4 | 5 | Permission is hereby granted to any individual or legal entity 6 | obtaining a copy of this licensed work (including the source code, 7 | documentation and/or related items, hereinafter collectively referred 8 | to as the "licensed work"), free of charge, to deal with the licensed 9 | work for any purpose, including without limitation, the rights to use, 10 | reproduce, modify, prepare derivative works of, distribute, publish 11 | and sublicense the licensed work, subject to the following conditions: 12 | 13 | 1. The individual or the legal entity must conspicuously display, 14 | without modification, this License and the notice on each redistributed 15 | or derivative copy of the Licensed Work. 16 | 17 | 2. The individual or the legal entity must strictly comply with all 18 | applicable laws, regulations, rules and standards of the jurisdiction 19 | relating to labor and employment where the individual is physically 20 | located or where the individual was born or naturalized; or where the 21 | legal entity is registered or is operating (whichever is stricter). In 22 | case that the jurisdiction has no such laws, regulations, rules and 23 | standards or its laws, regulations, rules and standards are 24 | unenforceable, the individual or the legal entity are required to 25 | comply with Core International Labor Standards. 26 | 27 | 3. The individual or the legal entity shall not induce, suggest or force 28 | its employee(s), whether full-time or part-time, or its independent 29 | contractor(s), in any methods, to agree in oral or written form, to 30 | directly or indirectly restrict, weaken or relinquish his or her 31 | rights or remedies under such laws, regulations, rules and standards 32 | relating to labor and employment as mentioned above, no matter whether 33 | such written or oral agreements are enforceable under the laws of the 34 | said jurisdiction, nor shall such individual or the legal entity 35 | limit, in any methods, the rights of its employee(s) or independent 36 | contractor(s) from reporting or complaining to the copyright holder or 37 | relevant authorities monitoring the compliance of the license about 38 | its violation(s) of the said license. 39 | 40 | THE LICENSED WORK IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 41 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 42 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 43 | IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY CLAIM, 44 | DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 45 | OTHERWISE, ARISING FROM, OUT OF OR IN ANY WAY CONNECTION WITH THE 46 | LICENSED WORK OR THE USE OR OTHER DEALINGS IN THE LICENSED WORK. 
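Two conventions are shared by the playbooks above: optional proxy settings are read from the `http_proxy`/`https_proxy`/`no_proxy` extra variables (defaulting to empty), and destructive playbooks gate execution behind a `vars_prompt` confirmation. A minimal sketch of driving both from the command line — the inventory path is the project sample and the proxy address is a placeholder, not something the project requires:

```
# Pass proxy settings as extra vars (203.0.113.10:3128 is a placeholder address).
ansible-playbook -i example/hosts.m-master.ip.ini \
  -e http_proxy=http://203.0.113.10:3128 -e https_proxy=http://203.0.113.10:3128 \
  90-init-cluster.yml
# Ansible skips a vars_prompt for any variable already defined via -e,
# so confirmations can be pre-answered for unattended runs.
ansible-playbook -i example/hosts.m-master.ip.ini -e reset_confirmation=yes 99-reset-cluster.yml
```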
-------------------------------------------------------------------------------- /ansible/homebrew-core/sshpass.rb: -------------------------------------------------------------------------------- 1 | require 'formula' 2 | 3 | class Sshpass < Formula 4 | url 'https://sourceforge.net/projects/sshpass/files/sshpass/1.07/sshpass-1.07.tar.gz' 5 | homepage 'https://sourceforge.net/projects/sshpass' 6 | sha256 '986973c8dd5d75ff0febde6c05c76c6d2b5c4269ec233e5518f14f0fd4e4aaef' 7 | 8 | def install 9 | system "./configure", "--disable-debug", "--disable-dependency-tracking", 10 | "--prefix=#{prefix}" 11 | system "make install" 12 | end 13 | 14 | def test 15 | system "sshpass" 16 | end 17 | end -------------------------------------------------------------------------------- /ansible/install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -e 3 | 4 | is_darwin() { 5 | case "$(uname -s)" in 6 | *darwin* ) true ;; 7 | *Darwin* ) true ;; 8 | * ) false;; 9 | esac 10 | } 11 | 12 | get_distribution() { 13 | lsb_dist="" 14 | # Every system that we officially support has /etc/os-release 15 | if [ -r /etc/os-release ]; then 16 | lsb_dist="$(. /etc/os-release && echo "$ID")" 17 | fi 18 | # Returning an empty string here should be alright since the 19 | # case statements don't act unless you provide an actual value 20 | echo "$lsb_dist" 21 | } 22 | 23 | do_install() { 24 | # perform some very rudimentary platform detection 25 | lsb_dist=$( get_distribution ) 26 | lsb_dist="$(echo "$lsb_dist" | tr '[:upper:]' '[:lower:]')" 27 | # Run setup for each distro accordingly 28 | case "$lsb_dist" in 29 | ubuntu|debian|raspbian) 30 | sudo apt-get update 31 | sudo apt-get install -y git python3-pip sshpass build-essential libssl-dev libffi-dev python3-dev 32 | sudo pip3 install -U pip -i https://mirrors.aliyun.com/pypi/simple/ 33 | sudo pip3 install --no-cache-dir ansible==2.10.4 netaddr -i https://mirrors.aliyun.com/pypi/simple/ 34 | exit 0 35 | ;; 36 | centos|fedora|rhel) 37 | sudo curl -sSLo /etc/yum.repos.d/epel.repo https://mirrors.aliyun.com/repo/epel-7.repo 38 | sudo yum install -y git python3-pip sshpass libffi-devel python3-devel openssl-devel 39 | sudo pip3 install -U pip -i https://mirrors.aliyun.com/pypi/simple/ 40 | sudo pip3 install --no-cache-dir ansible==2.10.4 netaddr -i https://mirrors.aliyun.com/pypi/simple/ 41 | exit 0 42 | ;; 43 | *) 44 | if [ -z "$lsb_dist" ]; then 45 | if is_darwin; then 46 | brew install ./ansible/homebrew-core/ansible.rb 47 | brew install ./ansible/homebrew-core/sshpass.rb 48 | exit 0 49 | fi 50 | fi 51 | echo 52 | echo "ERROR: Unsupported distribution '$lsb_dist'" 53 | echo 54 | exit 1 55 | ;; 56 | esac 57 | exit 1 58 | } 59 | 60 | do_install -------------------------------------------------------------------------------- /docs/01-集群安装.md: -------------------------------------------------------------------------------- 1 | ## 集群安装 2 | 3 | ### 节点信息 4 | 5 | | **ip** | **hostname** | **OS** | **kernel version** | **role** | 6 | | :-----------: | :----------: | :--------: | :----------------: | :----------------: | 7 | | 192.168.56.11 | node1 | CentOS 7.8 | 4.20.13-1 | master etcd worker | 8 | | 192.168.56.12 | node2 | CentOS 7.8 | 4.20.13-1 | master etcd worker | 9 | | 192.168.56.13 | node3 | CentOS 7.8 | 4.20.13-1 | master etcd worker | 10 | | 192.168.56.14 | node4 | CentOS 7.8 | 4.20.13-1 | worker | 11 | 12 | ### 安装 Ansible 运行环境 13 | 14 | ``` 15 | # 任意节点上安装 Ansible 运行环境 16 | sudo ansible/install.sh 17 | ``` 18 | 19 | ### 
集群规划 20 | 21 | 参考本项目 `example` 文件夹下的主机清单文件(ansible inventory),修改各机器的访问地址、用户名、密码,并维护好各节点与角色的关系。文件中配置的用户必须是具有 **root** 权限的用户。项目预定义了6个例子,请完成集群规划后进行修改,生产环境建议一个节点只是一个角色。 22 | 23 | - 搭建集群后有以下两种“样式”显示,请自行选择: 24 | - 样式一 25 | ``` 26 | NAME STATUS ROLES AGE VERSION 27 | 192.168.56.11 Ready control-plane,etcd,master,worker 7m25s v1.21.14 28 | 192.168.56.12 Ready control-plane,etcd,master,worker 5m18s v1.21.14 29 | 192.168.56.13 Ready control-plane,etcd,master,worker 5m18s v1.21.14 30 | 192.168.56.14 Ready worker 4m37s v1.21.14 31 | ``` 32 | 33 | - 样式二 34 | ``` 35 | NAME STATUS ROLES AGE VERSION 36 | node1 Ready control-plane,etcd,master,worker 7m25s v1.21.14 37 | node2 Ready control-plane,etcd,master,worker 5m18s v1.21.14 38 | node3 Ready control-plane,etcd,master,worker 5m18s v1.21.14 39 | node4 Ready worker 4m37s v1.21.14 40 | ``` 41 | 42 | - 对应的 ansible inventory 配置文件示例如下: 43 | 44 | | 节点分配 | 样式一 | 样式二 | 45 | | :--------- | :---------------------------------------------------- | :---------------------------------------------------------------- | 46 | | 单节点 | [hosts.allinone.ip](../example/hosts.allinone.ip.ini) | [hosts.allinone.hostname](../example/hosts.allinone.hostname.ini) | 47 | | 单主多节点 | [hosts.s-master.ip](../example/hosts.s-master.ip.ini) | [hosts.s-master.hostname](../example/hosts.s-master.hostname.ini) | 48 | | 多主多节点 | [hosts.m-master.ip](../example/hosts.m-master.ip.ini) | [hosts.m-master.hostname](../example/hosts.m-master.hostname.ini) | 49 | 50 | ### 部署集群 51 | 52 | - **可选:** 升级内核,默认安装集群是不会升级内核的,若需升级内核,请执行(内核升级完成后请手动重启所有节点): 53 | ``` 54 | ansible-playbook -i example/hosts.m-master.ip.ini 00-kernel.yml 55 | ``` 56 | 57 | - 一句命令拥有一个高可用 kubernetes 集群: 58 | - 基本配置执行 59 | ``` 60 | ansible-playbook -i example/hosts.m-master.ip.ini 90-init-cluster.yml 61 | ``` 62 | 63 | - 高级配置执行,**注意:** 如果安装集群时使用高级配置则以后所有操作都需将 `-e @example/variables.yaml` 参数添加在 `ansible-playbook` 命令中 64 | ``` 65 | ansible-playbook -i example/hosts.m-master.ip.ini -e @example/variables.yaml 90-init-cluster.yml 66 | ``` 67 | 68 | > 1. 本项目所有可配置项都在 `example/variables.yaml` 文件中体现,需自定义配置时删除配置项前注释符即可。 69 | > 2. 
若 `example/hosts.m-master.ip.ini` 文件中与 `example/variables.yaml` 变量值冲突时, `example/variables.yaml` 文件中的变量值优先级最高。 -------------------------------------------------------------------------------- /docs/02-节点管理.md: -------------------------------------------------------------------------------- 1 | ## 节点管理 2 | 3 | - [添加 worker 节点](02/添加%20worker%20节点.md) 4 | - [添加 master 节点](02/添加%20master%20节点.md) 5 | - [添加 etcd 节点](02/添加%20etcd%20节点.md) 6 | - [删除节点 master 角色](02/删除节点%20master%20角色.md) 7 | - [删除节点 worker 角色](02/删除节点%20worker%20角色.md) 8 | - [删除集群节点](02/删除集群节点.md) 9 | - [删除 etcd 节点](02/删除%20etcd%20节点.md) 10 | - [增加或删除 lb 节点](02/增加或删除%20lb%20节点.md) 11 | - [Docker切换为containerd](02/Docker切换为containerd.md) -------------------------------------------------------------------------------- /docs/02/Docker切换为containerd.md: -------------------------------------------------------------------------------- 1 | ## Docker 切换为 containerd 2 | 3 | ### 约定 4 | 5 | - Docker 切换为 containerd 指的是已有集群用的是 docker 作为运行时,现将运行时切换为 containerd。 6 | - 切换时会清除 docker 的所有数据,包括 image、containers,networks,卸载 docker。 7 | - 切换完成后,请耐心等待一段时间,所需时间长短与拉取镜像网络快慢有关。也可使用 [如何选择运行时组件](../09/如何选择运行时组件.md) 中描述的命令进行容器日志查看等操作。 8 | 9 | ### Docker 切换为 containerd 10 | 11 | - 修改 container_manager 变量值为 containerd 12 | 13 | - 基本配置执行 14 | ``` 15 | ansible-playbook -i example/hosts.m-master.ip.ini 31-docker-to-containerd.yml 16 | ``` 17 | 18 | - 高级配置执行 19 | ``` 20 | ansible-playbook -i example/hosts.m-master.ip.ini -e @example/variables.yaml 31-docker-to-containerd.yml 21 | ``` 22 | > 若 `example/hosts.m-master.ip.ini` 文件中与 `example/variables.yaml` 变量值冲突时, `example/variables.yaml` 文件中的变量值优先级最高。 -------------------------------------------------------------------------------- /docs/02/删除 etcd 节点.md: -------------------------------------------------------------------------------- 1 | ## 删除 etcd 节点 2 | 3 | - 在 `[del-etcd]` 节点组中添加需删除节点信息 4 | ```diff 5 | ... 6 | [del-etcd] 7 | + node1 8 | ... 9 | ``` 10 | 11 | - 执行 etcd 节点删除操作 12 | - 基本配置执行 13 | ``` 14 | ansible-playbook -i example/hosts.m-master.ip.ini 86-remove-etcd.yml 15 | ``` 16 | 17 | - **注意:** 如果安装集群时使用高级配置,则使用该命 18 | ``` 19 | ansible-playbook -i example/hosts.m-master.ip.ini -e @example/variables.yaml 86-remove-etcd.yml 20 | ``` 21 | 22 | > 若 `example/hosts.m-master.ip.ini` 文件中与 `example/variables.yaml` 参数冲突时, `example/variables.yaml` 文件中的变量值优先级最高。 23 | 24 | - 删除完成后,更新 `[del-etcd]` 节点组以及 `[etcd]` 节点组 25 | ```diff 26 | ... 27 | [etcd] 28 | - node1 29 | node2 30 | node3 31 | node5 32 | ... 33 | [del-etcd] 34 | - node1 35 | ... 36 | ``` 37 | -------------------------------------------------------------------------------- /docs/02/删除节点 master 角色.md: -------------------------------------------------------------------------------- 1 | ## 删除节点 master 角色 2 | 3 | - 删除节点 master 角色,则该节点将转换为 worker 角色节点 4 | 5 | - 在 `[del-master]` 节点组中添加需删除节点信息 6 | ```diff 7 | ... 8 | [del-master] 9 | + node6 10 | ... 11 | ``` 12 | 13 | - 执行 master 节点删除操作 14 | - 基本配置执行 15 | ``` 16 | ansible-playbook -i example/hosts.m-master.ip.ini 85-remove-master.yml 17 | ``` 18 | 19 | - **注意:** 如果安装集群时使用高级配置,则使用该命令 20 | ``` 21 | ansible-playbook -i example/hosts.m-master.ip.ini -e @example/variables.yaml 85-remove-master.yml 22 | ``` 23 | 24 | > 若 `example/hosts.m-master.ip.ini` 文件中与 `example/variables.yaml` 参数冲突时, `example/variables.yaml` 文件中的变量值优先级最高。 25 | 26 | - 删除完成后,更新 `[del-master]` 、`[kube-master]` 节点组以及 `[kube-worker]` 节点组 27 | ```diff 28 | ... 29 | [kube-master] 30 | node1 31 | node2 32 | node3 33 | - node6 34 | ... 
35 | [kube-worker] 36 | node1 37 | node2 38 | node3 39 | + node6 40 | ... 41 | [del-master] 42 | - node6 43 | ... 44 | ``` -------------------------------------------------------------------------------- /docs/02/删除节点 worker 角色.md: -------------------------------------------------------------------------------- 1 | ## 删除节点 worker 角色 2 | 3 | - **此操作仅为移除 worker 角色操作,并不会完全将该节点移出集群**;若需移出集群,请看本文 `删除节点` 操作 4 | 5 | - 在 `[del-worker]` 节点组中添加需删除角色的节点信息 6 | ```diff 7 | ... 8 | [del-worker] 9 | + node7 10 | ... 11 | ``` 12 | 13 | - 执行 worker 角色删除操作 14 | - 基本配置执行 15 | ``` 16 | ansible-playbook -i example/hosts.m-master.ip.ini 84-remove-worker.yml 17 | ``` 18 | 19 | - **注意:** 如果安装集群时使用高级配置,则使用该命令 20 | ``` 21 | ansible-playbook -i example/hosts.m-master.ip.ini -e @example/variables.yaml 84-remove-worker.yml 22 | ``` 23 | 24 | > 若 `example/hosts.m-master.ip.ini` 文件中与 `example/variables.yaml` 参数冲突时, `example/variables.yaml` 文件中的变量值优先级最高。 25 | 26 | - 删除完成后,更新 `[del-worker]` 节点组以及 `[kube-worker]` 节点组 27 | ```diff 28 | ... 29 | [kube-worker] 30 | node1 31 | node2 32 | node3 33 | node4 34 | - node7 35 | ... 36 | [del-worker] 37 | - node7 38 | ... 39 | ``` -------------------------------------------------------------------------------- /docs/02/删除集群节点.md: -------------------------------------------------------------------------------- 1 | ## 删除节点 2 | 3 | - 此为完全移除节点操作,操作前请先移除该节点所有角色。 4 | 5 | - 在 `[del-node]` 节点组中添加需删除节点的信息 6 | ```diff 7 | ... 8 | [del-node] 9 | + node1 10 | ... 11 | ``` 12 | 13 | - 执行节点删除操作 14 | - 基本配置执行 15 | ``` 16 | ansible-playbook -i example/hosts.m-master.ip.ini 87-remove-node.yml 17 | ``` 18 | 19 | - **注意:** 如果安装集群时使用高级配置,则使用该命令 20 | ``` 21 | ansible-playbook -i example/hosts.m-master.ip.ini -e @example/variables.yaml 87-remove-node.yml 22 | ``` 23 | 24 | > 若 `example/hosts.m-master.ip.ini` 文件中与 `example/variables.yaml` 参数冲突时, `example/variables.yaml` 文件中的变量值优先级最高。 25 | 26 | - 删除完成后,更新 `[kube-worker]` 节点组以及 `[del-node]` 节点组 27 | ```diff 28 | ... 29 | [kube-worker] 30 | - node1 31 | node2 32 | node3 33 | node5 34 | ... 35 | [del-node] 36 | - node1 37 | ... 38 | ``` -------------------------------------------------------------------------------- /docs/02/增加或删除 lb 节点.md: -------------------------------------------------------------------------------- 1 | ## 新节点信息 2 | 3 | | **ip** | **hostname** | **OS** | **kernel version** | **role** | 4 | | :-----------: | :----------: | :--------: | :----------------: | :------: | 5 | | 192.168.56.18 | node8 | CentOS 7.4 | 4.20.13-1 | lb | 6 | | 192.168.56.19 | node9 | CentOS 7.4 | 4.20.13-1 | lb | 7 | 8 | ## 增加或删除 lb 节点 9 | 10 | - 添加或删除 lb 节点都按下面方式进行,维护好节点组信息即可 11 | 12 | - 在 `[lb]` 节点组中添加或删除节点信息 13 | ```diff 14 | ... 15 | [lb] 16 | + node8 17 | + node9 18 | ... 
19 | ``` 20 | 21 | - 执行 lb 节点添加或删除操作 22 | - 基本配置执行 23 | ``` 24 | ansible-playbook -i example/hosts.m-master.ip.ini 04-load-balancer.yml 25 | ``` 26 | 27 | - **注意:** 如果安装集群时使用高级配置,则使用该命令 28 | ``` 29 | ansible-playbook -i example/hosts.m-master.ip.ini -e @example/variables.yaml 04-load-balancer.yml 30 | ``` 31 | 32 | > 若 `example/hosts.m-master.ip.ini` 文件中与 `example/variables.yaml` 参数冲突时, `example/variables.yaml` 文件中的变量值优先级最高。 -------------------------------------------------------------------------------- /docs/02/添加 etcd 节点.md: -------------------------------------------------------------------------------- 1 | ## 新节点信息 2 | 3 | | **ip** | **hostname** | **OS** | **kernel version** | **role** | 4 | | :-----------: | :----------: | :--------: | :----------------: | :------: | 5 | | 192.168.56.15 | node5 | CentOS 7.4 | 4.20.13-1 | etcd | 6 | 7 | ## 添加 etcd 节点 8 | 9 | **注意:** 同时只能添加一个 etcd 节点。 10 | 11 | - 编辑原有主机清单文件,在 `[all]` 节点组中添加新节点信息 12 | ```diff 13 | [all] 14 | ... 15 | + node5 ansible_host=192.168.56.15 ansible_user=vagrant ansible_ssh_pass=vagrant 16 | ... 17 | ``` 18 | 19 | - 在 `[new-etcd]` 节点组中添加新节点信息 20 | ```diff 21 | ... 22 | [new-etcd] 23 | + node5 24 | ... 25 | ``` 26 | 27 | - 执行 etcd 节点添加操作 28 | - 基本配置执行 29 | ``` 30 | ansible-playbook -i example/hosts.m-master.ip.ini 83-add-etcd.yml 31 | ``` 32 | 33 | - **注意:** 如果安装集群时使用高级配置,则使用该命令 34 | ``` 35 | ansible-playbook -i example/hosts.m-master.ip.ini -e @example/variables.yaml 83-add-etcd.yml 36 | ``` 37 | 38 | > 若 `example/hosts.m-master.ip.ini` 文件中与 `example/variables.yaml` 参数冲突时, `example/variables.yaml` 文件中的变量值优先级最高。 39 | 40 | - 添加完成后,将`[new-etcd]` 节点组中新节点信息移动至 `[etcd]` 节点组 41 | ```diff 42 | ... 43 | [etcd] 44 | node1 45 | node2 46 | node3 47 | + node5 48 | ... 49 | [new-etcd] 50 | - node5 51 | ... 52 | ``` -------------------------------------------------------------------------------- /docs/02/添加 master 节点.md: -------------------------------------------------------------------------------- 1 | ## 新节点信息 2 | 3 | | **ip** | **hostname** | **OS** | **kernel version** | **role** | 4 | | :-----------: | :----------: | :--------: | :----------------: | :------: | 5 | | 192.168.56.16 | node6 | CentOS 7.4 | 4.20.13-1 | master | 6 | 7 | ## 添加 master 节点 8 | 9 | - 编辑原有主机清单文件,在 `[all]` 节点组中添加新节点信息 10 | ```diff 11 | [all] 12 | ... 13 | + node6 ansible_host=192.168.56.16 ansible_user=vagrant ansible_ssh_pass=vagrant 14 | ... 15 | ``` 16 | 17 | - 在 `[new-master]` 节点组中添加新节点信息 18 | ```diff 19 | ... 20 | [new-master] 21 | + node6 22 | ... 23 | ``` 24 | 25 | - 执行 master 节点添加操作 26 | - 基本配置执行 27 | ``` 28 | ansible-playbook -i example/hosts.m-master.ip.ini 82-add-master.yml 29 | ``` 30 | 31 | - **注意:** 如果安装集群时使用高级配置,则使用该命令 32 | ``` 33 | ansible-playbook -i example/hosts.m-master.ip.ini -e @example/variables.yaml 82-add-master.yml 34 | ``` 35 | 36 | > 若 `example/hosts.m-master.ip.ini` 文件中与 `example/variables.yaml` 参数冲突时, `example/variables.yaml` 文件中的变量值优先级最高。 37 | 38 | - 添加完成后,将`[new-master]` 节点组中新节点信息移动至 `[kube-master]` 节点组 39 | ```diff 40 | ... 41 | [kube-master] 42 | node1 43 | node2 44 | node3 45 | + node6 46 | ... 47 | [new-master] 48 | - node6 49 | ... 
50 | ``` -------------------------------------------------------------------------------- /docs/02/添加 worker 节点.md: -------------------------------------------------------------------------------- 1 | ## 新节点信息 2 | 3 | | **ip** | **hostname** | **OS** | **kernel version** | **role** | 4 | | :-----------: | :----------: | :--------: | :----------------: | :------: | 5 | | 192.168.56.17 | node7 | CentOS 7.4 | 4.20.13-1 | worker | 6 | 7 | ## 添加 worker 节点 8 | 9 | - 编辑原有主机清单文件,在 `[all]` 节点组中添加新节点信息 10 | ```diff 11 | [all] 12 | ... 13 | + node7 ansible_host=192.168.56.17 ansible_user=vagrant ansible_ssh_pass=vagrant 14 | ... 15 | ``` 16 | 17 | - 在 `[new-worker]` 节点组中添加新节点信息 18 | ```diff 19 | ... 20 | [new-worker] 21 | + node7 22 | ... 23 | ``` 24 | 25 | - 执行 worker 节点添加操作 26 | - 基本配置执行 27 | ``` 28 | ansible-playbook -i example/hosts.m-master.ip.ini 81-add-worker.yml 29 | ``` 30 | 31 | - **注意:** 如果安装集群时使用高级配置,则使用该命令 32 | ``` 33 | ansible-playbook -i example/hosts.m-master.ip.ini -e @example/variables.yaml 81-add-worker.yml 34 | ``` 35 | 36 | > 若 `example/hosts.m-master.ip.ini` 文件中与 `example/variables.yaml` 参数冲突时, `example/variables.yaml` 文件中的变量值优先级最高。 37 | 38 | - 添加完成后,将 `[new-worker]` 节点组中新节点信息移动至 `[kube-worker]` 节点组 39 | ```diff 40 | ... 41 | [kube-worker] 42 | node1 43 | node2 44 | node3 45 | node4 46 | + node7 47 | ... 48 | [new-worker] 49 | - node7 50 | ... 51 | ``` -------------------------------------------------------------------------------- /docs/03-证书轮换.md: -------------------------------------------------------------------------------- 1 | ## 证书轮换 2 | 3 | ### 约定 4 | 5 | - 默认 kubernetes、etcd 的 ca 根证书有效期为 100 年,该 ca 根证书签发的证书有效期为 10 年,若需自定义请在 `example/variables.yaml` 文件中进行修改。 6 | - 证书轮换后请手动重启 docker 和 kubelet 以应用新的证书。 7 | 8 | ### 证书轮换 9 | - 基本配置执行 10 | ``` 11 | ansible-playbook -i example/hosts.m-master.ip.ini 92-certificates-renew.yml 12 | ``` 13 | 14 | - **注意:** 如果安装集群时使用高级配置,则使用该命令进行证书轮换 15 | ``` 16 | ansible-playbook -i example/hosts.m-master.ip.ini -e @example/variables.yaml 92-certificates-renew.yml 17 | ``` -------------------------------------------------------------------------------- /docs/04-集群升级.md: -------------------------------------------------------------------------------- 1 | ## 集群升级 2 | 3 | ### 预备知识 4 | 5 | - 版本格式:主版本号.次版本号.修订号,版本号递增规则如下: 6 | - 主版本号:当你做了不兼容的 API 修改, 7 | - 次版本号:当你做了向下兼容的功能性新增, 8 | - 修订号:当你做了向下兼容的问题修正。 9 | 10 | 先行版本号及版本编译元数据可以加到“主版本号.次版本号.修订号”的后面,作为延伸。 11 | 12 | ### 升级集群 13 | 14 | - 由于 kubeadm 约束,不能跨次版本号升级,即 1.13 版本只能升级至 1.14 ,不能直接升级 1.15 15 | - 升级kubernetes版本,执行: 16 | - 基本配置执行 17 | ``` 18 | # 请注意替换用下面命令中版本号x部分为实际版本 19 | ansible-playbook -i example/hosts.m-master.ip.ini -e kube_upgrade_version=1.15.x 91-upgrade-cluster.yml 20 | ``` 21 | 22 | - **注意:** 如果安装集群时使用高级配置,则使用该命令升级集群 23 | ``` 24 | # 将变量 kube_upgrade_version 添加在 example/variables.yaml 文件中 25 | ansible-playbook -i example/hosts.m-master.ip.ini -e @example/variables.yaml 91-upgrade-cluster.yml 26 | ``` 27 | - 升级完成后修改 `example/hosts.m-master.ip.ini` 文件中 `kube_version` 值为升级后的版本。 -------------------------------------------------------------------------------- /docs/05-集群备份.md: -------------------------------------------------------------------------------- 1 | ## 集群备份 2 | 3 | ### 约定 4 | 5 | - 备份后将在 play-book 所在目录中生成 cluster-backup 文件夹,文件夹中为各个节点 /etc/kubernetes 、 kubelet 配置文件和 etcd 快照的压缩包,命名格式为`-kubernetes.orig..tar.gz` 6 | 7 | ### 集群备份 8 | - 基本配置执行 9 | ``` 10 | ansible-playbook -i example/hosts.m-master.ip.ini 93-backup-cluster.yml 11 | ``` 12 | 13 | - **注意:** 如果安装集群时使用高级配置,则使用该命令进行集群备份 14 | ``` 15 | 
ansible-playbook -i example/hosts.m-master.ip.ini -e @example/variables.yaml 93-backup-cluster.yml 16 | ``` -------------------------------------------------------------------------------- /docs/06-集群恢复.md: -------------------------------------------------------------------------------- 1 | ## 集群恢复 2 | 3 | ### 约定 4 | 5 | - 节点“干净”,若节点中 docker 或 kubelet 已安装或版本不统一,建议全部卸载。在执行集群恢复时脚本会进行安装 docker 以及 kubelet。 6 | - 进行集群恢复时读取的备份为 play-book 所在目录中的 cluster-backup 文件夹,即备份集群时生成的备份文件。 7 | 8 | ### 集群恢复 9 | 10 | - 基本配置执行 11 | ``` 12 | ansible-playbook -i example/hosts.m-master.ip.ini 94-restore-cluster.yml 13 | ``` 14 | 15 | - 高级配置执行 16 | ``` 17 | ansible-playbook -i example/hosts.m-master.ip.ini -e @example/variables.yaml 94-restore-cluster.yml 18 | ``` 19 | > 若 `example/hosts.m-master.ip.ini` 文件中与 `example/variables.yaml` 变量值冲突时, `example/variables.yaml` 文件中的变量值优先级最高。 -------------------------------------------------------------------------------- /docs/07-集群重置.md: -------------------------------------------------------------------------------- 1 | ## 集群重置 2 | 3 | ### 如果部署失败,想要重置集群(所有数据),执行: 4 | 5 | - 若安装集群时未使用 `example/variables.yaml` 文件 6 | ``` 7 | ansible-playbook -i example/hosts.m-master.ip.ini 99-reset-cluster.yml 8 | ``` 9 | 10 | - 若安装集群时使用了 `example/variables.yaml` 文件 11 | ``` 12 | ansible-playbook -i example/hosts.m-master.ip.ini -e @example/variables.yaml 99-reset-cluster.yml 13 | ``` 14 | 15 | > 若 `example/hosts.m-master.ip.ini` 文件中与 `example/variables.yaml` 变量值冲突时, `example/variables.yaml` 文件中的变量值优先级最高。 16 | 17 | - 重置集群后建议重启所有节点,以确保所有残留文件被清理干净 -------------------------------------------------------------------------------- /docs/09-扩展阅读.md: -------------------------------------------------------------------------------- 1 | ## 扩展阅读 2 | 3 | - [如何选择运行时组件](09/如何选择运行时组件.md) 4 | -------------------------------------------------------------------------------- /example/hosts.allinone.hostname.ini: -------------------------------------------------------------------------------- 1 | ; 将所有节点的信息在这里填写 2 | ; 第一个字段 为 kubernetes 节点 nodeName,注意必须由小写字母、数字,“-”或“.”组成,并且必须以小写字母或数字开头和结尾 3 | ; 第二个字段 ansible_host 为节点内网IP 4 | ; 第三个字段 ansible_port 为节点 sshd 监听端口 5 | ; 第四个字段 ansible_user 为节点远程登录用户名 6 | ; 第五个字段 ansible_ssh_pass 为节点远程登录用户密码 7 | [all] 8 | node1 ansible_host=192.168.56.11 ansible_port=22 ansible_user="vagrant" ansible_ssh_pass="vagrant" 9 | 10 | ; 单节点lb节点组留空。 11 | [lb] 12 | 13 | ; 注意etcd集群必须是1,3,5,7...奇数个节点 14 | [etcd] 15 | node1 16 | 17 | [kube-master] 18 | node1 19 | 20 | [kube-worker] 21 | node1 22 | 23 | ; 预留组,后续添加master节点使用 24 | [new-master] 25 | 26 | ; 预留组,后续添加worker节点使用 27 | [new-worker] 28 | 29 | ; 预留组,后续添加etcd节点使用 30 | [new-etcd] 31 | 32 | ; 预留组,后续删除worker角色使用 33 | [del-worker] 34 | 35 | ; 预留组,后续删除master角色使用 36 | [del-master] 37 | 38 | ; 预留组,后续删除etcd角色使用 39 | [del-etcd] 40 | 41 | ; 预留组,后续删除节点使用 42 | [del-node] 43 | 44 | ;-------------------------------------- 以下为基础信息配置 ------------------------------------; 45 | [all:vars] 46 | ; 是否跳过节点物理资源校验,Master节点要求2c2g以上,Worker节点要求2c4g以上 47 | skip_verify_node=false 48 | ; kubernetes版本 49 | kube_version="1.21.14" 50 | 51 | ; 容器运行时类型,可选项:containerd,docker;默认 containerd 52 | container_manager="containerd" 53 | 54 | ; 负载均衡器 55 | ; 有 nginx、openresty、haproxy、envoy 和 slb 可选,默认使用 nginx 56 | ; 为什么单节点 apiserver 也使用了负载均衡请参与此讨论: https://github.com/TimeBye/kubeadm-ha/issues/8 57 | lb_mode="nginx" 58 | ; 使用负载均衡后集群 apiserver port 59 | lb_kube_apiserver_port="8443" 60 | 61 | ; 网段选择:pod 和 service 的网段不能与服务器网段重叠, 62 | ; 若有重叠请配置 `kube_pod_subnet` 和 `kube_service_subnet` 变量设置 
pod 和 service 的网段,示例参考: 63 | ; 如果服务器网段为:10.0.0.1/8 64 | ; pod 网段可设置为:192.168.0.0/18 65 | ; service 网段可设置为 192.168.64.0/18 66 | ; 如果服务器网段为:172.16.0.1/12 67 | ; pod 网段可设置为:10.244.0.0/18 68 | ; service 网段可设置为 10.244.64.0/18 69 | ; 如果服务器网段为:192.168.0.1/16 70 | ; pod 网段可设置为:10.244.0.0/18 71 | ; service 网段可设置为 10.244.64.0/18 72 | ; 集群pod ip段,默认掩码位 18 即 16384 个ip 73 | kube_pod_subnet="10.244.0.0/18" 74 | ; 集群service ip段 75 | kube_service_subnet="10.244.64.0/18" 76 | ; 分配给节点的 pod 子网掩码位,默认为 24 即 256 个ip,故使用这些默认值可以纳管 16384/256=64 个节点。 77 | kube_network_node_prefix="24" 78 | 79 | ; node节点最大 pod 数。数量与分配给节点的 pod 子网有关,ip 数应大于 pod 数。 80 | ; https://cloud.google.com/kubernetes-engine/docs/how-to/flexible-pod-cidr 81 | kube_max_pods="110" 82 | 83 | ; 集群网络插件,目前支持flannel,calico 84 | network_plugin="calico" 85 | 86 | ; 若服务器磁盘分为系统盘与数据盘,请修改以下路径至数据盘自定义的目录。 87 | ; Kubelet 根目录 88 | kubelet_root_dir="/var/lib/kubelet" 89 | ; docker容器存储目录 90 | docker_storage_dir="/var/lib/docker" 91 | ; containerd容器存储目录 92 | containerd_storage_dir="/var/lib/containerd" 93 | ; Etcd 数据根目录 94 | etcd_data_dir="/var/lib/etcd" -------------------------------------------------------------------------------- /example/hosts.allinone.ip.ini: -------------------------------------------------------------------------------- 1 | ; 将所有节点的信息在这里填写 2 | ; 第一个字段 为节点内网IP,部署完成后为 kubernetes 节点 nodeName 3 | ; 第二个字段 ansible_port 为节点 sshd 监听端口 4 | ; 第三个字段 ansible_user 为节点远程登录用户名 5 | ; 第四个字段 ansible_ssh_pass 为节点远程登录用户密码 6 | [all] 7 | 192.168.56.11 ansible_port=22 ansible_user="vagrant" ansible_ssh_pass="vagrant" 8 | 9 | ; 单节点lb节点组留空。 10 | [lb] 11 | 12 | ; 注意etcd集群必须是1,3,5,7...奇数个节点 13 | [etcd] 14 | 192.168.56.11 15 | 16 | [kube-master] 17 | 192.168.56.11 18 | 19 | [kube-worker] 20 | 192.168.56.11 21 | 22 | ; 预留组,后续添加master节点使用 23 | [new-master] 24 | 25 | ; 预留组,后续添加worker节点使用 26 | [new-worker] 27 | 28 | ; 预留组,后续添加etcd节点使用 29 | [new-etcd] 30 | 31 | ; 预留组,后续删除worker角色使用 32 | [del-worker] 33 | 34 | ; 预留组,后续删除master角色使用 35 | [del-master] 36 | 37 | ; 预留组,后续删除etcd角色使用 38 | [del-etcd] 39 | 40 | ; 预留组,后续删除节点使用 41 | [del-node] 42 | 43 | ;-------------------------------------- 以下为基础信息配置 ------------------------------------; 44 | [all:vars] 45 | ; 是否跳过节点物理资源校验,Master节点要求2c2g以上,Worker节点要求2c4g以上 46 | skip_verify_node=false 47 | ; kubernetes版本 48 | kube_version="1.21.14" 49 | 50 | ; 容器运行时类型,可选项:containerd,docker;默认 containerd 51 | container_manager="containerd" 52 | 53 | ; 负载均衡器 54 | ; 有 nginx、openresty、haproxy、envoy 和 slb 可选,默认使用 nginx 55 | ; 为什么单节点 apiserver 也使用了负载均衡请参与此讨论: https://github.com/TimeBye/kubeadm-ha/issues/8 56 | lb_mode="nginx" 57 | ; 使用负载均衡后集群 apiserver port 58 | lb_kube_apiserver_port="8443" 59 | 60 | ; 网段选择:pod 和 service 的网段不能与服务器网段重叠, 61 | ; 若有重叠请配置 `kube_pod_subnet` 和 `kube_service_subnet` 变量设置 pod 和 service 的网段,示例参考: 62 | ; 如果服务器网段为:10.0.0.1/8 63 | ; pod 网段可设置为:192.168.0.0/18 64 | ; service 网段可设置为 192.168.64.0/18 65 | ; 如果服务器网段为:172.16.0.1/12 66 | ; pod 网段可设置为:10.244.0.0/18 67 | ; service 网段可设置为 10.244.64.0/18 68 | ; 如果服务器网段为:192.168.0.1/16 69 | ; pod 网段可设置为:10.244.0.0/18 70 | ; service 网段可设置为 10.244.64.0/18 71 | ; 集群pod ip段,默认掩码位 18 即 16384 个ip 72 | kube_pod_subnet="10.244.0.0/18" 73 | ; 集群service ip段 74 | kube_service_subnet="10.244.64.0/18" 75 | ; 分配给节点的 pod 子网掩码位,默认为 24 即 256 个ip,故使用这些默认值可以纳管 16384/256=64 个节点。 76 | kube_network_node_prefix="24" 77 | 78 | ; node节点最大 pod 数。数量与分配给节点的 pod 子网有关,ip 数应大于 pod 数。 79 | ; https://cloud.google.com/kubernetes-engine/docs/how-to/flexible-pod-cidr 80 | kube_max_pods="110" 81 | 82 | ; 集群网络插件,目前支持flannel,calico 83 | 
network_plugin="calico" 84 | 85 | ; 若服务器磁盘分为系统盘与数据盘,请修改以下路径至数据盘自定义的目录。 86 | ; Kubelet 根目录 87 | kubelet_root_dir="/var/lib/kubelet" 88 | ; docker容器存储目录 89 | docker_storage_dir="/var/lib/docker" 90 | ; containerd容器存储目录 91 | containerd_storage_dir="/var/lib/containerd" 92 | ; Etcd 数据根目录 93 | etcd_data_dir="/var/lib/etcd" -------------------------------------------------------------------------------- /example/hosts.s-master.hostname.ini: -------------------------------------------------------------------------------- 1 | ; 将所有节点的信息在这里填写 2 | ; 第一个字段 为 kubernetes 节点 nodeName,注意必须由小写字母、数字,“-”或“.”组成,并且必须以小写字母或数字开头和结尾 3 | ; 第二个字段 ansible_host 为节点内网IP 4 | ; 第三个字段 ansible_port 为节点 sshd 监听端口 5 | ; 第四个字段 ansible_user 为节点远程登录用户名 6 | ; 第五个字段 ansible_ssh_pass 为节点远程登录用户密码 7 | [all] 8 | node1 ansible_host=192.168.56.11 ansible_port=22 ansible_user="vagrant" ansible_ssh_pass="vagrant" 9 | node2 ansible_host=192.168.56.12 ansible_port=22 ansible_user="vagrant" ansible_ssh_pass="vagrant" 10 | node3 ansible_host=192.168.56.13 ansible_port=22 ansible_user="vagrant" ansible_ssh_pass="vagrant" 11 | node4 ansible_host=192.168.56.14 ansible_port=22 ansible_user="vagrant" ansible_ssh_pass="vagrant" 12 | 13 | ; 单 master 节点不需要进行负载均衡,lb节点组留空。 14 | [lb] 15 | 16 | ; 注意etcd集群必须是1,3,5,7...奇数个节点 17 | [etcd] 18 | node1 19 | node2 20 | node3 21 | 22 | [kube-master] 23 | node1 24 | 25 | [kube-worker] 26 | node1 27 | node2 28 | node3 29 | node4 30 | 31 | ; 预留组,后续添加master节点使用 32 | [new-master] 33 | 34 | ; 预留组,后续添加worker节点使用 35 | [new-worker] 36 | 37 | ; 预留组,后续添加etcd节点使用 38 | [new-etcd] 39 | 40 | ; 预留组,后续删除worker角色使用 41 | [del-worker] 42 | 43 | ; 预留组,后续删除master角色使用 44 | [del-master] 45 | 46 | ; 预留组,后续删除etcd角色使用 47 | [del-etcd] 48 | 49 | ; 预留组,后续删除节点使用 50 | [del-node] 51 | 52 | ;-------------------------------------- 以下为基础信息配置 ------------------------------------; 53 | [all:vars] 54 | ; 是否跳过节点物理资源校验,Master节点要求2c2g以上,Worker节点要求2c4g以上 55 | skip_verify_node=false 56 | ; kubernetes版本 57 | kube_version="1.21.14" 58 | 59 | ; 容器运行时类型,可选项:containerd,docker;默认 containerd 60 | container_manager="containerd" 61 | 62 | ; 负载均衡器 63 | ; 有 nginx、openresty、haproxy、envoy 和 slb 可选,默认使用 nginx 64 | ; 为什么单 master 集群 apiserver 也使用了负载均衡请参与此讨论: https://github.com/TimeBye/kubeadm-ha/issues/8 65 | lb_mode="nginx" 66 | ; 使用负载均衡后集群 apiserver ip,设置 lb_kube_apiserver_ip 变量,则启用负载均衡器 + keepalived 67 | ; lb_kube_apiserver_ip="192.168.56.15" 68 | ; 使用负载均衡后集群 apiserver port 69 | lb_kube_apiserver_port="8443" 70 | 71 | ; 网段选择:pod 和 service 的网段不能与服务器网段重叠, 72 | ; 若有重叠请配置 `kube_pod_subnet` 和 `kube_service_subnet` 变量设置 pod 和 service 的网段,示例参考: 73 | ; 如果服务器网段为:10.0.0.1/8 74 | ; pod 网段可设置为:192.168.0.0/18 75 | ; service 网段可设置为 192.168.64.0/18 76 | ; 如果服务器网段为:172.16.0.1/12 77 | ; pod 网段可设置为:10.244.0.0/18 78 | ; service 网段可设置为 10.244.64.0/18 79 | ; 如果服务器网段为:192.168.0.1/16 80 | ; pod 网段可设置为:10.244.0.0/18 81 | ; service 网段可设置为 10.244.64.0/18 82 | ; 集群pod ip段,默认掩码位 18 即 16384 个ip 83 | kube_pod_subnet="10.244.0.0/18" 84 | ; 集群service ip段 85 | kube_service_subnet="10.244.64.0/18" 86 | ; 分配给节点的 pod 子网掩码位,默认为 24 即 256 个ip,故使用这些默认值可以纳管 16384/256=64 个节点。 87 | kube_network_node_prefix="24" 88 | 89 | ; node节点最大 pod 数。数量与分配给节点的 pod 子网有关,ip 数应大于 pod 数。 90 | ; https://cloud.google.com/kubernetes-engine/docs/how-to/flexible-pod-cidr 91 | kube_max_pods="110" 92 | 93 | ; 集群网络插件,目前支持flannel,calico 94 | network_plugin="calico" 95 | 96 | ; 若服务器磁盘分为系统盘与数据盘,请修改以下路径至数据盘自定义的目录。 97 | ; Kubelet 根目录 98 | kubelet_root_dir="/var/lib/kubelet" 99 | ; docker容器存储目录 100 | docker_storage_dir="/var/lib/docker" 
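; Example (for illustration only — the /data/* paths are assumptions, not project defaults):
; if the data disk is mounted at /data, the four storage paths in this block could be
; pointed at it like so:
; kubelet_root_dir="/data/kubelet"
; docker_storage_dir="/data/docker"
; containerd_storage_dir="/data/containerd"
; etcd_data_dir="/data/etcd"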
101 | ; containerd容器存储目录 102 | containerd_storage_dir="/var/lib/containerd" 103 | ; Etcd 数据根目录 104 | etcd_data_dir="/var/lib/etcd" -------------------------------------------------------------------------------- /example/hosts.s-master.ip.ini: -------------------------------------------------------------------------------- 1 | ; 将所有节点信息在这里填写 2 | ; 第一个字段 为远程服务器内网IP 3 | ; 第二个字段 ansible_port 为节点 sshd 监听端口 4 | ; 第三个字段 ansible_user 为节点远程登录用户名 5 | ; 第四个字段 ansible_ssh_pass 为节点远程登录用户密码 6 | [all] 7 | 192.168.56.11 ansible_port=22 ansible_user="vagrant" ansible_ssh_pass="vagrant" 8 | 192.168.56.12 ansible_port=22 ansible_user="vagrant" ansible_ssh_pass="vagrant" 9 | 192.168.56.13 ansible_port=22 ansible_user="vagrant" ansible_ssh_pass="vagrant" 10 | 192.168.56.14 ansible_port=22 ansible_user="vagrant" ansible_ssh_pass="vagrant" 11 | 12 | ; 单 master 节点不需要进行负载均衡,lb节点组留空。 13 | [lb] 14 | 15 | ; 注意etcd集群必须是1,3,5,7...奇数个节点 16 | [etcd] 17 | 192.168.56.11 18 | 192.168.56.12 19 | 192.168.56.13 20 | 21 | [kube-master] 22 | 192.168.56.11 23 | 24 | [kube-worker] 25 | 192.168.56.11 26 | 192.168.56.12 27 | 192.168.56.13 28 | 192.168.56.14 29 | 30 | ; 预留组,后续添加master节点使用 31 | [new-master] 32 | 33 | ; 预留组,后续添加worker节点使用 34 | [new-worker] 35 | 36 | ; 预留组,后续添加etcd节点使用 37 | [new-etcd] 38 | 39 | ; 预留组,后续删除worker角色使用 40 | [del-worker] 41 | 42 | ; 预留组,后续删除master角色使用 43 | [del-master] 44 | 45 | ; 预留组,后续删除etcd角色使用 46 | [del-etcd] 47 | 48 | ; 预留组,后续删除节点使用 49 | [del-node] 50 | 51 | ;-------------------------------------- 以下为基础信息配置 ------------------------------------; 52 | [all:vars] 53 | ; 是否跳过节点物理资源校验,Master节点要求2c2g以上,Worker节点要求2c4g以上 54 | skip_verify_node=false 55 | ; kubernetes版本 56 | kube_version="1.21.14" 57 | 58 | ; 容器运行时类型,可选项:containerd,docker;默认 containerd 59 | container_manager="containerd" 60 | 61 | ; 负载均衡器 62 | ; 有 nginx、openresty、haproxy、envoy 和 slb 可选,默认使用 nginx 63 | ; 为什么单 master 集群 apiserver 也使用了负载均衡请参与此讨论: https://github.com/TimeBye/kubeadm-ha/issues/8 64 | lb_mode="nginx" 65 | ; 使用负载均衡后集群 apiserver ip,设置 lb_kube_apiserver_ip 变量,则启用负载均衡器 + keepalived 66 | ; lb_kube_apiserver_ip="192.168.56.15" 67 | ; 使用负载均衡后集群 apiserver port 68 | lb_kube_apiserver_port="8443" 69 | 70 | ; 网段选择:pod 和 service 的网段不能与服务器网段重叠, 71 | ; 若有重叠请配置 `kube_pod_subnet` 和 `kube_service_subnet` 变量设置 pod 和 service 的网段,示例参考: 72 | ; 如果服务器网段为:10.0.0.1/8 73 | ; pod 网段可设置为:192.168.0.0/18 74 | ; service 网段可设置为 192.168.64.0/18 75 | ; 如果服务器网段为:172.16.0.1/12 76 | ; pod 网段可设置为:10.244.0.0/18 77 | ; service 网段可设置为 10.244.64.0/18 78 | ; 如果服务器网段为:192.168.0.1/16 79 | ; pod 网段可设置为:10.244.0.0/18 80 | ; service 网段可设置为 10.244.64.0/18 81 | ; 集群pod ip段,默认掩码位 18 即 16384 个ip 82 | kube_pod_subnet="10.244.0.0/18" 83 | ; 集群service ip段 84 | kube_service_subnet="10.244.64.0/18" 85 | ; 分配给节点的 pod 子网掩码位,默认为 24 即 256 个ip,故使用这些默认值可以纳管 16384/256=64 个节点。 86 | kube_network_node_prefix="24" 87 | 88 | ; node节点最大 pod 数。数量与分配给节点的 pod 子网有关,ip 数应大于 pod 数。 89 | ; https://cloud.google.com/kubernetes-engine/docs/how-to/flexible-pod-cidr 90 | kube_max_pods="110" 91 | 92 | ; 集群网络插件,目前支持flannel,calico 93 | network_plugin="calico" 94 | 95 | ; 若服务器磁盘分为系统盘与数据盘,请修改以下路径至数据盘自定义的目录。 96 | ; Kubelet 根目录 97 | kubelet_root_dir="/var/lib/kubelet" 98 | ; docker容器存储目录 99 | docker_storage_dir="/var/lib/docker" 100 | ; containerd容器存储目录 101 | containerd_storage_dir="/var/lib/containerd" 102 | ; Etcd 数据根目录 103 | etcd_data_dir="/var/lib/etcd" 104 | -------------------------------------------------------------------------------- /offline/Dockerfile: 
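The Dockerfile below layers a clone of this repository onto the pre-built offline base image (see Dockerfile.base further down), which bundles nginx for the file/yum mirror and a pull-only image registry. A minimal usage sketch, assuming a local tag, host port 80 and the default `master` branch — none of these are the project's documented values:

```bash
# Run from the offline/ directory; the tag, port and BRANCH value are assumptions.
docker build --build-arg BRANCH=master -t kubeadm-ha-offline:local -f Dockerfile .
docker run -d --name kubeadm-ha-offline -p 80:80 kubeadm-ha-offline:local
# nginx serves the file mirror on port 80 and proxies /v2 to the bundled registry (pushes are rejected).
```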
-------------------------------------------------------------------------------- 1 | FROM setzero/kubeadm-ha:1.21.14-base 2 | ARG BRANCH=master 3 | RUN git clone -b ${BRANCH} https://github.com/TimeBye/kubeadm-ha.git /etc/ansible 4 | WORKDIR /etc/ansible 5 | ENTRYPOINT ["tini", "--"] 6 | CMD ["/bin/sh", "-c", "nginx && registry serve /etc/docker/registry/config.yml"] -------------------------------------------------------------------------------- /offline/Dockerfile.base: -------------------------------------------------------------------------------- 1 | FROM registry:2 AS registry 2 | FROM nginx:1.19 3 | ARG TARGETARCH 4 | ENV LANG=C.UTF-8 5 | 6 | COPY --from=registry /bin/registry /bin/registry 7 | COPY --from=registry /etc/docker/registry/config.yml /etc/docker/registry/config.yml 8 | 9 | COPY --chmod=755 sync-images.sh /usr/bin/sync-images 10 | COPY default.conf /etc/nginx/conf.d/default.conf 11 | COPY ${TARGETARCH}/registry /var/lib/registry 12 | COPY ${TARGETARCH}/kubernetes-yum /kubernetes/yum 13 | 14 | RUN DEBIAN_FRONTEND=noninteractive; \ 15 | apt-get update; \ 16 | apt-get install -y \ 17 | git \ 18 | tini \ 19 | rsync \ 20 | sshpass \ 21 | python3-pip \ 22 | openssh-client; \ 23 | . /etc/os-release; \ 24 | echo "deb https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/stable/Debian_${VERSION_ID}/ /" | tee /etc/apt/sources.list.d/devel:kubic:libcontainers:stable.list; \ 25 | curl -L https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/stable/Debian_${VERSION_ID}/Release.key | apt-key add -; \ 26 | apt-get update; \ 27 | apt-get install -y \ 28 | skopeo; \ 29 | rm -rf /var/lib/apt/lists/*; \ 30 | apt-get clean 31 | 32 | RUN pip3 install --upgrade pip cffi; \ 33 | pip install ansible==2.10.7 netaddr 34 | 35 | RUN dpkgArch="$(dpkg --print-architecture | awk -F- '{ print $NF }')"; \ 36 | case "${dpkgArch}" in \ 37 | amd64|x86_64) \ 38 | curl -sSLo /kubernetes/helm-v3.5.4-linux-amd64.tar.gz https://get.helm.sh/helm-v3.5.4-linux-amd64.tar.gz; \ 39 | ;; \ 40 | aarch64|arm64) \ 41 | curl -sSLo /kubernetes/helm-v3.5.4-linux-arm64.tar.gz https://get.helm.sh/helm-v3.5.4-linux-arm64.tar.gz; \ 42 | ;; \ 43 | esac 44 | 45 | RUN cp /etc/apt/sources.list /etc/apt/sources.list.orig.bak; \ 46 | sed -i 's http://.*.debian.org http://mirrors.aliyun.com g' /etc/apt/sources.list 47 | -------------------------------------------------------------------------------- /offline/default.conf: -------------------------------------------------------------------------------- 1 | server { 2 | # 镜像库未配置认证,故禁止推送镜像 3 | if ($request_method !~* GET|HEAD) { 4 | return 403; 5 | } 6 | location / { 7 | root /kubernetes; 8 | charset utf-8; 9 | autoindex on; # 显示目录 10 | autoindex_localtime on; # 显示文件时间 11 | autoindex_exact_size off; # 显示文件大小 12 | } 13 | location /v2 { 14 | proxy_redirect off; 15 | proxy_set_header Host $host; 16 | proxy_set_header X-Real-IP $remote_addr; 17 | proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; 18 | proxy_pass http://127.0.0.1:5000; 19 | } 20 | } -------------------------------------------------------------------------------- /offline/download-images-to-registry.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # set -eux; 3 | 4 | images=" 5 | nginx:1.19-alpine 6 | haproxy:2.3-alpine 7 | traefik:2.4.8 8 | openresty/openresty:1.19.3.1-alpine 9 | envoyproxy/envoy:v1.16.2 10 | osixia/keepalived:2.0.20 11 | setzero/chrony:3.5 12 | calico/typha:v3.19.1 13 | calico/cni:v3.19.1 14 | 
calico/node:v3.19.1 15 | calico/kube-controllers:v3.19.1 16 | calico/pod2daemon-flexvol:v3.19.1 17 | calico/ctl:v3.19.1 18 | jettech/kube-webhook-certgen:v1.5.1 19 | kubernetesui/dashboard:v2.3.1 20 | kubernetesui/metrics-scraper:v1.0.6 21 | quay.io/coreos/flannel:v0.14.0 22 | quay.io/jetstack/cert-manager-cainjector:v1.4.0 23 | quay.io/jetstack/cert-manager-webhook:v1.4.0 24 | quay.io/jetstack/cert-manager-controller:v1.4.0 25 | k8s.gcr.io/kube-apiserver:v1.21.14 26 | k8s.gcr.io/kube-controller-manager:v1.21.14 27 | k8s.gcr.io/kube-scheduler:v1.21.14 28 | k8s.gcr.io/kube-proxy:v1.21.14 29 | k8s.gcr.io/pause:3.4.1 30 | k8s.gcr.io/etcd:3.5.4-0 31 | k8s.gcr.io/coredns/coredns:v1.8.0 32 | k8s.gcr.io/ingress-nginx/controller:v0.47.0 33 | k8s.gcr.io/metrics-server/metrics-server:v0.5.0 34 | " 35 | 36 | dest_registry=${dest_registry:-'127.0.0.1:5000/kubeadm-ha'} 37 | for image in $images ; do 38 | docker pull --platform ${1:-'linux/amd64'} $image 39 | count=$(echo $image | grep -o '/*' | wc -l) 40 | if [[ $count -eq 0 ]]; then 41 | dest=$dest_registry/$image 42 | elif [[ $count -eq 1 ]]; then 43 | if [[ $image =~ 'k8s.gcr.io' ]]; then 44 | dest=$dest_registry/$(echo ${image#*/} | sed 's / _ g') 45 | else 46 | dest=$dest_registry/$(echo ${image} | sed 's / _ g') 47 | fi 48 | else 49 | if [[ $image =~ 'coredns' ]]; then 50 | dest=$dest_registry/$(echo ${image##*/} | sed 's / _ g') 51 | else 52 | dest=$dest_registry/$(echo ${image#*/} | sed 's / _ g') 53 | fi 54 | fi 55 | docker tag $image $dest 56 | docker push $dest 57 | done -------------------------------------------------------------------------------- /offline/download-yum.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -eux; 3 | 4 | # 创建缓存目录 5 | mkdir -p ${1:-'kubernetes-yum'} 6 | cd ${1:-'kubernetes-yum'} 7 | 8 | yum install -y \ 9 | yum-utils \ 10 | createrepo \ 11 | epel-release 12 | 13 | # 添加docker源 14 | yum-config-manager \ 15 | --add-repo \ 16 | https://download.docker.com/linux/centos/docker-ce.repo 17 | 18 | # 添加kubernetes源 19 | cat < 3 | systemctl status docker | grep running || echo "not running" 4 | register: docker_already_running 5 | 6 | - name: 设置 container_manager_detected 变量 7 | set_fact: 8 | container_manager_detected: >- 9 | {%- if "active" in docker_already_running.stdout -%} 10 | docker 11 | {%- else -%} 12 | containerd 13 | {%- endif -%} 14 | 15 | - block: 16 | - name: 第一个 etcd 节点创建 etcd 备份目录 17 | file: 18 | name: /etc/kubernetes/backup/etcd 19 | state: directory 20 | 21 | - name: 第一个 etcd 节点备份 etcd 数据库 22 | shell: > 23 | {% if container_manager_detected == 'containerd' %} 24 | ctr -n k8s.io run --rm --net-host --env ETCDCTL_API=3 25 | --mount type=bind,src=/etc/kubernetes/pki/etcd,dst=/etc/kubernetes/pki/etcd,options=rbind:ro 26 | --mount type=bind,src=/etc/kubernetes/backup/etcd,dst=/etc/kubernetes/backup/etcd,options=rbind:rw 27 | {{ etcd_image }} etcd-save-snapshot 28 | {% elif container_manager_detected == 'docker' %} 29 | docker run --net host -e ETCDCTL_API=3 30 | -v /etc/kubernetes/backup/etcd:/etc/kubernetes/backup/etcd 31 | -v /etc/kubernetes/pki/etcd/:/etc/kubernetes/pki/etcd/ 32 | --rm {{ etcd_image }} 33 | {% endif %} 34 | etcdctl snapshot save 35 | /etc/kubernetes/backup/etcd/etcd-snapshot-$(date '+%Y%m%dT%H%M%S').db 36 | --cacert=/etc/kubernetes/pki/etcd/ca.crt 37 | --key=/etc/kubernetes/pki/etcd/healthcheck-client.key 38 | --cert=/etc/kubernetes/pki/etcd/healthcheck-client.crt 39 | when: inventory_hostname == groups['etcd'][0] 40 | 41 | - name: 
所有节点创建 kubelet 备份目录 42 | file: 43 | name: /etc/kubernetes/backup/kubelet 44 | state: directory 45 | 46 | - name: 集群外节点备份 kubelet 配置文件 47 | copy: 48 | src: /etc/systemd/system/kubelet.service.d/20-kubelet-override.conf 49 | dest: /etc/kubernetes/backup/kubelet 50 | mode: 0644 51 | remote_src: yes 52 | when: inventory_hostname not in (groups['kube-master'] + groups['new-master'] + groups['kube-worker'] + groups['new-worker']) 53 | 54 | - name: 集群内节点备份 kubelet 配置文件 55 | copy: 56 | src: "{{ item }}" 57 | dest: /etc/kubernetes/backup/kubelet 58 | mode: 0644 59 | remote_src: yes 60 | with_items: 61 | - /var/lib/kubelet/pki 62 | - /var/lib/kubelet/config.yaml 63 | - /var/lib/kubelet/kubeadm-flags.env 64 | when: inventory_hostname in (groups['kube-master'] + groups['new-master'] + groups['kube-worker'] + groups['new-worker']) 65 | 66 | - name: 所有节点备份 /etc/kubernetes 目录 67 | copy: 68 | src: /etc/kubernetes/ 69 | dest: /tmp/kubernetes.orig.{{ ansible_date_time.iso8601_basic_short }} 70 | mode: 0644 71 | remote_src: yes 72 | 73 | - name: 读取所有备份目录 74 | find: 75 | paths: /tmp 76 | file_type: directory 77 | patterns: kubernetes.orig.* 78 | register: kubernetes_back_dirs 79 | 80 | - name: 获取最新备份目录 81 | set_fact: 82 | kubernetes_latest_back_dir: "{{ kubernetes_back_dirs.files | sort(attribute='ctime',reverse=true) | first }}" 83 | 84 | - name: 压缩备份文件 85 | archive: 86 | path: "{{ kubernetes_latest_back_dir.path }}/*" 87 | dest: "{{ kubernetes_latest_back_dir.path }}.tar.gz" 88 | format: gz 89 | force_archive: true 90 | 91 | - name: 拉取备份至本地 92 | fetch: 93 | src: "{{ kubernetes_latest_back_dir.path }}.tar.gz" 94 | dest: "{{ (playbook_dir + '/cluster-backup/' + inventory_hostname + '-' + kubernetes_latest_back_dir.path|basename + '.tar.gz') | realpath }}" 95 | flat: yes -------------------------------------------------------------------------------- /roles/chrony/tasks/deploy.yml: -------------------------------------------------------------------------------- 1 | - name: 判断 Docker 是否早已安装 2 | shell: > 3 | systemctl status docker | grep running || echo "not running" 4 | register: docker_already_running 5 | 6 | - name: 设置 container_manager_detected 变量 7 | set_fact: 8 | container_manager_detected: >- 9 | {%- if "active" in docker_already_running.stdout -%} 10 | docker 11 | {%- else -%} 12 | containerd 13 | {%- endif -%} 14 | 15 | - name: 创建 chrony、kubelet 相关目录 16 | file: 17 | name: "{{ item }}" 18 | state: directory 19 | with_items: 20 | - "{{ kubelet_root_dir }}" 21 | - /etc/kubernetes/manifests 22 | - /etc/systemd/system/kubelet.service.d 23 | 24 | - name: 读取 kubelet.conf 文件 stat 信息 25 | stat: 26 | path: /etc/kubernetes/kubelet.conf 27 | register: kubelet_conf_stat 28 | 29 | - block: 30 | - name: 获取 Docker Cgroup Driver 值 31 | shell: docker info --format '{{ '{{' }} json .CgroupDriver {{ '}}' }}' | cut -d'"' -f2 32 | register: docker_cgroup_driver 33 | when: container_manager_detected == 'docker' 34 | 35 | - name: 设置 docker 为容器运行时 kubelet_cgroup_driver_detected 变量 36 | set_fact: 37 | kubelet_cgroup_driver_detected: >- 38 | {{ docker_cgroup_driver.stdout }} 39 | when: container_manager_detected == 'docker' 40 | 41 | - name: 设置 containerd 为容器运行时 kubelet_cgroup_driver_detected 变量 42 | set_fact: 43 | kubelet_cgroup_driver_detected: >- 44 | {%- if containerd_use_systemd_cgroup -%}systemd{%- else -%}cgroupfs{%- endif -%} 45 | when: container_manager_detected == 'containerd' 46 | 47 | - name: 生成临时 kubelet unit 文件 48 | template: 49 | src: 20-kubelet-override.conf.j2 50 | dest: 
/etc/systemd/system/kubelet.service.d/20-kubelet-override.conf 51 | owner: root 52 | mode: 0644 53 | 54 | - name: 重新加载 daemon 55 | systemd: 56 | daemon_reload: yes 57 | 58 | - name: 临时启动 kubelet 以引导 lb 运行 59 | service: 60 | name: kubelet 61 | state: restarted 62 | enabled: yes 63 | 64 | when: 65 | - not kubelet_conf_stat.stat.exists 66 | 67 | - name: 判断 chrony 是否早已安装 68 | shell: > 69 | systemctl status chronyd | grep running || echo "not running" 70 | register: chronyd_already_running 71 | 72 | - name: 停止系统原有 chrony 73 | when: '"active" in chronyd_already_running.stdout' 74 | service: 75 | name: chronyd 76 | state: stopped 77 | enabled: no 78 | 79 | - name: 拉取 chrony 镜像 80 | shell: "{{ image_pull_command }} {{ item }}" 81 | with_items: 82 | - "{{ chrony_image }}" 83 | - "{{ pod_infra_container_image }}" 84 | 85 | - name: 生成 chrony static pod 文件 86 | template: 87 | src: chrony.yaml.j2 88 | dest: /etc/kubernetes/manifests/chrony.yaml 89 | owner: root 90 | mode: 0644 91 | 92 | - block: 93 | - name: 移除临时 kubelet unit 文件 94 | file: 95 | name: /etc/systemd/system/kubelet.service.d/20-kubelet-override.conf 96 | state: absent 97 | 98 | - name: 重新加载 daemon 99 | systemd: 100 | daemon_reload: yes 101 | 102 | - name: 停止临时启动 kubelet 103 | service: 104 | name: kubelet 105 | state: stopped 106 | enabled: yes 107 | when: 108 | - not kubelet_conf_stat.stat.exists 109 | - inventory_hostname in (groups['kube-master'] + groups['kube-worker'] + groups['new-master'] + groups['new-worker']) -------------------------------------------------------------------------------- /roles/chrony/tasks/main.yml: -------------------------------------------------------------------------------- 1 | - include_tasks: deploy.yml 2 | when: "(chrony_enabled|bool) | default(false)" -------------------------------------------------------------------------------- /roles/chrony/templates/20-kubelet-override.conf.j2: -------------------------------------------------------------------------------- 1 | [Service] 2 | ExecStart= 3 | ExecStart=/usr/bin/kubelet {{ kubelet_args_base }} 4 | Restart=always -------------------------------------------------------------------------------- /roles/chrony/templates/chrony.yaml.j2: -------------------------------------------------------------------------------- 1 | kind: Pod 2 | apiVersion: v1 3 | metadata: 4 | labels: 5 | addonmanager.kubernetes.io/mode: Reconcile 6 | k8s-app: chrony 7 | name: chrony 8 | namespace: kube-system 9 | spec: 10 | hostNetwork: true 11 | dnsPolicy: ClusterFirstWithHostNet 12 | priorityClassName: system-cluster-critical 13 | containers: 14 | - name: chrony 15 | image: {{ chrony_image }} 16 | env: 17 | - name: NTP_SERVER 18 | {% if inventory_hostname == groups['etcd'][0] %} 19 | value: {{ ntp_server }} 20 | {% else %} 21 | value: {% if hostvars[inventory_hostname]['ansible_host'] is defined %}{{ hostvars[groups['etcd'][0]]['ansible_host'] }}{% else %}{{ groups['etcd'][0] }}{% endif %} 22 | 23 | {% endif %} 24 | - name: ALLOW_CIDR 25 | value: 0.0.0.0/0 26 | - name: SYNC_RTC 27 | value: 'true' 28 | livenessProbe: 29 | exec: 30 | command: 31 | - chronyc 32 | - tracking 33 | initialDelaySeconds: 30 34 | periodSeconds: 60 35 | timeoutSeconds: 5 36 | volumeMounts: 37 | - name: tz-config 38 | mountPath: /etc/localtime 39 | readOnly: true 40 | - name: tzdata-config 41 | mountPath: /etc/timezone 42 | readOnly: true 43 | securityContext: 44 | capabilities: 45 | add: 46 | - SYS_TIME 47 | volumes: 48 | - name: tz-config 49 | hostPath: 50 | path: /etc/localtime 51 | - name: tzdata-config 52 
| hostPath: 53 | path: /etc/timezone 54 | -------------------------------------------------------------------------------- /roles/docker-to-containerd/tasks/main.yml: -------------------------------------------------------------------------------- 1 | - name: 校验变量 container_manager 2 | assert: 3 | that: container_manager == "containerd" 4 | msg: "当前 container_manager 值为:{{ container_manager }},请设置为 containerd。" 5 | 6 | - name: 判断 Docker 是否运行 7 | shell: > 8 | systemctl status docker | grep running || echo "not running" 9 | register: docker_status 10 | 11 | - block: 12 | - name: 停止 kubelet 服务 13 | service: 14 | name: kubelet 15 | state: stopped 16 | enabled: yes 17 | 18 | - name: 停止运行的容器 19 | ignore_errors: true 20 | shell: "docker stop $(docker ps -aq)" 21 | args: 22 | executable: /bin/bash 23 | register: stop_all_docker_containers 24 | retries: 5 25 | until: stop_all_docker_containers.rc == 0 26 | delay: 5 27 | 28 | - name: 清理 docker 相关数据 29 | ignore_errors: true 30 | shell: "docker system prune -a -f --volumes" 31 | args: 32 | executable: /bin/bash 33 | register: remove_all_docker_data 34 | retries: 5 35 | until: remove_all_docker_data.rc == 0 36 | delay: 5 37 | 38 | - name: 停止 docker 服务 39 | service: 40 | name: docker 41 | state: stopped 42 | enabled: no 43 | 44 | - name: 卸载 Docker 服务 45 | yum: 46 | name: 47 | - "docker-ce" 48 | - "docker-ce-cli" 49 | state: absent 50 | enablerepo: docker-ce-stable 51 | when: ansible_distribution in [ 'CentOS','OracleLinux','RedHat' ] 52 | 53 | - name: 卸载 Docker 服务 54 | apt: 55 | name: 56 | - "docker-ce" 57 | - "docker-ce-cli" 58 | state: absent 59 | allow_unauthenticated: true 60 | when: ansible_distribution in [ 'Ubuntu','Debian' ] 61 | 62 | - name: 停止 containerd 服务 63 | service: 64 | name: containerd 65 | state: stopped 66 | enabled: yes 67 | 68 | when: '"active" in docker_status.stdout' 69 | 70 | - include_tasks: "{{ (role_path + '/../prepare/container-engine/tasks/main.yml') | realpath }}" 71 | 72 | - name: 解析 /var/lib/kubelet/kubeadm-flags.env 配置文件 73 | shell: cat /var/lib/kubelet/kubeadm-flags.env | grep containerd.sock || echo "not find" 74 | register: kubeadm_flags_env_output 75 | 76 | - block: 77 | - name: 更新 /var/lib/kubelet/kubeadm-flags.env 配置文件 78 | shell: > 79 | sed -i 80 | 's;--hostname-override;--container-runtime=remote 81 | --container-runtime-endpoint=/var/run/containerd/containerd.sock 82 | --hostname-override;g' 83 | /var/lib/kubelet/kubeadm-flags.env 84 | args: 85 | warn: no 86 | 87 | - name: 启动 kubelet 服务 88 | service: 89 | name: kubelet 90 | state: restarted 91 | enabled: yes 92 | when: '"containerd.sock" not in kubeadm_flags_env_output.stdout' -------------------------------------------------------------------------------- /roles/docker-to-containerd/templates/containerd/config.toml.j2: -------------------------------------------------------------------------------- 1 | version = 2 2 | root = "{{ containerd_storage_dir }}" 3 | state = "{{ containerd_state_dir }}" 4 | oom_score = {{ containerd_oom_score }} 5 | 6 | [grpc] 7 | max_recv_message_size = {{ containerd_grpc_max_recv_message_size | default(16777216) }} 8 | max_send_message_size = {{ containerd_grpc_max_send_message_size | default(16777216) }} 9 | 10 | [debug] 11 | level = "{{ containerd_debug_level | default('info') }}" 12 | 13 | [metrics] 14 | address = "{{ containerd_metrics_address | default('') }}" 15 | grpc_histogram = {{ containerd_metrics_grpc_histogram | default(false) | lower }} 16 | 17 | [plugins] 18 | [plugins."io.containerd.grpc.v1.cri"] 19 | sandbox_image = 
"{{ pod_infra_container_image }}" 20 | max_container_log_line_size = {{ containerd_max_container_log_line_size }} 21 | [plugins."io.containerd.grpc.v1.cri".containerd] 22 | default_runtime_name = "{{ containerd_default_runtime | default('runc') }}" 23 | snapshotter = "{{ containerd_snapshotter | default('overlayfs') }}" 24 | [plugins."io.containerd.grpc.v1.cri".containerd.runtimes] 25 | {% for runtime in containerd_runtimes %} 26 | [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.{{ runtime.name }}] 27 | runtime_type = "{{ runtime.type }}" 28 | runtime_engine = "{{ runtime.engine }}" 29 | runtime_root = "{{ runtime.root }}" 30 | [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.{{ runtime.name }}.options] 31 | {% for key, value in runtime.options.items() %} 32 | {{ key }} = {{ value }} 33 | {% endfor %} 34 | {% endfor %} 35 | [plugins."io.containerd.grpc.v1.cri".registry] 36 | [plugins."io.containerd.grpc.v1.cri".registry.mirrors] 37 | {% for registry, addr in containerd_registries.items() %} 38 | [plugins."io.containerd.grpc.v1.cri".registry.mirrors."{{ registry }}"] 39 | endpoint = ["{{ ([ addr ] | flatten ) | join('","') }}"] 40 | {% endfor %} 41 | 42 | {% if containerd_extra_args is defined %} 43 | {{ containerd_extra_args }} 44 | {% endif %} -------------------------------------------------------------------------------- /roles/docker-to-containerd/templates/containerd/crictl.yaml.j2: -------------------------------------------------------------------------------- 1 | runtime-endpoint: unix://{{ cri_socket }} 2 | image-endpoint: unix://{{ cri_socket }} 3 | timeout: 30 4 | debug: false 5 | -------------------------------------------------------------------------------- /roles/docker-to-containerd/templates/http-proxy.conf.j2: -------------------------------------------------------------------------------- 1 | [Service] 2 | Environment={% if http_proxy is defined %}"HTTP_PROXY={{ http_proxy }}"{% endif %} {% if https_proxy is defined %}"HTTPS_PROXY={{ https_proxy }}"{% endif %} {% if no_proxy is defined %}"NO_PROXY={{ no_proxy }}"{% endif %} 3 | -------------------------------------------------------------------------------- /roles/etcd/certificates/tasks/certs_stat.yml: -------------------------------------------------------------------------------- 1 | # 根据stat信息判断是否已经生成过edcd证书,如果没有,下一步生成证书 2 | # 如果已经有etcd证书,为了保证整个安装的幂等性,跳过证书生成的步骤 3 | - name: 读取 etcd-ca 证书私钥 stat 信息 4 | stat: 5 | path: /etc/kubernetes/pki/etcd/ca.key 6 | register: etcd_ca_key_stat 7 | 8 | - name: 读取 etcd-ca 根证书 stat 信息 9 | stat: 10 | path: /etc/kubernetes/pki/etcd/ca.crt 11 | register: etcd_ca_crt_stat 12 | 13 | - name: 读取 healthcheck-client 证书秘钥 stat 信息 14 | stat: 15 | path: /etc/kubernetes/pki/etcd/healthcheck-client.key 16 | register: etcd_healthcheck_client_key_stat 17 | 18 | - name: 读取 server 证书秘钥 stat 信息 19 | stat: 20 | path: /etc/kubernetes/pki/etcd/server.key 21 | register: etcd_server_key_stat 22 | 23 | - name: 读取 peer 证书秘钥 stat 信息 24 | stat: 25 | path: /etc/kubernetes/pki/etcd/peer.key 26 | register: etcd_peer_key_stat 27 | 28 | - name: 读取 apiserver-etcd-client 证书秘钥 stat 信息 29 | stat: 30 | path: /etc/kubernetes/pki/apiserver-etcd-client.key 31 | register: apiserver_etcd_client_key_stat -------------------------------------------------------------------------------- /roles/etcd/certificates/tasks/distribute.yml: -------------------------------------------------------------------------------- 1 | # 分发证书 2 | - name: 获取 etcd 相关证书 3 | slurp: 4 | src: /etc/kubernetes/pki/etcd/{{ item }} 5 | 
with_items: 6 | - ca.crt 7 | - ca.key 8 | - healthcheck-client.crt 9 | - healthcheck-client.key 10 | - peer.crt 11 | - peer.key 12 | - server.crt 13 | - server.key 14 | register: etcd_certs 15 | delegate_to: "{{ groups['etcd'][0] }}" 16 | run_once: true 17 | 18 | - name: 分发 etcd 相关证书到 etcd 节点 19 | copy: 20 | dest: "{{ item.source }}" 21 | content: "{{ item.content | b64decode }}" 22 | owner: root 23 | group: root 24 | mode: 0644 25 | no_log: true 26 | with_items: "{{ etcd_certs.results }}" 27 | when: 28 | - inventory_hostname != groups['etcd'][0] 29 | - inventory_hostname in (groups['etcd'] + groups['new-etcd']) 30 | 31 | - name: 获取 apiserver etcd client 证书 32 | slurp: 33 | src: /etc/kubernetes/pki/{{ item }} 34 | with_items: 35 | - etcd/ca.crt 36 | - apiserver-etcd-client.crt 37 | - apiserver-etcd-client.key 38 | register: etcd_client_certs 39 | delegate_to: "{{ groups['etcd'][0] }}" 40 | run_once: true 41 | 42 | - name: 分发 apiserver etcd client 证书到 master 节点 43 | copy: 44 | dest: "{{ item.source }}" 45 | content: "{{ item.content | b64decode }}" 46 | owner: root 47 | group: root 48 | mode: 0644 49 | no_log: true 50 | with_items: "{{ etcd_client_certs.results }}" 51 | when: 52 | - inventory_hostname != groups['etcd'][0] 53 | - inventory_hostname in (groups['kube-master'] + groups['new-master']) -------------------------------------------------------------------------------- /roles/etcd/certificates/tasks/main.yml: -------------------------------------------------------------------------------- 1 | - name: 在 etcd 节点创建 etcd 相关目录 2 | file: 3 | name: /etc/kubernetes/pki/etcd 4 | state: directory 5 | when: inventory_hostname in (groups['etcd'] + groups['new-etcd']) 6 | 7 | - name: 在 master 节点创建 etcd 证书相关目录 8 | file: 9 | name: /etc/kubernetes/pki/etcd 10 | state: directory 11 | when: inventory_hostname in (groups['kube-master'] + groups['new-master']) 12 | 13 | - block: 14 | # 获取密钥状态 15 | - include_tasks: certs_stat.yml 16 | # 生成证书 17 | - include_tasks: generate.yml 18 | when: inventory_hostname == groups['etcd'][0] 19 | 20 | # 分发证书 21 | - include_tasks: distribute.yml -------------------------------------------------------------------------------- /roles/etcd/certificates/templates/etcd-openssl.cnf.j2: -------------------------------------------------------------------------------- 1 | [ req ] 2 | default_bits = 2048 3 | default_md = sha256 4 | distinguished_name = req_distinguished_name 5 | 6 | [req_distinguished_name] 7 | 8 | [ v3_ca ] 9 | basicConstraints = critical, CA:TRUE 10 | keyUsage = critical, digitalSignature, keyEncipherment, keyCertSign 11 | 12 | [ v3_req_server ] 13 | basicConstraints = CA:FALSE 14 | keyUsage = critical, digitalSignature, keyEncipherment 15 | extendedKeyUsage = serverAuth 16 | 17 | [ v3_req_client ] 18 | basicConstraints = CA:FALSE 19 | keyUsage = critical, digitalSignature, keyEncipherment 20 | extendedKeyUsage = clientAuth 21 | 22 | [ v3_req_peer ] 23 | basicConstraints = CA:FALSE 24 | keyUsage = critical, digitalSignature, keyEncipherment 25 | extendedKeyUsage = serverAuth, clientAuth 26 | subjectAltName = @alt_names_etcd 27 | 28 | [ alt_names_etcd ] 29 | DNS.1 = localhost 30 | {% set dns_idx = 1 | int %} 31 | {% if hostvars[inventory_hostname]['ansible_host'] is defined %} 32 | {% for host in ((groups['etcd'] + groups['new-etcd'])|unique) %} 33 | DNS.{{ dns_idx + loop.index }} = {% if hostvars[host]['ansible_host'] is defined %}{{ host }}{% endif %} 34 | 35 | {% endfor %} 36 | {% endif %} 37 | IP.1 = 127.0.0.1 38 | IP.2 = 0:0:0:0:0:0:0:1 39 | {% set ip_idx = 
2 | int %} 40 | {% for host in ((groups['etcd'] + groups['new-etcd'])|unique) %} 41 | IP.{{ ip_idx + loop.index }} = {% if hostvars[host]['ansible_host'] is defined %}{{ hostvars[host]['ansible_host'] }}{% else %}{{ host }}{% endif %} 42 | 43 | {% endfor %} -------------------------------------------------------------------------------- /roles/etcd/install/tasks/containerd.yml: -------------------------------------------------------------------------------- 1 | - name: 以轮询的方式等待 etcd 运行完成 2 | shell: > 3 | ctr -n k8s.io run --net-host --env ETCDCTL_API=3 4 | --mount type=bind,src=/etc/kubernetes/pki/etcd,dst=/etc/kubernetes/pki/etcd,options=rbind:ro 5 | --rm {{ etcd_image }} etcd-health-check 6 | etcdctl endpoint health 7 | --endpoints=https://[127.0.0.1]:2379 8 | --cacert=/etc/kubernetes/pki/etcd/ca.crt 9 | --key=/etc/kubernetes/pki/etcd/healthcheck-client.key 10 | --cert=/etc/kubernetes/pki/etcd/healthcheck-client.crt 11 | register: etcd_status 12 | until: etcd_status.rc == 0 13 | retries: 12 14 | delay: 5 15 | when: inventory_hostname in groups['etcd'] 16 | 17 | - name: 校验新添加的 etcd 节点是否已在 etcd 集群中 18 | shell: > 19 | ctr -n k8s.io run --net-host --env ETCDCTL_API=3 20 | --mount type=bind,src=/etc/kubernetes/pki/etcd,dst=/etc/kubernetes/pki/etcd,options=rbind:ro 21 | --rm {{ etcd_image }} etcd-list-member 22 | etcdctl member list 23 | --endpoints=https://[127.0.0.1]:2379 24 | --cacert=/etc/kubernetes/pki/etcd/ca.crt 25 | --key=/etc/kubernetes/pki/etcd/healthcheck-client.key 26 | --cert=/etc/kubernetes/pki/etcd/healthcheck-client.crt 27 | register: etcd_member_list 28 | delegate_to: "{{ groups['etcd'][0] }}" 29 | when: inventory_hostname in groups['new-etcd'] 30 | 31 | - name: 将新添加的 etcd 节点添加到 etcd 集群中 32 | shell: > 33 | ctr -n k8s.io run --net-host --env ETCDCTL_API=3 34 | --mount type=bind,src=/etc/kubernetes/pki/etcd,dst=/etc/kubernetes/pki/etcd,options=rbind:ro 35 | --rm {{ etcd_image }} etcd-add-member 36 | etcdctl member add {{ etcd_new_member }} 37 | --endpoints=https://[127.0.0.1]:2379 38 | --cacert=/etc/kubernetes/pki/etcd/ca.crt 39 | --key=/etc/kubernetes/pki/etcd/healthcheck-client.key 40 | --cert=/etc/kubernetes/pki/etcd/healthcheck-client.crt 41 | register: etcd_member_add 42 | until: etcd_member_add.rc == 0 43 | retries: 12 44 | delay: 5 45 | delegate_to: "{{ groups['etcd'][0] }}" 46 | when: 47 | - inventory_hostname in groups['new-etcd'] 48 | - inventory_hostname not in etcd_member_list.stdout 49 | 50 | - name: 以轮询的方式等待 new etcd 运行完成 51 | shell: > 52 | ctr -n k8s.io run --net-host --env ETCDCTL_API=3 53 | --mount type=bind,src=/etc/kubernetes/pki/etcd,dst=/etc/kubernetes/pki/etcd,options=rbind:ro 54 | --rm {{ etcd_image }} etcd-health-check 55 | etcdctl endpoint health 56 | --endpoints=https://[127.0.0.1]:2379 57 | --cacert=/etc/kubernetes/pki/etcd/ca.crt 58 | --key=/etc/kubernetes/pki/etcd/healthcheck-client.key 59 | --cert=/etc/kubernetes/pki/etcd/healthcheck-client.crt 60 | register: etcd_status 61 | until: etcd_status.rc == 0 62 | retries: 12 63 | delay: 5 64 | when: inventory_hostname in groups['new-etcd'] 65 | -------------------------------------------------------------------------------- /roles/etcd/install/tasks/docker.yml: -------------------------------------------------------------------------------- 1 | - name: 以轮询的方式等待 etcd 运行完成 2 | shell: > 3 | docker run --net host -e ETCDCTL_API=3 4 | -v /etc/kubernetes/pki/etcd:/etc/kubernetes/pki/etcd 5 | --rm {{ etcd_image }} 6 | etcdctl endpoint health 7 | --endpoints=https://[127.0.0.1]:2379 8 | 
--cacert=/etc/kubernetes/pki/etcd/ca.crt 9 | --key=/etc/kubernetes/pki/etcd/healthcheck-client.key 10 | --cert=/etc/kubernetes/pki/etcd/healthcheck-client.crt 11 | register: etcd_status 12 | until: etcd_status.rc == 0 13 | retries: 12 14 | delay: 5 15 | when: inventory_hostname in groups['etcd'] 16 | 17 | - name: 校验新添加的 etcd 节点是否已在 etcd 集群中 18 | shell: > 19 | docker run --net host -e ETCDCTL_API=3 20 | -v /etc/kubernetes/pki/etcd:/etc/kubernetes/pki/etcd 21 | --rm {{ etcd_image }} 22 | etcdctl member list 23 | --endpoints=https://[127.0.0.1]:2379 24 | --cacert=/etc/kubernetes/pki/etcd/ca.crt 25 | --key=/etc/kubernetes/pki/etcd/healthcheck-client.key 26 | --cert=/etc/kubernetes/pki/etcd/healthcheck-client.crt 27 | register: etcd_member_list 28 | delegate_to: "{{ groups['etcd'][0] }}" 29 | when: inventory_hostname in groups['new-etcd'] 30 | 31 | - name: 将新添加的 etcd 节点添加到 etcd 集群中 32 | shell: > 33 | docker run --net host -e ETCDCTL_API=3 34 | -v /etc/kubernetes/pki/etcd:/etc/kubernetes/pki/etcd 35 | --rm {{ etcd_image }} 36 | etcdctl member add {{ etcd_new_member }} 37 | --endpoints=https://[127.0.0.1]:2379 38 | --cacert=/etc/kubernetes/pki/etcd/ca.crt 39 | --key=/etc/kubernetes/pki/etcd/healthcheck-client.key 40 | --cert=/etc/kubernetes/pki/etcd/healthcheck-client.crt 41 | register: etcd_member_add 42 | until: etcd_member_add.rc == 0 43 | retries: 12 44 | delay: 5 45 | delegate_to: "{{ groups['etcd'][0] }}" 46 | when: 47 | - inventory_hostname in groups['new-etcd'] 48 | - inventory_hostname not in etcd_member_list.stdout 49 | 50 | - name: 以轮询的方式等待 new etcd 运行完成 51 | shell: > 52 | docker run --net host -e ETCDCTL_API=3 53 | -v /etc/kubernetes/pki/etcd:/etc/kubernetes/pki/etcd 54 | --rm {{ etcd_image }} 55 | etcdctl endpoint health 56 | --endpoints=https://[127.0.0.1]:2379 57 | --cacert=/etc/kubernetes/pki/etcd/ca.crt 58 | --key=/etc/kubernetes/pki/etcd/healthcheck-client.key 59 | --cert=/etc/kubernetes/pki/etcd/healthcheck-client.crt 60 | register: etcd_status 61 | until: etcd_status.rc == 0 62 | retries: 12 63 | delay: 5 64 | when: inventory_hostname in groups['new-etcd'] -------------------------------------------------------------------------------- /roles/etcd/install/templates/20-kubelet-override.conf.j2: -------------------------------------------------------------------------------- 1 | [Service] 2 | ExecStart= 3 | ExecStart=/usr/bin/kubelet {{ kubelet_args_base }} 4 | Restart=always -------------------------------------------------------------------------------- /roles/etcd/install/templates/etcd-external.yaml.j2: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Pod 3 | metadata: 4 | annotations: 5 | kubeadm.kubernetes.io/etcd.advertise-client-urls: https://{{ current_host_ip }}:2379 6 | creationTimestamp: null 7 | labels: 8 | component: etcd 9 | tier: control-plane 10 | name: etcd 11 | namespace: kube-system 12 | spec: 13 | containers: 14 | - command: 15 | - etcd 16 | - --name=etcd-{{ inventory_hostname }} 17 | - --advertise-client-urls=https://{{ current_host_ip }}:2379 18 | - --trusted-ca-file=/etc/kubernetes/pki/etcd/ca.crt 19 | - --cert-file=/etc/kubernetes/pki/etcd/server.crt 20 | - --key-file=/etc/kubernetes/pki/etcd/server.key 21 | - --peer-trusted-ca-file=/etc/kubernetes/pki/etcd/ca.crt 22 | - --peer-cert-file=/etc/kubernetes/pki/etcd/peer.crt 23 | - --peer-key-file=/etc/kubernetes/pki/etcd/peer.key 24 | - --peer-client-cert-auth=true 25 | - --listen-peer-urls=https://{{ current_host_ip }}:2380 26 | - 
--listen-metrics-urls=http://127.0.0.1:2381 27 | - --listen-client-urls=https://127.0.0.1:2379,https://{{ current_host_ip }}:2379 28 | - --initial-cluster-state={{ etcd_cluster_state }} 29 | - --initial-advertise-peer-urls=https://{{ current_host_ip }}:2380 30 | - --initial-cluster={{ etcd_initial_cluster }} 31 | - --initial-cluster-token=etcd-cluster-token 32 | - --client-cert-auth=true 33 | - --snapshot-count=10000 34 | - --data-dir=/var/lib/etcd 35 | # 推荐一小时压缩一次数据这样可以极大的保证集群稳定 36 | - --auto-compaction-retention=1 37 | # Etcd Raft消息最大字节数,官方推荐是10M 38 | - --max-request-bytes=10485760 39 | # ETCD db数据大小,默认是2G,官方推荐是8G 40 | - --quota-backend-bytes=8589934592 41 | image: {{ etcd_image }} 42 | imagePullPolicy: IfNotPresent 43 | livenessProbe: 44 | failureThreshold: 8 45 | httpGet: 46 | host: 127.0.0.1 47 | path: /health 48 | port: 2381 49 | scheme: HTTP 50 | initialDelaySeconds: 10 51 | periodSeconds: 10 52 | timeoutSeconds: 15 53 | {% if kube_version is version('1.18', '>=') %} 54 | startupProbe: 55 | failureThreshold: 24 56 | httpGet: 57 | host: 127.0.0.1 58 | path: /health 59 | port: 2381 60 | scheme: HTTP 61 | initialDelaySeconds: 10 62 | periodSeconds: 10 63 | timeoutSeconds: 15 64 | {% endif %} 65 | name: etcd 66 | resources: {} 67 | volumeMounts: 68 | - mountPath: /var/lib/etcd 69 | name: etcd-data 70 | - mountPath: /etc/kubernetes/pki/etcd 71 | name: etcd-certs 72 | - mountPath: /etc/localtime 73 | name: localtime 74 | readOnly: true 75 | hostNetwork: true 76 | priorityClassName: system-cluster-critical 77 | volumes: 78 | - hostPath: 79 | path: {{ etcd_data_dir }} 80 | type: DirectoryOrCreate 81 | name: etcd-data 82 | - hostPath: 83 | path: /etc/kubernetes/pki/etcd 84 | type: DirectoryOrCreate 85 | name: etcd-certs 86 | - hostPath: 87 | path: /etc/localtime 88 | type: File 89 | name: localtime 90 | status: {} -------------------------------------------------------------------------------- /roles/etcd/install/templates/etcdtools.j2: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | function etcd_backup(){ 3 | {% if container_manager_detected == 'containerd' %} 4 | ctr -n k8s.io run --rm --net-host --env ETCDCTL_API=3 \ 5 | --mount type=bind,src=/etc/kubernetes/pki/etcd,dst=/etc/kubernetes/pki/etcd,options=rbind:ro \ 6 | --mount type=bind,src=/etc/kubernetes/backup/etcd,dst=/etc/kubernetes/backup/etcd,options=rbind:rw \ 7 | {{ etcd_image }} etcd-save-snapshot \ 8 | {% elif container_manager_detected == 'docker' %} 9 | docker run --rm --net host -e ETCDCTL_API=3 \ 10 | -v /etc/kubernetes/backup/etcd:/etc/kubernetes/backup/etcd \ 11 | -v /etc/kubernetes/pki/etcd/:/etc/kubernetes/pki/etcd/ \ 12 | {{ etcd_image }} \ 13 | {% endif %} 14 | etcdctl snapshot save \ 15 | /etc/kubernetes/backup/etcd/etcd-snapshot-$(date '+%Y%m%dT%H%M%S').db \ 16 | --cacert=/etc/kubernetes/pki/etcd/ca.crt \ 17 | --key=/etc/kubernetes/pki/etcd/healthcheck-client.key \ 18 | --cert=/etc/kubernetes/pki/etcd/healthcheck-client.crt 19 | } 20 | 21 | function etcd_cleanup(){ 22 | count=$(find /etc/kubernetes/backup/etcd -ctime +{{ etcd_backup_expiry }} -name 'etcd-snapshot*.db' | wc -l) 23 | if [ $count -lt {{ etcd_backup_expiry }} ]; then 24 | exit 0 # 备份文件数量小于备份保留天数则不进行删除;防止备份失败的情况 25 | fi 26 | find /etc/kubernetes/backup/etcd -ctime +{{ etcd_backup_expiry }} -name 'etcd-snapshot*.db' | xargs rm -rf 27 | } 28 | 29 | case ${1} in 30 | backup) 31 | etcd_backup 32 | ;; 33 | cleanup) 34 | etcd_cleanup 35 | ;; 36 | esac 
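The etcdtools.j2 template above renders a small helper with two subcommands: `backup` writes a timestamped etcd snapshot into /etc/kubernetes/backup/etcd, and `cleanup` prunes snapshots older than `etcd_backup_expiry` days (deliberately skipping deletion when only a few backup files exist). A hypothetical invocation sketch — the install path and the cron schedule are assumptions; how the role actually installs and schedules the rendered script is not shown in this excerpt:

```bash
# Assumed install path for the rendered template.
/usr/local/bin/etcdtools backup    # creates /etc/kubernetes/backup/etcd/etcd-snapshot-<timestamp>.db
/usr/local/bin/etcdtools cleanup   # removes snapshots older than etcd_backup_expiry days

# Illustrative cron entry, e.g. in /etc/cron.d/etcd-backup:
# 0 2 * * * root /usr/local/bin/etcdtools backup && /usr/local/bin/etcdtools cleanup
```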
-------------------------------------------------------------------------------- /roles/kube-certificates/tasks/certs_stat.yml: -------------------------------------------------------------------------------- 1 | # 根据stat信息判断是否已经生成过kubernetes证书,如果没有,下一步生成证书 2 | # 如果已经有kubernetes证书,为了保证整个安装的幂等性,跳过证书生成的步骤 3 | - name: 读取 kubernetes-ca 根证书私钥 stat 信息 4 | stat: 5 | path: /etc/kubernetes/pki/ca.key 6 | register: ca_key_stat 7 | 8 | - name: 读取 kubernetes-ca 根证书 stat 信息 9 | stat: 10 | path: /etc/kubernetes/pki/ca.crt 11 | register: ca_crt_stat 12 | 13 | - name: 读取 front-proxy-ca 根证书私钥 stat 信息 14 | stat: 15 | path: /etc/kubernetes/pki/front-proxy-ca.key 16 | register: front_proxy_ca_key_stat 17 | 18 | - name: 读取 front-proxy-ca 根证书 stat 信息 19 | stat: 20 | path: /etc/kubernetes/pki/front-proxy-ca.crt 21 | register: front_proxy_ca_crt_stat 22 | 23 | - name: 读取 apiserver 证书私钥 stat 信息 24 | stat: 25 | path: /etc/kubernetes/pki/apiserver.key 26 | register: apiserver_key_stat 27 | 28 | - name: 读取 apiserver-kubelet-client 证书私钥 stat 信息 29 | stat: 30 | path: /etc/kubernetes/pki/apiserver-kubelet-client.key 31 | register: apiserver_kubelet_client_key_stat 32 | 33 | - name: 读取 front-proxy-client 证书私钥 stat 信息 34 | stat: 35 | path: /etc/kubernetes/pki/front-proxy-client.key 36 | register: front_proxy_client_key_stat 37 | 38 | - name: 读取 kube-scheduler 证书私钥 stat 信息 39 | stat: 40 | path: /etc/kubernetes/pki/kube-scheduler.key 41 | register: kube_scheduler_key_stat 42 | 43 | - name: 读取 sa 证书私钥 stat 信息 44 | stat: 45 | path: /etc/kubernetes/pki/sa.key 46 | register: sa_key_stat 47 | 48 | - name: 读取 sa 证书公钥 stat 信息 49 | stat: 50 | path: /etc/kubernetes/pki/sa.pub 51 | register: sa_pud_stat 52 | 53 | - name: 读取 admin 证书私钥 stat 信息 54 | stat: 55 | path: /etc/kubernetes/pki/admin.key 56 | register: admin_key_stat 57 | 58 | - name: 读取 kubelet 证书私钥 stat 信息 59 | stat: 60 | path: /var/lib/kubelet/pki/kubelet.key 61 | register: kubelet_key_stat -------------------------------------------------------------------------------- /roles/kube-certificates/tasks/distribute.yml: -------------------------------------------------------------------------------- 1 | # 分发证书 2 | - name: 获取 kubernetes master 节点相关证书 3 | slurp: 4 | src: /etc/kubernetes/pki/{{ item }} 5 | with_items: 6 | - admin.crt 7 | - admin.key 8 | - apiserver.crt 9 | - apiserver.key 10 | - apiserver-kubelet-client.crt 11 | - apiserver-kubelet-client.key 12 | - ca.crt 13 | - ca.key 14 | - front-proxy-ca.crt 15 | - front-proxy-ca.key 16 | - front-proxy-client.crt 17 | - front-proxy-client.key 18 | - kube-controller-manager.crt 19 | - kube-scheduler.crt 20 | - kube-scheduler.key 21 | - sa.key 22 | - sa.pub 23 | register: kubernetes_master_certs 24 | run_once: true 25 | delegate_to: "{{ groups['kube-master'][0] }}" 26 | 27 | - name: 分发 kubernetes master相关证书到 master 节点 28 | copy: 29 | dest: "{{ item.source }}" 30 | content: "{{ item.content | b64decode }}" 31 | owner: root 32 | group: root 33 | mode: 0644 34 | no_log: true 35 | with_items: "{{ kubernetes_master_certs.results }}" 36 | when: 37 | - inventory_hostname != groups['kube-master'][0] 38 | - inventory_hostname in (groups['kube-master'] + groups['new-master']) 39 | 40 | - name: 获取 kubelet 服务端证书 41 | slurp: 42 | src: /var/lib/kubelet/pki/{{ item }} 43 | with_items: 44 | - kubelet.crt 45 | - kubelet.key 46 | register: kubelet_certs 47 | run_once: true 48 | delegate_to: "{{ groups['kube-master'][0] }}" 49 | 50 | - name: 分发 kubelet 服务端证书到所有节点 51 | copy: 52 | dest: "{{ item.source }}" 53 | content: "{{ item.content | b64decode 
}}" 54 | owner: root 55 | group: root 56 | mode: 0644 57 | no_log: true 58 | with_items: "{{ kubelet_certs.results }}" 59 | when: 60 | - inventory_hostname != groups['kube-master'][0] 61 | 62 | - name: 获取 kubelet 客户端证书列表 63 | set_fact: 64 | kubelet_client_certs: 65 | - "/etc/kubernetes/pki/ca.crt" 66 | - "/var/lib/kubelet/pki/kubelet.key" 67 | - "/var/lib/kubelet/pki/kubelet-client-{{ inventory_hostname }}.crt" 68 | 69 | - name: 获取 kubelet 客户端证书 70 | slurp: 71 | src: "{{ item }}" 72 | with_items: "{{ kubelet_client_certs }}" 73 | register: my_kubelet_client_certs 74 | delegate_to: "{{ groups['kube-master'][0] }}" 75 | 76 | - name: 分发 kubelet 客户端证书 77 | copy: 78 | dest: "{{ item.source }}" 79 | content: "{{ item.content | b64decode }}" 80 | owner: root 81 | group: root 82 | mode: 0644 83 | no_log: true 84 | with_items: "{{ my_kubelet_client_certs.results }}" 85 | when: inventory_hostname != groups['kube-master'][0] -------------------------------------------------------------------------------- /roles/kube-certificates/tasks/main.yml: -------------------------------------------------------------------------------- 1 | - name: 创建 kubernetes 证书存放目录 2 | file: 3 | name: "{{ item }}" 4 | state: directory 5 | with_items: 6 | - /etc/kubernetes/pki 7 | - /var/lib/kubelet/pki 8 | when: 9 | - inventory_hostname in (groups['kube-master'] + groups['new-master'] + groups['kube-worker'] + groups['new-worker']) 10 | 11 | - block: 12 | # 获取密钥状态 13 | - include_tasks: certs_stat.yml 14 | # 生成公共证书 15 | - include_tasks: common.yml 16 | when: inventory_hostname == groups['kube-master'][0] 17 | 18 | # 分发证书 19 | - include_tasks: distribute.yml 20 | when: 21 | - inventory_hostname in (groups['kube-master'] + groups['new-master'] + groups['kube-worker'] + groups['new-worker']) 22 | 23 | - name: 读取 kubelet.conf 文件 stat 信息 24 | stat: 25 | path: /etc/kubernetes/kubelet.conf 26 | register: kubelet_conf_stat 27 | when: 28 | - inventory_hostname in (groups['kube-master'] + groups['new-master'] + groups['kube-worker'] + groups['new-worker']) 29 | 30 | - include_tasks: kubeconfig.yml 31 | when: 32 | - kubelet_conf_stat.stat.exists 33 | - inventory_hostname in (groups['kube-master'] + groups['new-master'] + groups['kube-worker'] + groups['new-worker']) -------------------------------------------------------------------------------- /roles/kube-certificates/templates/kube-openssl.cnf.j2: -------------------------------------------------------------------------------- 1 | [ req ] 2 | default_bits = 2048 3 | default_md = sha256 4 | distinguished_name = req_distinguished_name 5 | 6 | [req_distinguished_name] 7 | 8 | [ v3_ca ] 9 | basicConstraints = critical, CA:TRUE 10 | keyUsage = critical, digitalSignature, keyEncipherment, keyCertSign 11 | 12 | [ v3_req_server ] 13 | basicConstraints = CA:FALSE 14 | keyUsage = critical, digitalSignature, keyEncipherment 15 | extendedKeyUsage = serverAuth 16 | subjectAltName = @alt_kube_apiserver 17 | 18 | [ v3_req_kubelet ] 19 | basicConstraints = CA:FALSE 20 | keyUsage = critical, digitalSignature, keyEncipherment 21 | extendedKeyUsage = serverAuth 22 | subjectAltName = @alt_kubelet 23 | 24 | [ v3_req_client ] 25 | basicConstraints = CA:FALSE 26 | keyUsage = critical, digitalSignature, keyEncipherment 27 | extendedKeyUsage = clientAuth 28 | 29 | [ alt_kube_apiserver ] 30 | DNS.1 = localhost 31 | DNS.2 = kubernetes 32 | DNS.3 = kubernetes.default 33 | DNS.4 = kubernetes.default.svc 34 | {% set dns_idx = 4 | int %} 35 | {% for sub_domain in kube_dns_domain.split('.') %} 36 | {% set outer_loop 
= loop %} 37 | DNS.{{ dns_idx + loop.index }} = kubernetes.default.svc.{% for domain in kube_dns_domain.split('.') %}{% if loop.index <= outer_loop.index %}{{ domain }}{% if loop.index < outer_loop.index %}.{% endif %}{% endif %}{% endfor %} 38 | 39 | {% endfor %} 40 | {% set dns_idx = 4 + (kube_dns_domain.split('.')|length) | int %} 41 | {% for domain in kube_master_external_domain %} 42 | DNS.{{ dns_idx + loop.index }} = {{ domain }} 43 | {% endfor %} 44 | {% if hostvars[inventory_hostname]['ansible_host'] is defined %} 45 | {% set dns_idx = 4 + (kube_dns_domain.split('.')|length + kube_master_external_domain|length) | int %} 46 | {% for host in (groups['kube-master'] + groups['new-master'] | default([])) | unique %} 47 | DNS.{{ dns_idx + loop.index }} = {{ host }} 48 | {% endfor %} 49 | {% endif %} 50 | IP.1 = 127.0.0.1 51 | IP.2 = 0:0:0:0:0:0:0:1 52 | IP.3 = {{ kubernetes_service_ip }} 53 | {% set ip_idx = 3 | int %} 54 | {% for host in (groups['kube-master'] + groups['new-master'] | default([])) | unique %} 55 | IP.{{ ip_idx + loop.index }} = {% if hostvars[host]['ansible_host'] is defined %}{{ hostvars[host]['ansible_host'] }}{% else %}{{ host }}{% endif %} 56 | 57 | {% endfor %} 58 | {% set ip_idx = 3 + (groups['kube-master']|length + groups['new-master']|length) | int %} 59 | {% for ip in kube_master_external_ip %} 60 | IP.{{ ip_idx + loop.index }} = {{ ip }} 61 | {% endfor %} 62 | {% if lb_kube_apiserver_ip is defined %} 63 | IP.{{4 + (groups['kube-master']|length + groups['new-master']|length + kube_master_external_ip|length) | int }} = {{ lb_kube_apiserver_ip | trim }} 64 | {% endif %} 65 | 66 | [ alt_kubelet ] 67 | DNS.1 = localhost 68 | {% if hostvars[inventory_hostname]['ansible_host'] is defined %} 69 | {% set dns_idx = 1 | int %} 70 | {% for host in (groups['kube-master'] + groups['new-master'] + groups['kube-worker'] + groups['new-worker'] | default([])) | unique %} 71 | DNS.{{ dns_idx + loop.index }} = {{ host }} 72 | {% endfor %} 73 | {% endif %} 74 | IP.1 = 127.0.0.1 75 | IP.2 = 0:0:0:0:0:0:0:1 76 | {% set ip_idx = 2 | int %} 77 | {% for host in (groups['kube-master'] + groups['new-master'] + groups['kube-worker'] + groups['new-worker'] | default([])) | unique %} 78 | IP.{{ ip_idx + loop.index }} = {% if hostvars[host]['ansible_host'] is defined %}{{ hostvars[host]['ansible_host'] }}{% else %}{{ host }}{% endif %} 79 | 80 | {% endfor %} -------------------------------------------------------------------------------- /roles/kube-master/tasks/kubeadm-config.yml: -------------------------------------------------------------------------------- 1 | - name: 确认 kubeadm 版本 2 | command: "kubeadm version -o short" 3 | register: kubeadm_version_output 4 | 5 | - name: 设置 kubeadm api version 为 v1beta1 6 | set_fact: 7 | kubeadmConfig_api_version: v1beta1 8 | when: 9 | - kubeadm_version_output.stdout is version('v1.13.0', '>=') 10 | - kubeadm_version_output.stdout is version('v1.15.0', '<') 11 | 12 | - name: 设置 kubeadm api version 为 v1beta2 13 | set_fact: 14 | kubeadmConfig_api_version: v1beta2 15 | when: 16 | - kubeadm_version_output.stdout is version('v1.15.0', '>=') 17 | 18 | - name: 获取 Docker Cgroup Driver 值 19 | shell: docker info --format '{{ '{{' }} json .CgroupDriver {{ '}}' }}' | cut -d'"' -f2 20 | register: docker_cgroup_driver 21 | when: container_manager_detected == 'docker' 22 | 23 | - name: 设置 docker 为容器运行时 kubelet_cgroup_driver_detected 变量 24 | set_fact: 25 | kubelet_cgroup_driver_detected: >- 26 | {{ docker_cgroup_driver.stdout }} 27 | when: container_manager_detected 
== 'docker' 28 | 29 | - name: 设置 containerd 为容器运行时 kubelet_cgroup_driver_detected 变量 30 | set_fact: 31 | kubelet_cgroup_driver_detected: >- 32 | {%- if containerd_use_systemd_cgroup -%}systemd{%- else -%}cgroupfs{%- endif -%} 33 | when: container_manager_detected == 'containerd' 34 | 35 | - name: 创建 kubeadm 的配置文件 36 | template: 37 | src: >- 38 | {% if inventory_hostname == groups['kube-master'][0] -%} 39 | kubeadm-controlplane-init.{{ kubeadmConfig_api_version }}.yaml.j2 40 | {%- elif inventory_hostname in (groups['kube-master'] + groups['new-master']) -%} 41 | kubeadm-controlplane-join.{{ kubeadmConfig_api_version }}.yaml.j2 42 | {%- else -%} 43 | kubeadm-join.{{ kubeadmConfig_api_version }}.yaml.j2 44 | {%- endif %} 45 | dest: "/etc/kubernetes/kubeadm-config.yaml" 46 | owner: root 47 | mode: 0644 -------------------------------------------------------------------------------- /roles/kube-master/tasks/main.yml: -------------------------------------------------------------------------------- 1 | - name: 判断 Docker 是否早已安装 2 | shell: > 3 | systemctl status docker | grep running || echo "not running" 4 | register: docker_already_running 5 | 6 | - name: 设置 container_manager_detected 变量 7 | set_fact: 8 | container_manager_detected: >- 9 | {%- if "active" in docker_already_running.stdout -%} 10 | docker 11 | {%- else -%} 12 | containerd 13 | {%- endif -%} 14 | 15 | - name: 创建 kubernetes 相关目录 16 | file: 17 | name: "{{ item }}" 18 | state: directory 19 | with_items: 20 | - "{{ kubelet_root_dir }}" 21 | - "{{ audit_policy_file | dirname }}" 22 | - /etc/kubernetes/pki 23 | - /etc/kubernetes/config 24 | - /etc/kubernetes/manifests 25 | - /var/log/kubernetes/audit 26 | - /usr/share/bash-completion/completions 27 | 28 | - name: 读取 kubelet.conf 文件 stat 信息 29 | stat: 30 | path: /etc/kubernetes/kubelet.conf 31 | register: kubelet_conf_stat 32 | 33 | # 生成 kubeadm 配置 34 | - include_tasks: kubeadm-config.yml 35 | 36 | - name: 创建 EncryptionConfiguration 配置文件 37 | template: 38 | src: secrets-encryption.yaml.j2 39 | dest: /etc/kubernetes/pki/secrets-encryption.yaml 40 | owner: root 41 | group: root 42 | mode: 0644 43 | 44 | - name: 创建 apiserver 审计日志策略配置文件 45 | template: 46 | src: apiserver-audit-policy.yaml.j2 47 | dest: "{{ audit_policy_file }}" 48 | owner: root 49 | group: root 50 | mode: 0644 51 | when: kubernetes_audit|default(false) 52 | 53 | # 初始化第一个 master 节点 54 | - include_tasks: master-init.yml 55 | when: 56 | - not kubelet_conf_stat.stat.exists 57 | - inventory_hostname == groups['kube-master'][0] 58 | 59 | # 初始化其他 master 节点 60 | - include_tasks: master-join.yml 61 | when: 62 | - not kubelet_conf_stat.stat.exists 63 | - inventory_hostname != groups['kube-master'][0] 64 | - inventory_hostname in (groups['kube-master'] + groups['new-master']) 65 | 66 | - name: 确认 kubelet 配置是否有修改 67 | template: 68 | src: kubelet-config.v1beta1.yaml.j2 69 | dest: /var/lib/kubelet/config.yaml 70 | owner: root 71 | mode: 0644 72 | register: configuration_result 73 | 74 | - name: 启动/重启 kubelet 75 | service: 76 | name: kubelet 77 | state: restarted 78 | enabled: yes 79 | when: configuration_result.changed -------------------------------------------------------------------------------- /roles/kube-master/tasks/master-init.yml: -------------------------------------------------------------------------------- 1 | - name: 确认 kubelet 已停止运行 2 | service: 3 | name: kubelet 4 | state: stopped 5 | enabled: yes 6 | 7 | - name: 获取 master 节点需要拉取的镜像列表 8 | shell: > 9 | kubeadm config images list --config=/etc/kubernetes/kubeadm-config.yaml | 
grep -v coredns | 10 | grep {{ kube_image_repository }} 11 | register: kube_image_list 12 | 13 | - name: 开始拉取 master 节点相关镜像 14 | shell: "{{ image_pull_command }} {{ item }}" 15 | with_items: "{{ kube_image_list.stdout_lines }}" 16 | 17 | - name: 初始化第一个 master 节点 18 | shell: kubeadm init --config=/etc/kubernetes/kubeadm-config.yaml --ignore-preflight-errors=ImagePull 19 | 20 | - include_tasks: "{{ (role_path + '/../kube-certificates/tasks/kubeconfig.yml') | realpath }}" 21 | 22 | - name: 重新加载 daemon 23 | systemd: 24 | daemon_reload: yes 25 | 26 | - name: 重新启动 kubelet 27 | service: 28 | name: kubelet 29 | state: restarted 30 | enabled: yes 31 | 32 | - name: 等待 apiserver 运行 33 | uri: 34 | url: "https://{{ current_host_ip }}:6443/healthz" 35 | validate_certs: no 36 | register: apiserver_result 37 | until: apiserver_result.status == 200 38 | retries: 60 39 | delay: 5 40 | 41 | - name: 等待 kube-scheduler 运行 42 | uri: 43 | url: "https://127.0.0.1:10259/healthz" 44 | validate_certs: no 45 | register: scheduler_result 46 | until: scheduler_result.status == 200 47 | retries: 60 48 | delay: 5 49 | 50 | - name: 等待 kube-controller-manager 运行 51 | uri: 52 | url: "https://127.0.0.1:10257/healthz" 53 | validate_certs: no 54 | register: controller_manager_result 55 | until: controller_manager_result.status == 200 56 | retries: 60 57 | delay: 5 58 | 59 | - name: 创建 kubelet 自动轮换服务端证书配置 60 | template: 61 | src: kubelet-certificates-renewal.yaml.j2 62 | dest: /etc/kubernetes/config/kubelet-certificates-renewal.yaml 63 | owner: root 64 | group: root 65 | mode: 0644 66 | 67 | - name: 授权 kubelet 自动轮换服务端证书 68 | shell: kubectl apply -f /etc/kubernetes/config/kubelet-certificates-renewal.yaml 69 | 70 | - block: 71 | - name: 创建 Pod 安全策略配置文件 72 | template: 73 | src: pod-security-policy.yaml.j2 74 | dest: /etc/kubernetes/config/pod-security-policy.yaml 75 | owner: root 76 | group: root 77 | mode: 0644 78 | 79 | - name: 配置 Pod 安全策略 80 | shell: kubectl apply -f /etc/kubernetes/config/pod-security-policy.yaml 81 | when: '"PodSecurityPolicy" in kube_apiserver_enable_admission_plugins' 82 | -------------------------------------------------------------------------------- /roles/kube-master/tasks/master-join.yml: -------------------------------------------------------------------------------- 1 | - name: 确认 kubelet 已停止运行 2 | service: 3 | name: kubelet 4 | state: stopped 5 | enabled: yes 6 | 7 | - name: 获取 master 节点需要拉取的镜像列表 8 | shell: > 9 | kubeadm config images list --config=/etc/kubernetes/kubeadm-config.yaml | grep -v coredns | 10 | grep {{ kube_image_repository }} 11 | register: kube_image_list 12 | 13 | - name: 开始拉取 master 节点相关镜像 14 | shell: "{{ image_pull_command }} {{ item }}" 15 | with_items: "{{ kube_image_list.stdout_lines }}" 16 | 17 | - name: 其他 master 节点加入集群 18 | shell: > 19 | kubeadm join --config /etc/kubernetes/kubeadm-config.yaml 20 | --ignore-preflight-errors=ImagePull,DirAvailable--etc-kubernetes-manifests 21 | 22 | - include_tasks: "{{ (role_path + '/../kube-certificates/tasks/kubeconfig.yml') | realpath }}" 23 | 24 | - name: 重新加载 daemon 25 | systemd: 26 | daemon_reload: yes 27 | 28 | - name: 重新启动 kubelet 29 | service: 30 | name: kubelet 31 | state: restarted 32 | enabled: yes 33 | 34 | - name: 等待 apiserver 运行 35 | uri: 36 | url: "https://{{ current_host_ip }}:6443/healthz" 37 | validate_certs: no 38 | register: apiserver_result 39 | until: apiserver_result.status == 200 40 | retries: 60 41 | delay: 5 42 | 43 | - name: 等待 kube-scheduler 运行 44 | uri: 45 | url: "https://127.0.0.1:10259/healthz" 46 | 
validate_certs: no 47 | register: scheduler_result 48 | until: scheduler_result.status == 200 49 | retries: 60 50 | delay: 5 51 | 52 | - name: 等待 kube-controller-manager 运行 53 | uri: 54 | url: "https://127.0.0.1:10257/healthz" 55 | validate_certs: no 56 | register: controller_manager_result 57 | until: controller_manager_result.status == 200 58 | retries: 60 59 | delay: 5 -------------------------------------------------------------------------------- /roles/kube-master/templates/kubelet-certificates-renewal.yaml.j2: -------------------------------------------------------------------------------- 1 | # A ClusterRole which instructs the CSR approver to approve a node requesting a 2 | # serving cert matching its client cert. 3 | kind: ClusterRole 4 | apiVersion: rbac.authorization.k8s.io/v1 5 | metadata: 6 | name: system:certificates.k8s.io:certificatesigningrequests:selfnodeserver 7 | rules: 8 | - apiGroups: ["certificates.k8s.io"] 9 | resources: ["certificatesigningrequests/selfnodeserver"] 10 | verbs: ["create"] 11 | --- 12 | kind: ClusterRoleBinding 13 | apiVersion: rbac.authorization.k8s.io/v1 14 | metadata: 15 | name: kubeadm:node-autoapprove-certificate-renewal 16 | subjects: 17 | - kind: Group 18 | name: system:nodes 19 | apiGroup: rbac.authorization.k8s.io 20 | roleRef: 21 | kind: ClusterRole 22 | name: system:certificates.k8s.io:certificatesigningrequests:selfnodeserver 23 | apiGroup: rbac.authorization.k8s.io -------------------------------------------------------------------------------- /roles/kube-master/templates/kubelet-config.v1beta1.yaml.j2: -------------------------------------------------------------------------------- 1 | apiVersion: kubelet.config.k8s.io/v1beta1 2 | kind: KubeletConfiguration 3 | address: 0.0.0.0 4 | authentication: 5 | anonymous: 6 | enabled: false 7 | webhook: 8 | cacheTTL: 2m0s 9 | enabled: true 10 | x509: 11 | clientCAFile: /etc/kubernetes/pki/ca.crt 12 | authorization: 13 | mode: Webhook 14 | webhook: 15 | cacheAuthorizedTTL: 5m0s 16 | cacheUnauthorizedTTL: 30s 17 | cgroupDriver: {{ kubelet_cgroup_driver|default(kubelet_cgroup_driver_detected) }} 18 | cgroupsPerQOS: true 19 | clusterDNS: 20 | - {{ cluster_dns_service_ip }} 21 | clusterDomain: {{ kube_dns_domain }} 22 | configMapAndSecretChangeDetectionStrategy: Watch 23 | containerLogMaxFiles: 5 24 | containerLogMaxSize: 10Mi 25 | contentType: application/vnd.kubernetes.protobuf 26 | cpuCFSQuota: true 27 | cpuCFSQuotaPeriod: 100ms 28 | cpuManagerPolicy: none 29 | cpuManagerReconcilePeriod: 10s 30 | enableControllerAttachDetach: true 31 | enableDebuggingHandlers: true 32 | enforceNodeAllocatable: 33 | - pods 34 | eventBurst: 10 35 | eventRecordQPS: 5 36 | evictionHard: 37 | imagefs.available: {{ eviction_hard_imagefs_available }} 38 | memory.available: {{ eviction_hard_memory_available }} 39 | nodefs.available: {{ eviction_hard_nodefs_available }} 40 | nodefs.inodesFree: {{ eviction_hard_nodefs_inodes_free }} 41 | kubeReserved: 42 | cpu: {{ kube_cpu_reserved }} 43 | memory: {{ kube_memory_reserved|regex_replace('Mi', 'M') }} 44 | {% if system_reserved_enabled is defined and system_reserved_enabled %} 45 | systemReserved: 46 | cpu: {{ system_cpu_reserved|default('500m') }} 47 | memory: {{ system_memory_reserved|default('512M')|regex_replace('Mi', 'M') }} 48 | ephemeral-storage: {{ system_ephemeral_storage_reserved|default('10Gi')|regex_replace('Gi', 'G') }} 49 | {% endif %} 50 | evictionPressureTransitionPeriod: 5m0s 51 | failSwapOn: true 52 | featureGates: 53 | RotateKubeletServerCertificate: true 
54 | fileCheckFrequency: 20s 55 | hairpinMode: promiscuous-bridge 56 | healthzBindAddress: 127.0.0.1 57 | healthzPort: 10248 58 | httpCheckFrequency: 20s 59 | imageGCHighThresholdPercent: 85 60 | imageGCLowThresholdPercent: 80 61 | imageMinimumGCAge: 2m0s 62 | iptablesDropBit: 15 63 | iptablesMasqueradeBit: 14 64 | kubeAPIBurst: 10 65 | kubeAPIQPS: 5 66 | makeIPTablesUtilChains: true 67 | maxOpenFiles: 1000000 68 | maxPods: {{ kube_max_pods }} 69 | nodeLeaseDurationSeconds: 40 70 | nodeStatusReportFrequency: 1m0s 71 | nodeStatusUpdateFrequency: 10s 72 | oomScoreAdj: -999 73 | podPidsLimit: -1 74 | port: 10250 75 | protectKernelDefaults: true 76 | readOnlyPort: 0 77 | registryBurst: 10 78 | registryPullQPS: 5 79 | {% if ansible_distribution == "Ubuntu" and ansible_distribution_version is version('16.04', '>') %} 80 | resolvConf: /run/systemd/resolve/resolv.conf 81 | {% else %} 82 | resolvConf: /etc/resolv.conf 83 | {% endif %} 84 | rotateCertificates: true 85 | runtimeRequestTimeout: 2m0s 86 | serializeImagePulls: true 87 | staticPodPath: /etc/kubernetes/manifests 88 | streamingConnectionIdleTimeout: 4h0m0s 89 | syncFrequency: 1m0s 90 | tlsCertFile: /var/lib/kubelet/pki/kubelet.crt 91 | tlsPrivateKeyFile: /var/lib/kubelet/pki/kubelet.key 92 | volumeStatsAggPeriod: 1m0s -------------------------------------------------------------------------------- /roles/kube-master/templates/secrets-encryption.yaml.j2: -------------------------------------------------------------------------------- 1 | kind: EncryptionConfig 2 | apiVersion: v1 3 | resources: 4 | - resources: 5 | - secrets 6 | providers: 7 | - {{ kube_encryption_algorithm }}: 8 | keys: 9 | - name: key 10 | secret: {{ kube_encrypt_token }} 11 | - identity: {} -------------------------------------------------------------------------------- /roles/kube-worker/tasks/main.yml: -------------------------------------------------------------------------------- 1 | - name: 判断 Docker 是否早已安装 2 | shell: > 3 | systemctl status docker | grep running || echo "not running" 4 | register: docker_already_running 5 | 6 | - name: 设置 container_manager_detected 变量 7 | set_fact: 8 | container_manager_detected: >- 9 | {%- if "active" in docker_already_running.stdout -%} 10 | docker 11 | {%- else -%} 12 | containerd 13 | {%- endif -%} 14 | 15 | - name: 创建 kubernetes 相关目录 16 | file: 17 | name: "{{ item }}" 18 | state: directory 19 | with_items: 20 | - "{{ kubelet_root_dir }}" 21 | - /etc/kubernetes 22 | - /usr/share/bash-completion/completions 23 | 24 | - name: 读取 kubelet.conf 文件 stat 信息 25 | stat: 26 | path: /etc/kubernetes/kubelet.conf 27 | register: kubelet_conf_stat 28 | 29 | - include_tasks: "{{ (role_path + '/../kube-master/tasks/kubeadm-config.yml') | realpath }}" 30 | when: 31 | - inventory_hostname in (groups['kube-worker'] + groups['new-worker']) 32 | - inventory_hostname not in (groups['kube-master'] + groups['new-master']) 33 | 34 | - block: 35 | - name: 确认 kubelet 已停止运行 36 | service: 37 | name: kubelet 38 | state: stopped 39 | enabled: yes 40 | 41 | - name: Worker 节点加入集群 42 | shell: > 43 | kubeadm join --config /etc/kubernetes/kubeadm-config.yaml 44 | --ignore-preflight-errors=DirAvailable--etc-kubernetes-manifests,FileAvailable--etc-kubernetes-pki-ca.crt 45 | 46 | when: 47 | - inventory_hostname in (groups['kube-worker'] + groups['new-worker']) 48 | - inventory_hostname not in (groups['kube-master'] + groups['new-master']) 49 | - not kubelet_conf_stat.stat.exists 50 | 51 | - block: 52 | - name: 确认 kubelet 配置是否有修改 53 | template: 54 | src: 
kubelet-config.v1beta1.yaml.j2 55 | dest: /var/lib/kubelet/config.yaml 56 | owner: root 57 | mode: 0644 58 | register: configuration_result 59 | 60 | - name: 启动/重启 kubelet 61 | service: 62 | name: kubelet 63 | state: restarted 64 | enabled: yes 65 | when: configuration_result.changed 66 | when: 67 | - inventory_hostname in (groups['kube-worker'] + groups['new-worker']) 68 | - inventory_hostname not in (groups['kube-master'] + groups['new-master']) 69 | 70 | # - name: 取消在 worker 组的 master 节点 taint,使 master 节点可以调度 71 | # shell: > 72 | # kubectl taint nodes {{inventory_hostname}} node-role.kubernetes.io/control-plane='':NoSchedule --overwrite && 73 | # kubectl taint nodes {{inventory_hostname}} node-role.kubernetes.io/control-plane- 74 | # delegate_to: "{{ groups['kube-master'][0] }}" 75 | # ignore_errors: yes 76 | # when: 77 | # - inventory_hostname in (groups['kube-worker'] + groups['new-worker']) 78 | # - inventory_hostname in (groups['kube-master'] + groups['new-master']) -------------------------------------------------------------------------------- /roles/kube-worker/templates/kubelet-config.v1beta1.yaml.j2: -------------------------------------------------------------------------------- 1 | apiVersion: kubelet.config.k8s.io/v1beta1 2 | kind: KubeletConfiguration 3 | address: 0.0.0.0 4 | authentication: 5 | anonymous: 6 | enabled: false 7 | webhook: 8 | cacheTTL: 2m0s 9 | enabled: true 10 | x509: 11 | clientCAFile: /etc/kubernetes/pki/ca.crt 12 | authorization: 13 | mode: Webhook 14 | webhook: 15 | cacheAuthorizedTTL: 5m0s 16 | cacheUnauthorizedTTL: 30s 17 | cgroupDriver: {{ kubelet_cgroup_driver|default(kubelet_cgroup_driver_detected) }} 18 | cgroupsPerQOS: true 19 | clusterDNS: 20 | - {{ cluster_dns_service_ip }} 21 | clusterDomain: {{ kube_dns_domain }} 22 | configMapAndSecretChangeDetectionStrategy: Watch 23 | containerLogMaxFiles: 5 24 | containerLogMaxSize: 10Mi 25 | contentType: application/vnd.kubernetes.protobuf 26 | cpuCFSQuota: true 27 | cpuCFSQuotaPeriod: 100ms 28 | cpuManagerPolicy: none 29 | cpuManagerReconcilePeriod: 10s 30 | enableControllerAttachDetach: true 31 | enableDebuggingHandlers: true 32 | enforceNodeAllocatable: 33 | - pods 34 | eventBurst: 10 35 | eventRecordQPS: 5 36 | evictionHard: 37 | imagefs.available: {{ eviction_hard_imagefs_available }} 38 | memory.available: {{ eviction_hard_memory_available }} 39 | nodefs.available: {{ eviction_hard_nodefs_available }} 40 | nodefs.inodesFree: {{ eviction_hard_nodefs_inodes_free }} 41 | kubeReserved: 42 | cpu: {{ kube_cpu_reserved }} 43 | memory: {{ kube_memory_reserved|regex_replace('Mi', 'M') }} 44 | {% if system_reserved_enabled is defined and system_reserved_enabled %} 45 | systemReserved: 46 | cpu: {{ system_cpu_reserved|default('500m') }} 47 | memory: {{ system_memory_reserved|default('512M')|regex_replace('Mi', 'M') }} 48 | ephemeral-storage: {{ system_ephemeral_storage_reserved|default('10Gi')|regex_replace('Gi', 'G') }} 49 | {% endif %} 50 | evictionPressureTransitionPeriod: 5m0s 51 | failSwapOn: true 52 | featureGates: 53 | RotateKubeletServerCertificate: true 54 | fileCheckFrequency: 20s 55 | hairpinMode: promiscuous-bridge 56 | healthzBindAddress: 127.0.0.1 57 | healthzPort: 10248 58 | httpCheckFrequency: 20s 59 | imageGCHighThresholdPercent: 85 60 | imageGCLowThresholdPercent: 80 61 | imageMinimumGCAge: 2m0s 62 | iptablesDropBit: 15 63 | iptablesMasqueradeBit: 14 64 | kubeAPIBurst: 10 65 | kubeAPIQPS: 5 66 | makeIPTablesUtilChains: true 67 | maxOpenFiles: 1000000 68 | maxPods: {{ kube_max_pods }} 69 | 
nodeLeaseDurationSeconds: 40 70 | nodeStatusReportFrequency: 1m0s 71 | nodeStatusUpdateFrequency: 10s 72 | oomScoreAdj: -999 73 | podPidsLimit: -1 74 | port: 10250 75 | protectKernelDefaults: true 76 | readOnlyPort: 0 77 | registryBurst: 10 78 | registryPullQPS: 5 79 | {% if ansible_distribution == "Ubuntu" and ansible_distribution_version is version('16.04', '>') %} 80 | resolvConf: /run/systemd/resolve/resolv.conf 81 | {% else %} 82 | resolvConf: /etc/resolv.conf 83 | {% endif %} 84 | rotateCertificates: true 85 | runtimeRequestTimeout: 2m0s 86 | serializeImagePulls: true 87 | staticPodPath: /etc/kubernetes/manifests 88 | streamingConnectionIdleTimeout: 4h0m0s 89 | syncFrequency: 1m0s 90 | tlsCertFile: /var/lib/kubelet/pki/kubelet.crt 91 | tlsPrivateKeyFile: /var/lib/kubelet/pki/kubelet.key 92 | volumeStatsAggPeriod: 1m0s -------------------------------------------------------------------------------- /roles/load-balancer/defaults/main.yml: -------------------------------------------------------------------------------- 1 | # 节点初始主备状态 2 | lb_keepalived_state: "{% if inventory_hostname == groups['lb'][0] %}MASTER{% else %}BACKUP{% endif %}" 3 | # 其他 Keepalived 节点的单点传播 IP 4 | lb_keepalived_unicast_peers: >- 5 | {%- for host in groups['lb'] -%} 6 | {%- if hostvars[host]['ansible_host'] is defined -%} 7 | '{{ hostvars[host]['ansible_host'] }}' 8 | {%- else -%} 9 | '{{ host }}' 10 | {%- endif -%} 11 | {%- if not loop.last -%},{%- endif -%} 12 | {%- endfor -%} 13 | -------------------------------------------------------------------------------- /roles/load-balancer/tasks/envoy.yml: -------------------------------------------------------------------------------- 1 | - name: Envoy lb | 生成 envoy lb 配置文件 2 | template: 3 | src: envoy/envoy.conf.yaml.j2 4 | dest: /etc/kubernetes/plugins/lb-config/envoy.yaml 5 | owner: root 6 | mode: 0644 7 | register: lb_configuration_result 8 | 9 | - name: Envoy lb | 拉取相关镜像 10 | shell: "{{ image_pull_command }} {{ item }}" 11 | with_items: 12 | - "{{ lb_envoy_image }}" 13 | - "{{ pod_infra_container_image }}" 14 | 15 | - name: Envoy lb | 生成 envoy lb static pod 文件 16 | template: 17 | src: envoy/envoy.yaml.j2 18 | dest: /etc/kubernetes/manifests/lb-kube-apiserver.yaml 19 | owner: root 20 | mode: 0644 -------------------------------------------------------------------------------- /roles/load-balancer/tasks/external.yml: -------------------------------------------------------------------------------- 1 | - name: 确认没有运行其他负载均衡器 2 | file: 3 | name: "{{ item }}" 4 | state: absent 5 | with_items: 6 | - /etc/kubernetes/plugins/lb-config 7 | - /etc/kubernetes/manifests/lb-keepalived.yaml 8 | - /etc/kubernetes/manifests/lb-kube-apiserver.yaml -------------------------------------------------------------------------------- /roles/load-balancer/tasks/haproxy.yml: -------------------------------------------------------------------------------- 1 | - name: Haproxy | 生成 haproxy 配置文件 2 | template: 3 | src: haproxy/haproxy.cfg.j2 4 | dest: /etc/kubernetes/plugins/lb-config/haproxy.cfg 5 | owner: root 6 | mode: 0644 7 | register: lb_configuration_result 8 | 9 | - name: Haproxy | 拉取相关镜像 10 | shell: "{{ image_pull_command }} {{ item }}" 11 | with_items: 12 | - "{{ lb_haproxy_image }}" 13 | - "{{ pod_infra_container_image }}" 14 | 15 | - name: Haproxy | 生成 haproxy static pod 文件 16 | template: 17 | src: haproxy/haproxy.yaml.j2 18 | dest: /etc/kubernetes/manifests/lb-kube-apiserver.yaml 19 | owner: root 20 | mode: 0644 
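The haproxy, nginx, openresty and envoy task files in this role only render a configuration file and drop a static pod manifest into /etc/kubernetes/manifests; kubelet then starts the proxy asynchronously. If the play should block until the local balancer actually answers, a wait task in the style of the uri/until/retries health checks used in master-init.yml could be appended. A minimal sketch, not part of the repo, assuming lb_kube_apiserver_healthcheck_port is defined as the templates above expect:

```yaml
# Sketch only (not in the repo): wait for the lb-kube-apiserver static pod
# rendered above to answer on its local HTTP health endpoint.
- name: Wait for the local kube-apiserver load balancer to become healthy
  uri:
    url: "http://127.0.0.1:{{ lb_kube_apiserver_healthcheck_port }}/healthz"
  register: lb_health_result
  until: lb_health_result.status == 200
  retries: 24
  delay: 5
  when: lb_kube_apiserver_healthcheck_port is defined
```

The /healthz endpoint is served on 127.0.0.1 by the healthz listener that each bundled load-balancer configuration exposes when lb_kube_apiserver_healthcheck_port is set, so plain HTTP without certificates is enough here.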
-------------------------------------------------------------------------------- /roles/load-balancer/tasks/main.yml: -------------------------------------------------------------------------------- 1 | - include_tasks: external.yml 2 | when: lb_mode == "slb" 3 | 4 | - include_tasks: internal.yml 5 | when: 6 | - lb_mode != "slb" 7 | - (inventory_hostname in groups['lb']) or (groups['lb']|length == 0 and inventory_hostname in (groups['kube-master'] + groups['kube-worker'] + groups['new-master'] + groups['new-worker'])) -------------------------------------------------------------------------------- /roles/load-balancer/tasks/nginx.yml: -------------------------------------------------------------------------------- 1 | - name: Nginx lb | 生成 nginx lb 配置文件 2 | template: 3 | src: nginx/nginx.conf.j2 4 | dest: /etc/kubernetes/plugins/lb-config/nginx.conf 5 | owner: root 6 | mode: 0644 7 | register: lb_configuration_result 8 | 9 | - name: Nginx lb | 拉取相关镜像 10 | shell: "{{ image_pull_command }} {{ item }}" 11 | with_items: 12 | - "{{ lb_nginx_image }}" 13 | - "{{ pod_infra_container_image }}" 14 | 15 | - name: Nginx lb | 生成 nginx lb static pod 文件 16 | template: 17 | src: nginx/nginx.yaml.j2 18 | dest: /etc/kubernetes/manifests/lb-kube-apiserver.yaml 19 | owner: root 20 | mode: 0644 -------------------------------------------------------------------------------- /roles/load-balancer/tasks/openresty.yml: -------------------------------------------------------------------------------- 1 | - name: Openresty lb | 生成 openresty lb 配置文件 2 | template: 3 | src: openresty/openresty.conf.j2 4 | dest: /etc/kubernetes/plugins/lb-config/nginx.conf 5 | owner: root 6 | mode: 0644 7 | register: lb_configuration_result 8 | 9 | - name: Openresty lb | 拉取相关镜像 10 | shell: "{{ image_pull_command }} {{ item }}" 11 | with_items: 12 | - "{{ lb_openresty_image }}" 13 | - "{{ pod_infra_container_image }}" 14 | 15 | - name: Openresty lb | 生成 openresty lb static pod 文件 16 | template: 17 | src: openresty/openresty.yaml.j2 18 | dest: /etc/kubernetes/manifests/lb-kube-apiserver.yaml 19 | owner: root 20 | mode: 0644 -------------------------------------------------------------------------------- /roles/load-balancer/templates/20-kubelet-override.conf.j2: -------------------------------------------------------------------------------- 1 | [Service] 2 | ExecStart= 3 | ExecStart=/usr/bin/kubelet {{ kubelet_args_base }} 4 | Restart=always -------------------------------------------------------------------------------- /roles/load-balancer/templates/envoy/envoy.conf.yaml.j2: -------------------------------------------------------------------------------- 1 | {% if lb_kube_apiserver_healthcheck_port is defined %} 2 | admin: 3 | access_log_path: "/dev/null" 4 | address: 5 | socket_address: 6 | address: 127.0.0.1 7 | port_value: {{ lb_envoy_admin_address_port }} 8 | {% endif %} 9 | static_resources: 10 | listeners: 11 | - name: lb_kube_apiserver 12 | address: 13 | socket_address: 14 | address: {% if inventory_hostname in groups['lb'] %}0.0.0.0{% else %}127.0.0.1{% endif %} 15 | 16 | port_value: {{ lb_kube_apiserver_port }} 17 | filter_chains: 18 | - filters: 19 | - name: envoy.tcp_proxy 20 | config: 21 | stat_prefix: ingress_tcp 22 | cluster: kube_apiserver 23 | access_log: 24 | - name: envoy.file_access_log 25 | config: 26 | path: "/dev/stdout" 27 | {% if enabel_ingress_nodeport_lb | bool %} 28 | - name: ingress_http 29 | address: 30 | socket_address: 31 | address: 0.0.0.0 32 | port_value: 80 33 | filter_chains: 34 | - filters: 35 | - 
name: envoy.tcp_proxy 36 | config: 37 | stat_prefix: ingress_tcp 38 | cluster: ingress_http 39 | access_log: 40 | - name: envoy.file_access_log 41 | config: 42 | path: "/dev/stdout" 43 | {% endif %} 44 | {% if enabel_ingress_nodeport_lb | bool %} 45 | - name: ingress_https 46 | address: 47 | socket_address: 48 | address: 0.0.0.0 49 | port_value: 443 50 | filter_chains: 51 | - filters: 52 | - name: envoy.tcp_proxy 53 | config: 54 | stat_prefix: ingress_tcp 55 | cluster: ingress_https 56 | access_log: 57 | - name: envoy.file_access_log 58 | config: 59 | path: "/dev/stdout" 60 | {% endif %} 61 | {% if lb_kube_apiserver_healthcheck_port is defined %} 62 | - name: healthz 63 | address: 64 | socket_address: 65 | address: 127.0.0.1 66 | port_value: {{ lb_kube_apiserver_healthcheck_port }} 67 | filter_chains: 68 | - filters: 69 | - name: envoy.http_connection_manager 70 | config: 71 | access_log: 72 | - name: envoy.file_access_log 73 | config: 74 | path: "/dev/null" 75 | stat_prefix: ingress_http 76 | route_config: 77 | name: local_route 78 | virtual_hosts: 79 | - name: local_service 80 | domains: ["*"] 81 | routes: 82 | - match: 83 | prefix: "/healthz" 84 | route: 85 | prefix_rewrite: / 86 | cluster: admin 87 | http_filters: 88 | - name: envoy.router 89 | {% endif %} 90 | clusters: 91 | {% if lb_kube_apiserver_healthcheck_port is defined %} 92 | - name: admin 93 | connect_timeout: 0.5s 94 | type: strict_dns 95 | lb_policy: LEAST_REQUEST 96 | hosts: 97 | - socket_address: 98 | address: 127.0.0.1 99 | port_value: {{ lb_envoy_admin_address_port }} 100 | {% endif %} 101 | - name: kube_apiserver 102 | connect_timeout: 0.5s 103 | type: strict_dns 104 | lb_policy: LEAST_REQUEST 105 | hosts: 106 | {% for host in (groups['kube-master'] + groups['new-master']) %} 107 | - socket_address: 108 | address: {% if hostvars[host]['ansible_host'] is defined %}{{ hostvars[host]['ansible_host'] }}{% else %}{{ host }}{% endif %} 109 | 110 | port_value: 6443 111 | {% endfor %} 112 | -------------------------------------------------------------------------------- /roles/load-balancer/templates/envoy/envoy.yaml.j2: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Pod 3 | metadata: 4 | name: lb-envoy 5 | namespace: kube-system 6 | labels: 7 | addonmanager.kubernetes.io/mode: Reconcile 8 | k8s-app: lb-envoy 9 | annotations: 10 | checksum/config: {{ lb_configuration_result.checksum }} 11 | spec: 12 | hostNetwork: true 13 | dnsPolicy: ClusterFirstWithHostNet 14 | priorityClassName: system-cluster-critical 15 | containers: 16 | - name: lb-envoy 17 | image: {{ lb_envoy_image }} 18 | imagePullPolicy: IfNotPresent 19 | {% if lb_kube_apiserver_healthcheck_port is defined %} 20 | livenessProbe: 21 | periodSeconds: 3 22 | timeoutSeconds: 5 23 | failureThreshold: 5 24 | initialDelaySeconds: 10 25 | httpGet: 26 | host: 127.0.0.1 27 | path: /healthz 28 | port: {{ lb_kube_apiserver_healthcheck_port }} 29 | {% endif %} 30 | resources: {} 31 | volumeMounts: 32 | - mountPath: /etc/envoy/envoy.yaml 33 | name: envoy-lb-conf 34 | readOnly: true 35 | - mountPath: /etc/localtime 36 | name: localtime 37 | readOnly: true 38 | volumes: 39 | - name: envoy-lb-conf 40 | hostPath: 41 | path: /etc/kubernetes/plugins/lb-config/envoy.yaml 42 | type: File 43 | - hostPath: 44 | path: /etc/localtime 45 | type: File 46 | name: localtime -------------------------------------------------------------------------------- /roles/load-balancer/templates/haproxy/haproxy.cfg.j2: 
-------------------------------------------------------------------------------- 1 | global 2 | maxconn {{ 10000 * ansible_processor_vcpus|int }} 3 | log 127.0.0.1 local0 4 | 5 | defaults 6 | mode http 7 | log global 8 | option httplog 9 | option dontlognull 10 | option http-server-close 11 | option redispatch 12 | retries 5 13 | timeout http-request 5m 14 | timeout queue 5m 15 | timeout connect 30s 16 | timeout client 15m 17 | timeout server 15m 18 | timeout http-keep-alive 30s 19 | timeout check 30s 20 | maxconn {{ 10000 * ansible_processor_vcpus|int }} 21 | 22 | listen stats 23 | bind 0.0.0.0:{{ lb_haproxy_stats_bind_address }} 24 | mode http 25 | balance {{ lb_haproxy_balance_alg }} 26 | stats enable 27 | stats refresh {{ lb_haproxy_stats_refresh }}s 28 | stats uri {{ lb_haproxy_stats_uri }} 29 | stats auth {{ lb_haproxy_stats_user }}:{{ lb_haproxy_stats_password }} 30 | stats admin if TRUE 31 | 32 | {% if lb_kube_apiserver_healthcheck_port is defined -%} 33 | frontend healthz 34 | bind 127.0.0.1:{{ lb_kube_apiserver_healthcheck_port }} 35 | mode http 36 | monitor-uri /healthz 37 | {% endif %} 38 | 39 | frontend kube_api_frontend 40 | bind {% if inventory_hostname in groups['lb'] %}0.0.0.0{% else %}127.0.0.1{% endif %}:{{ lb_kube_apiserver_port }} 41 | mode tcp 42 | option tcplog 43 | default_backend kube_api_backend 44 | 45 | backend kube_api_backend 46 | mode tcp 47 | balance {{ lb_haproxy_balance_alg }} 48 | default-server inter 15s downinter 15s rise 2 fall 2 slowstart 60s maxconn 1000 maxqueue 256 weight 100 49 | option httpchk GET /healthz 50 | http-check expect status 200 51 | {% for host in (groups['kube-master'] + groups['new-master']) %} 52 | server {{ host }} {% if hostvars[host]['ansible_host'] is defined %}{{ hostvars[host]['ansible_host'] }}{% else %}{{ host }}{% endif %}:6443 check check-ssl verify none 53 | {% endfor %} 54 | -------------------------------------------------------------------------------- /roles/load-balancer/templates/haproxy/haproxy.yaml.j2: -------------------------------------------------------------------------------- 1 | kind: Pod 2 | apiVersion: v1 3 | metadata: 4 | name: lb-haproxy 5 | namespace: kube-system 6 | labels: 7 | addonmanager.kubernetes.io/mode: Reconcile 8 | k8s-app: lb-haproxy 9 | annotations: 10 | checksum/config: {{ lb_configuration_result.checksum }} 11 | spec: 12 | hostNetwork: true 13 | dnsPolicy: ClusterFirstWithHostNet 14 | priorityClassName: system-cluster-critical 15 | containers: 16 | - name: lb-nginx 17 | image: {{ lb_haproxy_image }} 18 | {% if lb_kube_apiserver_healthcheck_port is defined %} 19 | livenessProbe: 20 | periodSeconds: 3 21 | timeoutSeconds: 5 22 | failureThreshold: 5 23 | initialDelaySeconds: 10 24 | httpGet: 25 | host: 127.0.0.1 26 | path: /healthz 27 | port: {{ lb_kube_apiserver_healthcheck_port }} 28 | {% endif %} 29 | resources: {} 30 | volumeMounts: 31 | - name: haproxy-cfg 32 | readOnly: true 33 | mountPath: /usr/local/etc/haproxy/haproxy.cfg 34 | - mountPath: /etc/localtime 35 | name: localtime 36 | readOnly: true 37 | volumes: 38 | - name: haproxy-cfg 39 | hostPath: 40 | path: /etc/kubernetes/plugins/lb-config/haproxy.cfg 41 | type: File 42 | - hostPath: 43 | path: /etc/localtime 44 | type: File 45 | name: localtime -------------------------------------------------------------------------------- /roles/load-balancer/templates/keepalived.yaml.j2: -------------------------------------------------------------------------------- 1 | kind: Pod 2 | apiVersion: v1 3 | metadata: 4 | labels: 5 | 
addonmanager.kubernetes.io/mode: Reconcile 6 | k8s-app: lb-keepalived 7 | name: lb-keepalived 8 | namespace: kube-system 9 | spec: 10 | hostNetwork: true 11 | dnsPolicy: ClusterFirstWithHostNet 12 | priorityClassName: system-cluster-critical 13 | containers: 14 | - name: lb-keepalived 15 | image: {{ lb_keepalived_image }} 16 | env: 17 | - name: KEEPALIVED_VIRTUAL_IPS 18 | value: {{ kube_apiserver_ip | trim }} 19 | - name: KEEPALIVED_INTERFACE 20 | value: {{ lb_keepalived_interface.stdout }} 21 | - name: KEEPALIVED_UNICAST_PEERS 22 | value: "#PYTHON2BASH:[{{ lb_keepalived_unicast_peers }}]" 23 | - name: KEEPALIVED_PASSWORD 24 | value: {{ lb_keepalived_password }} 25 | - name: KEEPALIVED_STATE 26 | value: "{{ lb_keepalived_state }}" 27 | - name: KEEPALIVED_PRIORITY 28 | value: "{{ lb_keepalived_priority }}" 29 | - name: KEEPALIVED_ROUTER_ID 30 | value: "{{ lb_keepalived_router_id }}" 31 | {% if lb_kube_apiserver_healthcheck_port is defined %} 32 | # 检测当前节点 lb 是否存活,若不存活则重启 33 | livenessProbe: 34 | periodSeconds: 3 35 | timeoutSeconds: 15 36 | failureThreshold: 5 37 | initialDelaySeconds: 10 38 | httpGet: 39 | host: 127.0.0.1 40 | path: /healthz 41 | port: {{ lb_kube_apiserver_healthcheck_port }} 42 | {% endif %} 43 | resources: {} 44 | volumeMounts: 45 | - mountPath: /etc/localtime 46 | name: localtime 47 | readOnly: true 48 | securityContext: 49 | capabilities: 50 | add: 51 | - NET_RAW 52 | - NET_ADMIN 53 | - NET_BROADCAST 54 | volumes: 55 | - hostPath: 56 | path: /etc/localtime 57 | type: File 58 | name: localtime -------------------------------------------------------------------------------- /roles/load-balancer/templates/nginx/nginx.conf.j2: -------------------------------------------------------------------------------- 1 | error_log stderr notice; 2 | 3 | worker_processes 2; 4 | worker_rlimit_nofile 130048; 5 | worker_shutdown_timeout 10s; 6 | 7 | events { 8 | multi_accept on; 9 | use epoll; 10 | worker_connections 16384; 11 | } 12 | 13 | stream { 14 | upstream kube_apiserver { 15 | least_conn; 16 | {% for host in (groups['kube-master'] + groups['new-master']) %} 17 | server {% if hostvars[host]['ansible_host'] is defined %}{{ hostvars[host]['ansible_host'] }}{% else %}{{ host }}{% endif %}:6443; 18 | {% endfor %} 19 | } 20 | 21 | server { 22 | listen {% if inventory_hostname in groups['lb'] %}0.0.0.0{% else %}127.0.0.1{% endif %}:{{ lb_kube_apiserver_port }}; 23 | proxy_pass kube_apiserver; 24 | proxy_timeout 10m; 25 | proxy_connect_timeout 1s; 26 | } 27 | } 28 | 29 | http { 30 | aio threads; 31 | aio_write on; 32 | tcp_nopush on; 33 | tcp_nodelay on; 34 | 35 | keepalive_timeout 75s; 36 | keepalive_requests 100; 37 | reset_timedout_connection on; 38 | server_tokens off; 39 | autoindex off; 40 | 41 | {% if lb_kube_apiserver_healthcheck_port is defined %} 42 | server { 43 | listen 127.0.0.1:{{ lb_kube_apiserver_healthcheck_port }}; 44 | location /healthz { 45 | access_log off; 46 | return 200; 47 | } 48 | location /stub_status { 49 | stub_status on; 50 | access_log off; 51 | } 52 | } 53 | {% endif %} 54 | } -------------------------------------------------------------------------------- /roles/load-balancer/templates/nginx/nginx.yaml.j2: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Pod 3 | metadata: 4 | name: lb-nginx 5 | namespace: kube-system 6 | labels: 7 | addonmanager.kubernetes.io/mode: Reconcile 8 | k8s-app: lb-nginx 9 | annotations: 10 | checksum/config: {{ lb_configuration_result.checksum }} 11 | spec: 12 | 
hostNetwork: true 13 | dnsPolicy: ClusterFirstWithHostNet 14 | priorityClassName: system-cluster-critical 15 | containers: 16 | - name: lb-nginx 17 | image: {{ lb_nginx_image }} 18 | imagePullPolicy: IfNotPresent 19 | {% if lb_kube_apiserver_healthcheck_port is defined %} 20 | livenessProbe: 21 | periodSeconds: 3 22 | timeoutSeconds: 5 23 | failureThreshold: 5 24 | initialDelaySeconds: 10 25 | httpGet: 26 | host: 127.0.0.1 27 | path: /healthz 28 | port: {{ lb_kube_apiserver_healthcheck_port }} 29 | {% endif %} 30 | resources: {} 31 | volumeMounts: 32 | - mountPath: /etc/nginx/nginx.conf 33 | name: nginx-lb-conf 34 | readOnly: true 35 | - mountPath: /etc/localtime 36 | name: localtime 37 | readOnly: true 38 | volumes: 39 | - name: nginx-lb-conf 40 | hostPath: 41 | path: /etc/kubernetes/plugins/lb-config/nginx.conf 42 | type: File 43 | - hostPath: 44 | path: /etc/localtime 45 | type: File 46 | name: localtime -------------------------------------------------------------------------------- /roles/load-balancer/templates/openresty/openresty.conf.j2: -------------------------------------------------------------------------------- 1 | error_log stderr notice; 2 | 3 | worker_processes 2; 4 | worker_rlimit_nofile 130048; 5 | worker_shutdown_timeout 10s; 6 | 7 | events { 8 | multi_accept on; 9 | use epoll; 10 | worker_connections 16384; 11 | } 12 | 13 | stream { 14 | upstream kube_apiserver { 15 | least_conn; 16 | {% for host in (groups['kube-master'] + groups['new-master']) %} 17 | server {% if hostvars[host]['ansible_host'] is defined %}{{ hostvars[host]['ansible_host'] }}{% else %}{{ host }}{% endif %}:6443; 18 | {% endfor %} 19 | } 20 | 21 | server { 22 | listen {% if inventory_hostname in groups['lb'] %}0.0.0.0{% else %}127.0.0.1{% endif %}:{{ lb_kube_apiserver_port }}; 23 | proxy_pass kube_apiserver; 24 | proxy_timeout 10m; 25 | proxy_connect_timeout 1s; 26 | } 27 | } 28 | 29 | http { 30 | aio threads; 31 | aio_write on; 32 | tcp_nopush on; 33 | tcp_nodelay on; 34 | 35 | keepalive_timeout 75s; 36 | keepalive_requests 100; 37 | reset_timedout_connection on; 38 | server_tokens off; 39 | autoindex off; 40 | 41 | {% if lb_kube_apiserver_healthcheck_port is defined %} 42 | server { 43 | listen 127.0.0.1:{{ lb_kube_apiserver_healthcheck_port }}; 44 | location /healthz { 45 | access_log off; 46 | return 200; 47 | } 48 | location /stub_status { 49 | stub_status on; 50 | access_log off; 51 | } 52 | } 53 | {% endif %} 54 | } -------------------------------------------------------------------------------- /roles/load-balancer/templates/openresty/openresty.yaml.j2: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Pod 3 | metadata: 4 | name: lb-openresty 5 | namespace: kube-system 6 | labels: 7 | addonmanager.kubernetes.io/mode: Reconcile 8 | k8s-app: lb-openresty 9 | annotations: 10 | checksum/config: {{ lb_configuration_result.checksum }} 11 | spec: 12 | hostNetwork: true 13 | dnsPolicy: ClusterFirstWithHostNet 14 | priorityClassName: system-cluster-critical 15 | containers: 16 | - name: lb-openresty 17 | image: {{ lb_openresty_image }} 18 | imagePullPolicy: IfNotPresent 19 | {% if lb_kube_apiserver_healthcheck_port is defined %} 20 | livenessProbe: 21 | periodSeconds: 3 22 | timeoutSeconds: 5 23 | failureThreshold: 5 24 | initialDelaySeconds: 10 25 | httpGet: 26 | host: 127.0.0.1 27 | path: /healthz 28 | port: {{ lb_kube_apiserver_healthcheck_port }} 29 | {% endif %} 30 | resources: {} 31 | volumeMounts: 32 | - mountPath: 
/usr/local/openresty/nginx/conf/nginx.conf 33 | name: openresty-lb-conf 34 | readOnly: true 35 | - mountPath: /etc/localtime 36 | name: localtime 37 | readOnly: true 38 | volumes: 39 | - name: openresty-lb-conf 40 | hostPath: 41 | path: /etc/kubernetes/plugins/lb-config/nginx.conf 42 | type: File 43 | - hostPath: 44 | path: /etc/localtime 45 | type: File 46 | name: localtime -------------------------------------------------------------------------------- /roles/plugins/cert-manager/tasks/main.yml: -------------------------------------------------------------------------------- 1 | - block: 2 | - name: 在第一台 master 节点创建 cert-manager 配置文件目录 3 | file: 4 | path: /etc/kubernetes/plugins/cert-manager 5 | state: directory 6 | 7 | - name: 渲染 cert-manager 配置文件 8 | template: 9 | src: cert-manager.yaml.j2 10 | dest: /etc/kubernetes/plugins/cert-manager/cert-manager.yaml 11 | register: cert_manager_manifest 12 | 13 | - name: 部署 cert-manager 14 | shell: > 15 | kubectl apply -f /etc/kubernetes/plugins/cert-manager/cert-manager.yaml 16 | register: apply_status 17 | until: apply_status.rc == 0 18 | retries: 12 19 | delay: 10 20 | when: cert_manager_manifest.changed 21 | 22 | - name: 轮询等待 cert-manager 运行 23 | shell: kubectl get pod --all-namespaces -o wide | grep 'cert-manager' | awk '{print $4}' 24 | register: pod_status 25 | until: "'Running' in pod_status.stdout" 26 | retries: 12 27 | delay: 5 28 | ignore_errors: true 29 | when: wait_plugins_ready|bool 30 | 31 | when: inventory_hostname == groups['kube-master'][0] and cert_manager_enabled|bool -------------------------------------------------------------------------------- /roles/plugins/ingress-controller/tasks/main.yml: -------------------------------------------------------------------------------- 1 | - block: 2 | - name: 在第一台 master 节点创建 ingress-controller 配置文件目录 3 | file: 4 | path: /etc/kubernetes/plugins/ingress-controller 5 | state: directory 6 | - name: 获取当前 kubernetes 版本 7 | shell: "kubeadm version -o short" 8 | register: kubeadm_version_output 9 | - include_tasks: nginx-ingress-controller.yml 10 | when: ingress_controller_tpye == 'nginx' 11 | - include_tasks: traefik-ingress-controller.yml 12 | when: ingress_controller_tpye == 'traefik' 13 | 14 | when: inventory_hostname == groups['kube-master'][0] and ingress_controller_enabled|bool -------------------------------------------------------------------------------- /roles/plugins/ingress-controller/tasks/nginx-ingress-controller.yml: -------------------------------------------------------------------------------- 1 | - block: 2 | - name: 渲染 nginx-ingress-controller 配置文件 3 | template: 4 | src: nginx-ingress-controller.yaml.j2 5 | dest: /etc/kubernetes/plugins/ingress-controller/nginx-ingress-controller.yaml 6 | register: ingress_nginx_manifest 7 | 8 | - name: 部署 nginx-ingress-controller 9 | shell: kubectl apply -f /etc/kubernetes/plugins/ingress-controller/nginx-ingress-controller.yaml 10 | when: ingress_nginx_manifest.changed 11 | 12 | - name: 轮询等待 nginx-ingress-controller 运行 13 | shell: kubectl get pod --all-namespaces -o wide | grep 'ingress-nginx' | awk '{print $4}' 14 | register: pod_status 15 | until: "'Running' in pod_status.stdout" 16 | retries: 24 17 | delay: 5 18 | ignore_errors: true 19 | when: wait_plugins_ready|bool 20 | 21 | when: inventory_hostname == groups['kube-master'][0] -------------------------------------------------------------------------------- /roles/plugins/ingress-controller/tasks/traefik-ingress-controller.yml: 
-------------------------------------------------------------------------------- 1 | - block: 2 | - name: 渲染 traefik-ingress-controller crds 配置文件 3 | template: 4 | src: traefik-ingress-controller/crds.yaml.j2 5 | dest: /etc/kubernetes/plugins/ingress-controller/crds.yaml 6 | register: ingress_traefik_crd_manifest 7 | 8 | - name: 渲染 traefik-ingress-controller 配置文件 9 | template: 10 | src: traefik-ingress-controller/traefik-ingress-controller.yaml.j2 11 | dest: /etc/kubernetes/plugins/ingress-controller/traefik-ingress-controller.yaml 12 | register: ingress_traefik_manifest 13 | 14 | # - name: 创建 traefik-ingress-controller 默认证书 15 | # shell: > 16 | # openssl req -x509 -nodes -days {{ traefik_certs_expired }} \ 17 | # -newkey rsa:2048 \ 18 | # -keyout /etc/kubernetes/plugins/ingress-controller/tls.key \ 19 | # -out /etc/kubernetes/plugins/ingress-controller/tls.crt \ 20 | # -subj "/CN=timebye.github.io" 21 | 22 | # - name: 应用 traefik-ingress-controller 默认证书 23 | # shell: > 24 | # kubectl create ns ingress-controller 25 | # --dry-run -o yaml | kubectl apply -f - && 26 | # kubectl -n ingress-controller create secret tls \ 27 | # traefik-default-cert \ 28 | # --key=/etc/kubernetes/plugins/ingress-controller/tls.key \ 29 | # --cert=/etc/kubernetes/plugins/ingress-controller/tls.crt \ 30 | # --dry-run -o yaml | kubectl apply -f - 31 | 32 | - name: 部署 traefik-ingress-controller crds 33 | shell: kubectl apply -f /etc/kubernetes/plugins/ingress-controller/crds.yaml 34 | when: ingress_traefik_crd_manifest.changed 35 | 36 | - name: 部署 traefik-ingress-controller 37 | shell: kubectl apply -f /etc/kubernetes/plugins/ingress-controller/traefik-ingress-controller.yaml 38 | when: ingress_traefik_manifest.changed 39 | 40 | - name: 轮询等待 traefik-ingress-controller 运行 41 | shell: kubectl get pod --all-namespaces -o wide | grep 'traefik' | awk '{print $4}' 42 | register: pod_status 43 | until: "'Running' in pod_status.stdout" 44 | retries: 12 45 | delay: 5 46 | ignore_errors: true 47 | when: wait_plugins_ready|bool 48 | 49 | when: inventory_hostname == groups['kube-master'][0] -------------------------------------------------------------------------------- /roles/plugins/ingress-controller/templates/traefik-ingress-controller/crds.yaml.j2: -------------------------------------------------------------------------------- 1 | # ref: https://docs.traefik.io/providers/kubernetes-crd/ 2 | 3 | # All resources definition must be declared 4 | apiVersion: apiextensions.k8s.io/v1beta1 5 | kind: CustomResourceDefinition 6 | metadata: 7 | name: ingressroutes.traefik.containo.us 8 | 9 | spec: 10 | group: traefik.containo.us 11 | version: v1alpha1 12 | names: 13 | kind: IngressRoute 14 | plural: ingressroutes 15 | singular: ingressroute 16 | scope: Namespaced 17 | 18 | --- 19 | apiVersion: apiextensions.k8s.io/v1beta1 20 | kind: CustomResourceDefinition 21 | metadata: 22 | name: middlewares.traefik.containo.us 23 | 24 | spec: 25 | group: traefik.containo.us 26 | version: v1alpha1 27 | names: 28 | kind: Middleware 29 | plural: middlewares 30 | singular: middleware 31 | scope: Namespaced 32 | 33 | --- 34 | apiVersion: apiextensions.k8s.io/v1beta1 35 | kind: CustomResourceDefinition 36 | metadata: 37 | name: ingressroutetcps.traefik.containo.us 38 | 39 | spec: 40 | group: traefik.containo.us 41 | version: v1alpha1 42 | names: 43 | kind: IngressRouteTCP 44 | plural: ingressroutetcps 45 | singular: ingressroutetcp 46 | scope: Namespaced 47 | 48 | --- 49 | apiVersion: apiextensions.k8s.io/v1beta1 50 | kind: CustomResourceDefinition 51 
| metadata: 52 | name: ingressrouteudps.traefik.containo.us 53 | 54 | spec: 55 | group: traefik.containo.us 56 | version: v1alpha1 57 | names: 58 | kind: IngressRouteUDP 59 | plural: ingressrouteudps 60 | singular: ingressrouteudp 61 | scope: Namespaced 62 | 63 | --- 64 | apiVersion: apiextensions.k8s.io/v1beta1 65 | kind: CustomResourceDefinition 66 | metadata: 67 | name: tlsoptions.traefik.containo.us 68 | 69 | spec: 70 | group: traefik.containo.us 71 | version: v1alpha1 72 | names: 73 | kind: TLSOption 74 | plural: tlsoptions 75 | singular: tlsoption 76 | scope: Namespaced 77 | 78 | --- 79 | apiVersion: apiextensions.k8s.io/v1beta1 80 | kind: CustomResourceDefinition 81 | metadata: 82 | name: tlsstores.traefik.containo.us 83 | 84 | spec: 85 | group: traefik.containo.us 86 | version: v1alpha1 87 | names: 88 | kind: TLSStore 89 | plural: tlsstores 90 | singular: tlsstore 91 | scope: Namespaced 92 | 93 | --- 94 | apiVersion: apiextensions.k8s.io/v1beta1 95 | kind: CustomResourceDefinition 96 | metadata: 97 | name: traefikservices.traefik.containo.us 98 | 99 | spec: 100 | group: traefik.containo.us 101 | version: v1alpha1 102 | names: 103 | kind: TraefikService 104 | plural: traefikservices 105 | singular: traefikservice 106 | scope: Namespaced 107 | 108 | --- 109 | apiVersion: apiextensions.k8s.io/v1beta1 110 | kind: CustomResourceDefinition 111 | metadata: 112 | name: serverstransports.traefik.containo.us 113 | 114 | spec: 115 | group: traefik.containo.us 116 | version: v1alpha1 117 | names: 118 | kind: ServersTransport 119 | plural: serverstransports 120 | singular: serverstransport 121 | scope: Namespaced -------------------------------------------------------------------------------- /roles/plugins/kubernetes-dashboard/tasks/main.yml: -------------------------------------------------------------------------------- 1 | - block: 2 | - name: 在第一台 master 节点创建 kubernetes-dashboard 配置文件目录 3 | file: 4 | path: /etc/kubernetes/plugins/kubernetes-dashboard 5 | state: directory 6 | 7 | - name: 创建 kubernetes-dashboard 默认证书 8 | shell: > 9 | openssl req -x509 -nodes -days {{ kubernetesui_dashboard_certs_expired }} \ 10 | -newkey rsa:2048 \ 11 | -keyout /etc/kubernetes/plugins/kubernetes-dashboard/dashboard.key \ 12 | -out /etc/kubernetes/plugins/kubernetes-dashboard/dashboard.crt \ 13 | -subj "/CN=timebye.github.io" 14 | 15 | - name: 获取 kubernetes-dashboard 默认证书 16 | slurp: 17 | src: /etc/kubernetes/plugins/kubernetes-dashboard/{{ item }} 18 | with_items: 19 | - dashboard.key 20 | - dashboard.crt 21 | register: dashboard_certs 22 | 23 | - name: 渲染 kubernetes-dashboard 配置文件 24 | template: 25 | src: kubernetes-dashboard.yaml.j2 26 | dest: /etc/kubernetes/plugins/kubernetes-dashboard/kubernetes-dashboard.yaml 27 | register: kubernetes_dashboard_manifest 28 | 29 | - name: 部署 kubernetes-dashboard 30 | shell: kubectl apply -f /etc/kubernetes/plugins/kubernetes-dashboard/kubernetes-dashboard.yaml 31 | when: kubernetes_dashboard_manifest.changed 32 | 33 | - name: 轮询等待 kubernetes-dashboard 运行 34 | shell: kubectl get pod --all-namespaces -o wide | grep 'kubernetes-dashboard' | awk '{print $4}' 35 | register: pod_status 36 | until: "'Running' in pod_status.stdout" 37 | retries: 12 38 | delay: 5 39 | ignore_errors: true 40 | when: wait_plugins_ready|bool 41 | 42 | when: inventory_hostname == groups['kube-master'][0] and kubernetesui_dashboard_enabled|bool -------------------------------------------------------------------------------- /roles/plugins/metrics-server/tasks/main.yml: 
-------------------------------------------------------------------------------- 1 | - block: 2 | - name: 在第一台 master 节点创建 metrics-server 配置文件目录 3 | file: 4 | path: /etc/kubernetes/plugins/metrics-server 5 | state: directory 6 | 7 | - name: 渲染 metrics-server 配置文件 8 | template: 9 | src: metrics-server.yaml.j2 10 | dest: /etc/kubernetes/plugins/metrics-server/metrics-server.yaml 11 | register: metrics_server_manifest 12 | 13 | - name: 部署 metrics-server 14 | shell: kubectl apply -f /etc/kubernetes/plugins/metrics-server/metrics-server.yaml 15 | when: metrics_server_manifest.changed 16 | 17 | - name: 轮询等待 metrics-server 运行 18 | shell: kubectl get pod --all-namespaces -o wide | grep 'metrics-server' | awk '{print $4}' 19 | register: pod_status 20 | until: "'Running' in pod_status.stdout" 21 | retries: 12 22 | delay: 5 23 | ignore_errors: true 24 | when: wait_plugins_ready|bool 25 | 26 | when: inventory_hostname == groups['kube-master'][0] and metrics_server_enabled|bool -------------------------------------------------------------------------------- /roles/plugins/network-plugins/tasks/calico.yml: -------------------------------------------------------------------------------- 1 | - name: 渲染 calico 配置文件 2 | template: 3 | src: calico/calico-typha.yaml.j2 4 | dest: /etc/kubernetes/plugins/network-plugin/calico-typha.yaml 5 | register: calico_typha_manifest 6 | 7 | - name: 渲染 calicoctl 配置文件 8 | template: 9 | src: calico/calicoctl-daemonset.yaml.j2 10 | dest: /etc/kubernetes/plugins/network-plugin/calicoctl-daemonset.yaml 11 | register: calicoctl_manifest 12 | 13 | - name: 部署 calico 14 | shell: kubectl apply -f /etc/kubernetes/plugins/network-plugin/calico-typha.yaml 15 | when: calico_typha_manifest.changed 16 | 17 | - name: 部署 calicoctl 18 | shell: kubectl apply -f /etc/kubernetes/plugins/network-plugin/calicoctl-daemonset.yaml 19 | when: calicoctl_manifest.changed 20 | 21 | - name: 轮询等待 calico 运行 22 | shell: kubectl get pod --all-namespaces -o wide | grep 'calico' | awk '{print $4}' 23 | register: pod_status 24 | until: "'Running' in pod_status.stdout" 25 | retries: 12 26 | delay: 5 27 | ignore_errors: true 28 | when: wait_plugins_ready|bool -------------------------------------------------------------------------------- /roles/plugins/network-plugins/tasks/flannel.yml: -------------------------------------------------------------------------------- 1 | - name: 渲染 flannel 配置文件 2 | template: 3 | src: kube-flannel.yaml.j2 4 | dest: /etc/kubernetes/plugins/network-plugin/kube-flannel.yaml 5 | register: flannel_manifest 6 | 7 | - name: 部署 flannel 8 | shell: kubectl apply -f /etc/kubernetes/plugins/network-plugin/kube-flannel.yaml 9 | when: flannel_manifest.changed 10 | 11 | - name: 轮询等待 flannel 运行 12 | shell: kubectl get pod --all-namespaces -o wide | grep 'flannel' | awk '{print $4}' 13 | register: pod_status 14 | until: "'Running' in pod_status.stdout" 15 | retries: 12 16 | delay: 5 17 | ignore_errors: true 18 | when: wait_plugins_ready|bool -------------------------------------------------------------------------------- /roles/plugins/network-plugins/tasks/main.yml: -------------------------------------------------------------------------------- 1 | - block: 2 | - name: 创建 cni 相关目录 3 | file: 4 | name: /etc/kubernetes/plugins/network-plugin 5 | state: directory 6 | 7 | - include_tasks: flannel.yml 8 | when: network_plugin == 'flannel' 9 | 10 | - include_tasks: calico.yml 11 | when: network_plugin == 'calico' 12 | 13 | when: 14 | - network_plugins_enabled|bool 15 | - inventory_hostname == 
groups['kube-master'][0] -------------------------------------------------------------------------------- /roles/plugins/network-plugins/templates/calico/calicoctl-daemonset.yaml.j2: -------------------------------------------------------------------------------- 1 | # ref: https://docs.projectcalico.org/archive/v3.19/manifests/calicoctl.yaml 2 | # Calico Version v3.19.1 3 | # https://docs.projectcalico.org/archive/v3.19/releases#v3.19.1 4 | # This manifest includes the following component versions: 5 | # calico/ctl:v3.19.1 6 | 7 | apiVersion: v1 8 | kind: ServiceAccount 9 | metadata: 10 | name: calicoctl 11 | namespace: kube-system 12 | 13 | --- 14 | 15 | kind: ClusterRole 16 | apiVersion: rbac.authorization.k8s.io/v1 17 | metadata: 18 | name: calicoctl 19 | rules: 20 | - apiGroups: [""] 21 | resources: 22 | - namespaces 23 | - nodes 24 | verbs: 25 | - get 26 | - list 27 | - update 28 | - apiGroups: [""] 29 | resources: 30 | - nodes/status 31 | verbs: 32 | - update 33 | - apiGroups: [""] 34 | resources: 35 | - pods 36 | - serviceaccounts 37 | verbs: 38 | - get 39 | - list 40 | - apiGroups: [""] 41 | resources: 42 | - pods/status 43 | verbs: 44 | - update 45 | - apiGroups: ["crd.projectcalico.org"] 46 | resources: 47 | - bgppeers 48 | - bgpconfigurations 49 | - clusterinformations 50 | - felixconfigurations 51 | - globalnetworkpolicies 52 | - globalnetworksets 53 | - ippools 54 | - kubecontrollersconfigurations 55 | - networkpolicies 56 | - networksets 57 | - hostendpoints 58 | - ipamblocks 59 | - blockaffinities 60 | - ipamhandles 61 | - ipamconfigs 62 | verbs: 63 | - create 64 | - get 65 | - list 66 | - update 67 | - delete 68 | - apiGroups: ["networking.k8s.io"] 69 | resources: 70 | - networkpolicies 71 | verbs: 72 | - get 73 | - list 74 | 75 | --- 76 | 77 | apiVersion: rbac.authorization.k8s.io/v1 78 | kind: ClusterRoleBinding 79 | metadata: 80 | name: calicoctl 81 | roleRef: 82 | apiGroup: rbac.authorization.k8s.io 83 | kind: ClusterRole 84 | name: calicoctl 85 | subjects: 86 | - kind: ServiceAccount 87 | name: calicoctl 88 | namespace: kube-system 89 | 90 | --- 91 | 92 | kind: DaemonSet 93 | apiVersion: apps/v1 94 | metadata: 95 | name: calicoctl 96 | namespace: kube-system 97 | spec: 98 | selector: 99 | matchLabels: 100 | k8s-app: calicoctl 101 | template: 102 | metadata: 103 | labels: 104 | k8s-app: calicoctl 105 | spec: 106 | serviceAccountName: calicoctl 107 | hostNetwork: true 108 | hostPID: true 109 | tolerations: 110 | - operator: Exists 111 | effect: NoSchedule 112 | containers: 113 | - name: calicoctl 114 | image: {{ calicoctl_image }} 115 | stdin: true 116 | tty: true 117 | command: 118 | - /calicoctl 119 | args: 120 | - version 121 | - --poll=1m 122 | volumeMounts: 123 | - name: var-run-calico 124 | mountPath: /var/run/calico 125 | - name: var-run-bird 126 | mountPath: /var/run/bird 127 | env: 128 | - name: DATASTORE_TYPE 129 | value: kubernetes 130 | volumes: 131 | - name: var-run-calico 132 | hostPath: 133 | path: /var/run/calico 134 | - name: var-run-bird 135 | hostPath: 136 | path: /var/run/bird -------------------------------------------------------------------------------- /roles/post/tasks/main.yml: -------------------------------------------------------------------------------- 1 | - name: 获取当前集群节点信息 2 | shell: kubectl get node -o name 3 | delegate_to: "{{ groups['kube-master'][0] }}" 4 | ignore_errors: true 5 | run_once: true 6 | register: kubectl_get_node_output 7 | 8 | - block: 9 | - name: 取消节点原有角色标签 10 | shell: > 11 | kubectl label node {{ inventory_hostname }} 
node-role.kubernetes.io/control-plane- && 12 | kubectl label node {{ inventory_hostname }} node-role.kubernetes.io/master- && 13 | kubectl label node {{ inventory_hostname }} node-role.kubernetes.io/worker- && 14 | kubectl label node {{ inventory_hostname }} node-role.kubernetes.io/lb- && 15 | kubectl label node {{ inventory_hostname }} node-role.kubernetes.io/etcd- 16 | 17 | - name: 设置控制面节点 role 18 | shell: > 19 | kubectl label node {{ inventory_hostname }} node-role.kubernetes.io/control-plane='' --overwrite && 20 | kubectl label node {{ inventory_hostname }} node-role.kubernetes.io/master='' --overwrite 21 | when: inventory_hostname in (groups['kube-master'] + groups['new-master']) 22 | 23 | - name: 设置 worker 节点 role 24 | shell: > 25 | kubectl label node {{ inventory_hostname }} node-role.kubernetes.io/worker='' --overwrite 26 | when: inventory_hostname in (groups['kube-worker'] + groups['new-worker']) 27 | 28 | - name: 设置 load balancer 节点 role 29 | shell: > 30 | kubectl label node {{ inventory_hostname }} node-role.kubernetes.io/lb='' --overwrite 31 | when: inventory_hostname in groups['lb'] 32 | 33 | - name: 设置 etcd 节点 role 34 | shell: > 35 | kubectl label node {{ inventory_hostname }} node-role.kubernetes.io/etcd='' --overwrite 36 | when: inventory_hostname in (groups['etcd'] + groups['new-etcd']) 37 | 38 | ignore_errors: true 39 | delegate_to: "{{ groups['kube-master'][0] }}" 40 | when: inventory_hostname in kubectl_get_node_output.stdout -------------------------------------------------------------------------------- /roles/prepare/base/tasks/centos.yml: -------------------------------------------------------------------------------- 1 | # - name: 删除centos默认安装 2 | # yum: 3 | # name: 4 | # - firewalld 5 | # - python-firewall 6 | # - firewalld-filesystem 7 | # state: absent 8 | # disablerepo: "*" 9 | 10 | - block: 11 | - name: 判断 firewalld 是否安装 12 | shell: > 13 | systemctl status firewalld | grep active || echo "not be found" 14 | register: firewalld_already_installed 15 | 16 | - name: 禁用防火墙 17 | service: 18 | name: firewalld 19 | state: stopped 20 | enabled: no 21 | when: '"active" in firewalld_already_installed.stdout' 22 | when: "(firewalld_disabled | bool) | default(true)" 23 | 24 | - name: 设置 yum obsoletes 值为 0 25 | lineinfile: 26 | path: /etc/yum.conf 27 | regexp: '^obsoletes' 28 | line: obsoletes=0 29 | 30 | - name: 清除 yum 缓存 31 | command: yum clean all 32 | # args: 33 | # warn: no 34 | 35 | - block: 36 | - name: 备份原有 yum 源 37 | shell: mv /etc/yum.repos.d /etc/yum.repos.d.orig.$(date '+%Y%m%dT%H%M%S') 38 | ignore_errors: true 39 | 40 | - name: 创建相关目录 41 | file: 42 | path: /etc/yum.repos.d 43 | state: directory 44 | 45 | - name: 添加基础 yum 仓库 46 | yum_repository: 47 | name: base 48 | description: Custom Repository 49 | baseurl: "{{ base_yum_repo }}" 50 | enabled: yes 51 | gpgcheck: no 52 | state: present 53 | when: base_yum_repo is defined and base_yum_repo != None 54 | 55 | - name: 添加 epel 仓库 56 | yum_repository: 57 | name: epel 58 | file: epel 59 | description: Extra Packages for Enterprise Linux $releasever - $basearch 60 | baseurl: "{{ epel_yum_repo }}" 61 | enabled: yes 62 | gpgcheck: no 63 | state: present 64 | 65 | - name: 安装基础软件包 66 | yum: 67 | name: 68 | - jq # 轻量JSON处理程序 69 | - git 70 | - htop 71 | - lvm2 # docker会用到 72 | - curl # 基础工具 73 | - wget 74 | - audit 75 | - iotop 76 | - ipset # ipvs 模式需要 77 | - socat # 用于port forwarding 78 | - sysstat 79 | - ipvsadm # ipvs 模式需要 80 | - nmap-ncat # 使用lb时进行端口判断时会用到 81 | - nfs-utils # 挂载nfs 共享文件需要 (创建基于 nfs的 PV 需要) 82 | - 
iscsi-initiator-utils # iSCSI 服务端及管理命令 (管理 IP SAN。有 externel volume provisioner 时依赖) 83 | - yum-utils # 基础工具 84 | - net-tools 85 | - libseccomp 86 | - conntrack-tools # ipvs 模式需要 87 | - bash-completion # bash命令补全工具,需要重新登录服务器生效 88 | - device-mapper-persistent-data # docker会用到 89 | state: latest 90 | 91 | - name: 重新启动 crond 避免因修改时区而导致的作业错乱问题 92 | service: 93 | name: crond 94 | state: restarted 95 | enabled: yes 96 | 97 | - name: 临时关闭 selinux 98 | shell: "setenforce 0" 99 | failed_when: false 100 | 101 | - name: 永久关闭 selinux 102 | lineinfile: 103 | dest: /etc/selinux/config 104 | regexp: "^SELINUX=" 105 | line: "SELINUX=disabled" -------------------------------------------------------------------------------- /roles/prepare/base/tasks/common.yml: -------------------------------------------------------------------------------- 1 | - name: "统一时区为 {{ timezone }}" 2 | timezone: 3 | name: "{{ timezone }}" 4 | 5 | - name: 禁用系统 swap 6 | shell: "swapoff -a && sysctl -w vm.swappiness=0" 7 | ignore_errors: true 8 | 9 | - name: 删除 fstab swap 相关配置 10 | lineinfile: 11 | path: /etc/fstab 12 | regexp: 'swap' 13 | state: absent 14 | backup: yes 15 | 16 | - name: 加载内核模块 17 | modprobe: 18 | name: "{{ item }}" 19 | state: present 20 | with_items: 21 | - sunrpc 22 | - ip_vs 23 | - ip_vs_rr 24 | - ip_vs_sh 25 | - ip_vs_wrr 26 | - br_netfilter 27 | ignore_errors: true 28 | 29 | - name: 加载 nf_conntrack_ipv4 30 | modprobe: 31 | name: nf_conntrack_ipv4 32 | state: present 33 | register: modprobe_nf_conntrack_ipv4 34 | ignore_errors: true 35 | 36 | - name: 加载 nf_conntrack 37 | modprobe: 38 | name: nf_conntrack 39 | state: present 40 | when: modprobe_nf_conntrack_ipv4 is not success 41 | ignore_errors: true 42 | 43 | - name: 设置 systemd-modules-load 配置 44 | template: 45 | src: 10-k8s-modules.conf.j2 46 | dest: /etc/modules-load.d/10-k8s-modules.conf 47 | 48 | - name: 启动/重启 systemd-modules-load 49 | service: 50 | name: systemd-modules-load 51 | state: restarted 52 | enabled: yes 53 | 54 | - name: 设置系统参数 55 | template: 56 | src: 95-k8s-sysctl.conf.j2 57 | dest: /etc/sysctl.d/95-k8s-sysctl.conf 58 | 59 | - name: 生效系统参数 60 | shell: "sysctl -p /etc/sysctl.d/95-k8s-sysctl.conf" 61 | ignore_errors: true 62 | 63 | - name: 优化 nfs clinet 配置 64 | template: 65 | src: sunrpc.conf.j2 66 | dest: /etc/modprobe.d/sunrpc.conf 67 | 68 | - name: 生效 nfs clinet 配置 69 | shell: "sysctl -w sunrpc.tcp_slot_table_entries=128" 70 | ignore_errors: true 71 | 72 | - name: 添加集群节点 hostname 信息到 hosts 文件中 73 | blockinfile: 74 | path: /etc/hosts 75 | block: |- 76 | {% for item in (groups['all']|unique) -%} 77 | {% if hostvars[item]['ansible_host'] is defined -%} 78 | {{ hostvars[item]['ansible_host'] }} {{ hostvars[item]['inventory_hostname'] }} {{ hostvars[item]['inventory_hostname'] }}.{{ kube_dns_domain }} 79 | {% endif %} 80 | {% endfor %} 81 | {% for custom_hosts_ip, custom_hosts_addr in custom_hosts.items() %} 82 | {{ custom_hosts_ip }} {{ ([ custom_hosts_addr ] | flatten ) | join(' ') }} 83 | {% endfor %} 84 | state: present 85 | create: yes 86 | backup: yes 87 | marker: "# Ansible inventory hosts {mark}" 88 | 89 | - name: 确认 hosts 文件中 localhost ipv4 配置正确 90 | lineinfile: 91 | dest: /etc/hosts 92 | line: "127.0.0.1 localhost localhost.localdomain" 93 | regexp: '^127.0.0.1.*$' 94 | state: present 95 | 96 | - name: 确认 hosts 文件中 localhost ipv6 配置正确 97 | lineinfile: 98 | dest: /etc/hosts 99 | line: "::1 localhost6 localhost6.localdomain" 100 | regexp: '^::1.*$' 101 | state: present 102 | 103 | - name: 创建 systemd 配置目录 104 | file: 105 | name: 
/etc/systemd/system.conf.d 106 | state: directory 107 | 108 | - name: 设置系统 ulimits 109 | template: 110 | src: 30-k8s-ulimits.conf.j2 111 | dest: /etc/systemd/system.conf.d/30-k8s-ulimits.conf 112 | -------------------------------------------------------------------------------- /roles/prepare/base/tasks/debian.yml: -------------------------------------------------------------------------------- 1 | # # 删除默认安装 2 | # - name: 删除ubuntu默认安装 3 | # apt: 4 | # name: 5 | # - ufw 6 | # - lxd 7 | # - lxd-client 8 | # - lxcfs 9 | # - lxc-common 10 | # state: absent 11 | 12 | - block: 13 | - name: 判断 ufw 是否安装 14 | shell: > 15 | systemctl status ufw | grep active || echo "not be found" 16 | register: ufw_already_installed 17 | 18 | - name: 禁用防火墙 19 | service: 20 | name: ufw 21 | state: stopped 22 | enabled: no 23 | when: '"active" in ufw_already_installed.stdout' 24 | when: "(firewalld_disabled | bool) | default(true)" 25 | 26 | - name: iptables 工具切换到 “旧版” 模式 27 | shell: "{{ item }} || true" 28 | with_items: 29 | - update-alternatives --set iptables /usr/sbin/iptables-legacy 30 | - update-alternatives --set ip6tables /usr/sbin/ip6tables-legacy 31 | - update-alternatives --set arptables /usr/sbin/arptables-legacy 32 | - update-alternatives --set ebtables /usr/sbin/ebtables-legacy 33 | when: >- 34 | (ansible_distribution == "Debian" and ansible_distribution_version is version('10', '>=')) or 35 | (ansible_distribution == "Ubuntu" and ansible_distribution_version is version('19', '>=')) 36 | 37 | - name: 添加基础 Apt 仓库 38 | apt_repository: 39 | repo: "{{ base_apt_repo }}" 40 | state: present 41 | when: base_apt_repo is defined and base_apt_repo != None 42 | 43 | # Ubuntu 安装基础软件包 44 | - name: 安装 ubuntu 基础软件 45 | apt: 46 | name: 47 | - jq # 轻量JSON处理程序,安装docker查询镜像需要 48 | - git 49 | - htop 50 | - curl # 基础工具 51 | - lvm2 # docker会用到 52 | - socat # 用于port forwarding 53 | - ipset # ipvs 模式需要 54 | - iotop 55 | - sysstat 56 | - ipvsadm # ipvs 模式需要 57 | - conntrack # network connection cleanup 用到 58 | - net-tools 59 | - nfs-common # 挂载nfs 共享文件需要 (创建基于 nfs的PV 需要) 60 | - libseccomp2 # 安装containerd需要 61 | - netcat-openbsd # 使用lb时进行端口判断时会用到 62 | - ca-certificates # docker会用到 63 | - bash-completion # bash命令补全工具,需要重新登录服务器生效 64 | - apt-transport-https # 添加https协议的apt源会用到 65 | - software-properties-common # docker会用到 66 | state: present 67 | update_cache: yes 68 | allow_unauthenticated: true 69 | 70 | - name: 重新启动 cron 避免因修改时区而导致的作业错乱问题 71 | service: 72 | name: cron 73 | state: restarted 74 | enabled: yes -------------------------------------------------------------------------------- /roles/prepare/base/tasks/main.yml: -------------------------------------------------------------------------------- 1 | # 参数校验 2 | - include_tasks: verify_variables.yml 3 | 4 | # 预检内存是否满足要求 5 | - name: 校验节点资源配置 6 | include_tasks: verify_node.yml 7 | when: not ( skip_verify_node | bool ) 8 | 9 | # 公共系统参数设置 10 | - include_tasks: common.yml 11 | 12 | # 系统基础软件环境 13 | - include_tasks: centos.yml 14 | when: ansible_distribution in [ 'CentOS','OracleLinux','RedHat' ] 15 | 16 | - include_tasks: debian.yml 17 | when: ansible_distribution in [ 'Ubuntu','Debian' ] -------------------------------------------------------------------------------- /roles/prepare/base/tasks/verify_node.yml: -------------------------------------------------------------------------------- 1 | # 软件资源校验 2 | - name: 校验节点操作系统 3 | assert: 4 | that: ansible_distribution in [ 'CentOS','OracleLinux','RedHat','Ubuntu','Debian' ] 5 | msg: "节点:{{ inventory_hostname }} {{ 
ansible_distribution }}操作系统不支持本脚本安装Kubernetes集群。" 6 | 7 | - name: 校验节点 systemd 类型操作系统 8 | assert: 9 | that: ansible_service_mgr == "systemd" 10 | msg: "节点:{{ inventory_hostname }} 期望为systemd类型操作系统,实际获取:{{ ansible_service_mgr }},安装需求校验不通过。" 11 | 12 | - name: 校验节点系统内核 13 | assert: 14 | that: ansible_kernel is version('3.10', '>=') 15 | msg: "节点:{{ inventory_hostname }} 系统内核版本小于3.10,实际获取:{{ ansible_kernel }},安装需求校验不通过。" 16 | 17 | - name: 校验节点系统架构 18 | assert: 19 | that: ansible_machine in [ 'x86_64','aarch64' ] 20 | msg: "节点:{{ inventory_hostname }} {{ ansible_machine }} 系统指令集不支持本脚本安装Kubernetes集群" 21 | 22 | - name: 校验节点系统版本 23 | assert: 24 | that: ansible_distribution_version is version('7.4', '>=') 25 | msg: "节点:{{ inventory_hostname }} 系统版本期望不小于CentOS 7.4,实际获取:{{ ansible_distribution_version }},安装需求校验不通过。" 26 | when: 27 | - ansible_distribution == "CentOS" 28 | 29 | - name: 校验节点系统版本 30 | assert: 31 | that: ansible_distribution_version is version('7', '>=') 32 | msg: "节点:{{ inventory_hostname }} 系统版本期望不小于RedHat 7,实际获取:{{ ansible_distribution_version }},安装需求校验不通过。" 33 | when: 34 | - ansible_distribution == "RedHat" 35 | 36 | - name: 校验节点系统版本 37 | assert: 38 | that: ansible_distribution_version is version('16.04', '>=') 39 | msg: "节点:{{ inventory_hostname }} 系统版本期望不小于Ubuntu 16.04,实际获取:{{ ansible_distribution_version }},安装需求校验不通过。" 40 | when: 41 | - ansible_distribution == "Ubuntu" 42 | 43 | - name: 校验节点系统版本 44 | assert: 45 | that: ansible_distribution_version is version('10', '>=') 46 | msg: "节点:{{ inventory_hostname }} 系统版本期望不小于Debian 10,实际获取:{{ ansible_distribution_version }},安装需求校验不通过。" 47 | when: 48 | - ansible_distribution == "Debian" 49 | 50 | # 物理资源校验 51 | - name: 校验 master 节点内存 52 | assert: 53 | that: ansible_memtotal_mb >= 2*1024*0.85 54 | msg: "Master节点:{{ inventory_hostname }} 内存期望不小于2G,实际获取:{{ ansible_memtotal_mb }}MB,安装需求校验不通过。" 55 | when: 56 | - inventory_hostname in (groups['kube-master'] + groups['new-master']) 57 | 58 | - name: 校验 worker 节点内存 59 | assert: 60 | that: ansible_memtotal_mb >= 4*1024*0.85 61 | msg: "Worker节点:{{ inventory_hostname }} 内存期望不小于4G,实际获取:{{ ansible_memtotal_mb }}MB,安装需求校验不通过。" 62 | when: 63 | - inventory_hostname in (groups['kube-worker'] + groups['new-worker']) 64 | 65 | - name: 校验 master 节点CPU核数 66 | assert: 67 | that: ansible_processor_vcpus >= 2 68 | msg: "Master节点:{{ inventory_hostname }} CPU核数期望不小于2C,实际获取:{{ ansible_processor_vcpus }}C,安装需求校验不通过。" 69 | when: 70 | - inventory_hostname in (groups['kube-master'] + groups['new-master']) 71 | 72 | - name: 校验 worker 节点CPU核数 73 | assert: 74 | that: ansible_processor_vcpus >= 2 75 | msg: "Worker节点:{{ inventory_hostname }} CPU核数期望不小于2C,实际获取:{{ ansible_processor_vcpus }}C,安装需求校验不通过。" 76 | when: 77 | - inventory_hostname in (groups['kube-worker'] + groups['new-worker']) -------------------------------------------------------------------------------- /roles/prepare/base/tasks/verify_variables.yml: -------------------------------------------------------------------------------- 1 | - name: 校验 NodeName 是否合法 2 | assert: 3 | that: inventory_hostname is match('^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$') 4 | msg: "NodeName 必须由小写字母、数字,“-”或“.”组成,并且必须以小写字母或数字开头和结尾,请修改重试。" 5 | 6 | - name: 校验 etcd 节点数量 7 | run_once: true 8 | assert: 9 | that: groups['etcd']|length >= 1 10 | msg: "请添加至少一个节点到 etcd 节点组中。" 11 | 12 | - name: 校验 master 节点数量 13 | run_once: true 14 | assert: 15 | that: groups['kube-master']|length >= 1 16 | msg: "请添加至少一个节点到 kube-master 节点组中。" 17 | 18 | - name: 校验 worker 节点数量 19 | run_once: true 20 | assert: 21 | that: groups['kube-worker']|length >= 1 22 | msg: "请添加至少一个节点到 kube-worker 节点组中。" 23 | 
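The group-size checks in this file assume an inventory that defines the etcd, kube-master and kube-worker groups, and that the other groups referenced by this role (new-etcd, new-master, new-worker, lb) exist even when they are empty. Below is a minimal sketch in Ansible's YAML inventory form; the node names are hypothetical, and an INI inventory with the same groups works just as well.

```yaml
# A minimal sketch with hypothetical node names; any inventory (INI or YAML)
# that defines these groups satisfies the assertions in verify_variables.yml.
all:
  children:
    etcd:
      hosts:
        node1:
        node2:
        node3:          # keep the etcd member count odd, as asserted below
    kube-master:
      hosts:
        node1:
    kube-worker:
      hosts:
        node2:
        node3:
    # groups referenced elsewhere in this role; define them even if empty
    lb:
      hosts: {}
    new-etcd:
      hosts: {}
    new-master:
      hosts: {}
    new-worker:
      hosts: {}
```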
24 | - name: 校验 new-etcd 节点组数量 25 | assert: 26 | that: groups['new-etcd']|length <= 1 27 | msg: "同时只能添加一个 etcd 节点。" 28 | 29 | - name: 校验 etcd 节点数量 30 | run_once: true 31 | assert: 32 | that: (groups['etcd']|length is odd) or ((groups['etcd']|length + groups['new-etcd']|length) is odd) 33 | msg: "初始化集群时 etcd 节点只能为奇数个,当前 {{ groups['etcd']|length }} 个,请添加或减少 etcd 节点数量至奇数个。" 34 | 35 | - name: 校验 lb 模式类型 36 | run_once: true 37 | assert: 38 | that: lb_mode in ['openresty','nginx','haproxy','envoy','slb'] 39 | msg: "不支持当前 {{ lb_mode }} 负载均衡模式,请在 ['openresty','nginx','haproxy','envoy','slb'] 中进行选择。" 40 | 41 | - name: 校验 lb 节点组非空时 lb_kube_apiserver_ip 是否赋值 42 | run_once: true 43 | assert: 44 | that: lb_kube_apiserver_ip is defined 45 | msg: "lb 节点组非空,请设置 lb_kube_apiserver_ip 以启用 keepalived。" 46 | when: groups['lb']|length > 0 47 | 48 | - name: 校验 slb 模式 lb_kube_apiserver_ip 是否赋值 49 | run_once: true 50 | assert: 51 | that: lb_kube_apiserver_ip is defined 52 | msg: "当前负载均衡为 slb 模式,请将 slb 的 ip 地址赋值给变量 lb_kube_apiserver_ip。" 53 | when: lb_mode == 'slb' 54 | 55 | - block: 56 | - name: 校验 lb_kube_apiserver_ip 是否符合 IP 规则 57 | run_once: true 58 | assert: 59 | that: lb_kube_apiserver_ip is regex("^((25[0-5]|2[0-4]\d|[01]?\d\d?)\.){3}(25[0-5]|2[0-4]\d|[01]?\d\d?)$") 60 | msg: "请设置一个正确的 lb_kube_apiserver_ip。" 61 | 62 | - name: 校验 lb_kube_apiserver_ip 是否为节点 IP 63 | assert: 64 | that: lb_kube_apiserver_ip != current_host_ip 65 | msg: "lb_kube_apiserver_ip 不能设置为集群节点的 IP。" 66 | when: lb_kube_apiserver_ip is defined 67 | 68 | - name: 校验 lb 模式端口设置 69 | assert: 70 | that: lb_kube_apiserver_port|int != 6443 71 | msg: "当前 {{ inventory_hostname }} 节点 6443 端口已使用,请更换端口。" 72 | when: 73 | - lb_kube_apiserver_ip is not defined 74 | - inventory_hostname in ((groups['kube-master'] + groups['new-master'])|unique) 75 | 76 | - block: 77 | - name: 校验 lb 节点组设置节点是否满足要求 78 | run_once: true 79 | assert: 80 | that: groups['lb']|length > 1 81 | msg: "请添加至少两个节点到 lb 节点组中。" 82 | 83 | - name: 校验 lb 模式端口设置 84 | assert: 85 | that: lb_kube_apiserver_port|int != 6443 86 | msg: "lb 节点与 master 节点有复用情况,lb_kube_apiserver_port 请勿使用6443端口。" 87 | when: 88 | - inventory_hostname in groups['lb'] 89 | - inventory_hostname in ((groups['kube-master'] + groups['new-master'])|unique) 90 | when: 91 | - lb_mode != 'slb' 92 | - lb_kube_apiserver_ip is defined 93 | 94 | - name: 校验 kube-proxy 模式类型 95 | run_once: true 96 | assert: 97 | that: kube_proxy_mode in ['iptables','ipvs'] 98 | msg: "kube-proxy 不支持当前 {{ kube_proxy_mode }} 模式,请在 ['iptables','ipvs'] 中进行选择。" 99 | 100 | - name: 校验容器运行时类型 101 | run_once: true 102 | assert: 103 | that: container_manager in ['docker','containerd'] 104 | msg: "容器运行时不支持当前 {{ container_manager }} 模式,请在 ['docker','containerd'] 中进行选择。" -------------------------------------------------------------------------------- /roles/prepare/base/templates/10-k8s-modules.conf.j2: -------------------------------------------------------------------------------- 1 | sunrpc 2 | ip_vs 3 | ip_vs_rr 4 | ip_vs_wrr 5 | ip_vs_sh 6 | br_netfilter 7 | {% if modprobe_nf_conntrack_ipv4 is success %} 8 | nf_conntrack_ipv4 9 | {% else %} 10 | nf_conntrack 11 | {% endif %} -------------------------------------------------------------------------------- /roles/prepare/base/templates/30-k8s-ulimits.conf.j2: -------------------------------------------------------------------------------- 1 | [Manager] 2 | DefaultLimitCORE=infinity 3 | DefaultLimitNOFILE=100000 
4 | DefaultLimitNPROC=100000 -------------------------------------------------------------------------------- /roles/prepare/base/templates/95-k8s-sysctl.conf.j2: -------------------------------------------------------------------------------- 1 | # 最大限度使用物理内存 2 | vm.swappiness = 0 3 | 4 | # 决定检查一次相邻层记录的有效性的周期。当相邻层记录失效时,将在给它发送数据前,再解析一次。缺省值是60秒。 5 | net.ipv4.neigh.default.gc_stale_time = 120 6 | 7 | # see details in https://help.aliyun.com/knowledge_detail/39428.html 8 | net.ipv4.conf.all.rp_filter = 0 9 | net.ipv4.conf.default.rp_filter = 0 10 | net.ipv4.conf.default.arp_announce = 2 11 | net.ipv4.conf.lo.arp_announce = 2 12 | net.ipv4.conf.all.arp_announce = 2 13 | 14 | # see details in https://help.aliyun.com/knowledge_detail/41334.html 15 | net.ipv4.tcp_max_tw_buckets = 5000 16 | net.ipv4.tcp_syncookies = 1 17 | net.ipv4.tcp_max_syn_backlog = 1024 18 | net.ipv4.tcp_synack_retries = 2 19 | 20 | # 容器要想访问外部网络,需要本地系统的转发支持 21 | net.ipv4.ip_forward = 1 22 | 23 | # 访问业务域名时而会出现无法访问或连接超时的情况 24 | # refer to https://www.ziji.work/kubernetes/kubernetes_cannot_accesspod_port.html 25 | {% if ansible_distribution in ["CentOS","RedHat"] and ansible_kernel is version('4.12', '<') %} 26 | net.ipv4.tcp_tw_recycle = 0 27 | {% endif %} 28 | net.ipv4.tcp_tw_reuse = 0 29 | 30 | # bridge-nf 使得 netfilter 可以对 Linux 网桥上的 IPv4/ARP/IPv6 包过滤。 31 | # 比如,设置net.bridge.bridge-nf-call-iptables=1后,二层的网桥在转发包时也会被 iptables 的 FORWARD 规则所过滤。 32 | # refer to https://www.qikqiak.com/k8strain/k8s-basic/install/ 33 | # 是否在 iptables 链中过滤 IPv4 包 34 | net.bridge.bridge-nf-call-iptables = 1 35 | # 是否在 ip6tables 链中过滤 IPv6 包 36 | net.bridge.bridge-nf-call-ip6tables = 1 37 | # 是否在 arptables 的 FORWARD 中过滤网桥的 ARP 包 38 | net.bridge.bridge-nf-call-arptables = 1 39 | 40 | # 定义了系统中每一个端口最大的监听队列的长度,这是个全局的参数,默认值为128 41 | net.core.somaxconn = 32768 42 | 43 | # 服务器在访问量很大时,出现网络连接丢包的问题 44 | # 比较现代的系统(Ubuntu 16+, CentOS 7+)里,64 位,16G 内存的机器, 45 | # max 通常默认为 524288, 46 | # bucket 为 131072(在sunrpc.conf文件中修改)。 47 | # 随着内存大小翻倍这 2 个值也翻倍。 48 | # refer to https://testerhome.com/topics/15824 49 | net.netfilter.nf_conntrack_max = 524288 50 | 51 | # 单个进程可分配的最大文件数 52 | fs.nr_open = 6553600 53 | # Linux系统级别限制所有用户进程能打开的文件描述符总数 54 | fs.file-max = 6553600 55 | 56 | # 每个进程内存拥有的VMA(虚拟内存区域)的数量。虚拟内存区域是一个连续的虚拟地址空间区域。在进程的生命 57 | # 周期中,每当程序尝试在内存中映射文件,链接到共享内存段,或者分配堆空间的时候,这些区域将被创建。 58 | # 进程加载的动态库、分配的内存、mmap的内存都会增加VMA的数量。通常一个进程会有小于1K个VMA,如果进程有 59 | # 特殊逻辑,可能会超过该限制。 60 | # 调优这个值将限制进程可拥有VMA的数量。限制一个进程拥有VMA的总数可能导致应用程序出错,因为当进程达到 61 | # 了VMA上线但又只能释放少量的内存给其他的内核进程使用时,操作系统会抛出内存不足的错误。如果你的操作系 62 | # 统在NORMAL区域仅占用少量的内存,那么调低这个值可以帮助释放内存给内核用。 63 | # refer to https://www.elastic.co/guide/en/elasticsearch/reference/current/vm-max-map-count.html 64 | # 可以使用命令 cat /proc/${pid}/maps 来查看指定进程拥有的VMA。 65 | vm.max_map_count = 655360 66 | 67 | # 修复ipvs模式下长连接timeout问题 小于900即可 68 | # refer to https://github.com/moby/moby/issues/31208 69 | # ipvsadm -l --timout 70 | {% if kube_proxy_mode == 'ipvs' %} 71 | net.ipv4.tcp_keepalive_time = 600 72 | net.ipv4.tcp_keepalive_intvl = 30 73 | net.ipv4.tcp_keepalive_probes = 10 74 | {% endif %} 75 | 76 | # refer to https://github.com/Azure/aks-engine/blob/d6f4929a659241ea33d8fd4d9fc86d0e27b0cb07/parts/k8s/cloud-init/artifacts/sysctl-d-60-CIS.conf 77 | # refer to https://github.com/kubernetes/kubernetes/blob/75d45bdfc9eeda15fb550e00da662c12d7d37985/pkg/kubelet/cm/container_manager_linux.go#L359-L397 78 | vm.overcommit_memory = 1 79 | kernel.panic = 10 80 | kernel.panic_on_oops = 1 81 | 82 | # refer to https://github.com/Azure/AKS/issues/772 83 | 
fs.inotify.max_user_watches = 1048576 84 | 85 | # 指定每个真实用户 ID 可以创建的 inotify 实例数量上限 86 | # 指定 inotify 实例可以排队事件数量的上限 87 | fs.inotify.max_user_instances = 1048576 88 | fs.inotify.max_queued_events = 1048576 89 | fs.pipe-user-pages-soft=102400 -------------------------------------------------------------------------------- /roles/prepare/base/templates/sunrpc.conf.j2: -------------------------------------------------------------------------------- 1 | options nf_conntrack hashsize=131072 2 | options sunrpc tcp_slot_table_entries=128 3 | options sunrpc tcp_max_slot_table_entries=128 -------------------------------------------------------------------------------- /roles/prepare/container-engine/defaults/main.yml: -------------------------------------------------------------------------------- 1 | #----------------------------------------------- 容器运行时参数(不可配置项) -------------------------------------------# 2 | # CRI socket path 3 | cri_socket: >- 4 | {%- if container_manager == 'containerd' -%} 5 | /var/run/containerd/containerd.sock 6 | {%- else -%} 7 | /var/run/dockershim.sock 8 | {%- endif -%} 9 | -------------------------------------------------------------------------------- /roles/prepare/container-engine/tasks/containerd/centos.yml: -------------------------------------------------------------------------------- 1 | - name: 添加 Docker yum 仓库 2 | yum_repository: 3 | name: docker-ce-stable 4 | file: docker-ce 5 | description: Docker CE Stable - $basearch 6 | baseurl: "{{ docker_yum_repo }}" 7 | enabled: no 8 | gpgcheck: no 9 | state: present 10 | 11 | - name: 安装 containerd 12 | yum: 13 | name: 14 | - "containerd.io-{{ containerd_version.split('-')[0] }}" 15 | state: present 16 | enablerepo: docker-ce-stable -------------------------------------------------------------------------------- /roles/prepare/container-engine/tasks/containerd/common.yml: -------------------------------------------------------------------------------- 1 | - name: 准备 containerd 相关目录 2 | file: 3 | name: "{{ item }}" 4 | state: directory 5 | with_items: 6 | - /etc/systemd/system/containerd.service.d 7 | - "{{ containerd_config.root | default('/var/lib/containerd') }}" 8 | 9 | - name: 编写 containerd 代理配置 10 | template: 11 | src: http-proxy.conf.j2 12 | dest: /etc/systemd/system/containerd.service.d/http-proxy.conf 13 | when: http_proxy is defined or https_proxy is defined 14 | 15 | - name: 确认 containerd 配置是否有修改 16 | template: 17 | src: containerd/config.toml.j2 18 | dest: /etc/containerd/config.toml 19 | owner: root 20 | mode: 0644 21 | backup: yes 22 | register: configuration_result 23 | 24 | - name: 生成 containerd registries 配置目录 25 | file: 26 | name: "{{ containerd_registries_config_dir | default('/etc/containerd/certs.d') }}/{{ item }}" 27 | state: directory 28 | with_items: "{{ containerd_registries }}" 29 | 30 | - name: 生成 containerd registries 配置文件 31 | template: 32 | src: containerd/hosts.toml.j2 33 | dest: "{{ containerd_registries_config_dir | default('/etc/containerd/certs.d') }}/{{ item }}/hosts.toml" 34 | owner: root 35 | mode: 0644 36 | backup: yes 37 | with_items: "{{ containerd_registries }}" 38 | 39 | - name: 生成 crictl 配置文件 40 | template: 41 | src: containerd/crictl.yaml.j2 42 | dest: /etc/crictl.yaml 43 | owner: root 44 | mode: 0644 45 | 46 | - name: 重新加载 daemon 47 | systemd: 48 | daemon_reload: yes 49 | 50 | - name: 启动/重启 containerd 51 | service: 52 | name: containerd 53 | state: restarted 54 | enabled: yes 55 | when: 56 | - configuration_result.changed 57 | - '"active" not in 
containerd_already_running.stdout' 58 | 59 | - block: 60 | - name: 需手动重启 containerd 61 | vars: 62 | msg: | 63 | 节点:{{ inventory_hostname }} ,containerd 处于运行中, 64 | 但 containerd 配置文件 /etc/containerd/config.toml 已更新, 65 | 请在集群安装完成后手动执行下面命令重启该节点 containerd 与 kubelet: 66 | systemctl restart containerd 67 | systemctl restart kubelet 68 | 69 | 若重启后 kubelet 无法正常启动: 70 | 请确认以下两个文件中 cgroup driver 参数是否设置为 systemd, 71 | 若不是则修改为 systemd 后再次重启 kubelet。 72 | vi /var/lib/kubelet/config.yaml 73 | vi /var/lib/kubelet/kubeadm-flags.env 74 | 在 /var/lib/kubelet/config.yaml 文件中参数名为:cgroupDriver; 75 | 在 /var/lib/kubelet/kubeadm-flags.env 文件中参数名为:--cgroup-driver,若参数不存在请忽略。 76 | 77 | debug: 78 | msg: "{{ msg.split('\n') }}" 79 | 80 | - name: 等待用户查看日志 81 | shell: sleep 60 82 | run_once: true 83 | when: 84 | - configuration_result.changed 85 | - '"active" in containerd_already_running.stdout' 86 | 87 | - name: 设置 containerd 开机自启 88 | service: 89 | name: containerd 90 | state: started 91 | enabled: yes -------------------------------------------------------------------------------- /roles/prepare/container-engine/tasks/containerd/debian.yml: -------------------------------------------------------------------------------- 1 | - name: 生成 Docker GPG 公钥 2 | copy: 3 | src: docker.gpg 4 | dest: /tmp/docker.gpg 5 | owner: root 6 | mode: 0644 7 | 8 | - name: 添加 Docker GPG 公钥 9 | shell: apt-key add /tmp/docker.gpg 10 | 11 | - name: 添加 Docker apt 仓库 12 | apt_repository: 13 | repo: "{{ docker_apt_repo }}" 14 | state: present 15 | 16 | - name: 安装 containerd 17 | apt: 18 | name: 19 | - "containerd.io={{ containerd_version }}" 20 | state: present 21 | allow_unauthenticated: true -------------------------------------------------------------------------------- /roles/prepare/container-engine/tasks/containerd/main.yml: -------------------------------------------------------------------------------- 1 | # 系统基础软件环境 2 | - include_tasks: centos.yml 3 | when: ansible_distribution in [ 'CentOS','OracleLinux','RedHat' ] 4 | 5 | - include_tasks: debian.yml 6 | when: ansible_distribution in [ 'Ubuntu','Debian' ] 7 | 8 | # 公共系统参数设置 9 | - include_tasks: common.yml -------------------------------------------------------------------------------- /roles/prepare/container-engine/tasks/docker/centos.yml: -------------------------------------------------------------------------------- 1 | - name: 添加 Docker yum 仓库 2 | yum_repository: 3 | name: docker-ce-stable 4 | file: docker-ce 5 | description: Docker CE Stable - $basearch 6 | baseurl: "{{ docker_yum_repo }}" 7 | enabled: no 8 | gpgcheck: no 9 | state: present 10 | 11 | - name: 安装 Docker 12 | yum: 13 | name: 14 | - "docker-ce-{{ docker_version }}.ce" 15 | state: present 16 | enablerepo: docker-ce-stable 17 | when: docker_version is version('18.09', '<') 18 | 19 | - name: 安装 Docker 20 | yum: 21 | name: 22 | - "docker-ce-{{ docker_version }}" 23 | - "docker-ce-cli-{{ docker_version }}" 24 | - "containerd.io-{{ containerd_version.split('-')[0] }}" 25 | state: present 26 | enablerepo: docker-ce-stable 27 | when: docker_version is version('18.09', '>=') -------------------------------------------------------------------------------- /roles/prepare/container-engine/tasks/docker/common.yml: -------------------------------------------------------------------------------- 1 | - name: 准备 Docker 相关目录 2 | file: 3 | name: "{{ item }}" 4 | state: directory 5 | with_items: 6 | - /etc/docker 7 | - "{{ docker_storage_dir }}" 8 | - /etc/systemd/system/docker.service.d 9 | 10 | - name: 编写 Docker 代理配置 11 | template: 
12 | src: http-proxy.conf.j2 13 | dest: /etc/systemd/system/docker.service.d/http-proxy.conf 14 | when: http_proxy is defined or https_proxy is defined 15 | 16 | - name: 确认 Docker 配置是否有修改 17 | template: 18 | src: docker-daemon.json.j2 19 | dest: /etc/docker/daemon.json 20 | owner: root 21 | mode: 0644 22 | register: configuration_result 23 | 24 | - name: 重新加载 daemon 25 | systemd: 26 | daemon_reload: yes 27 | 28 | - name: 启动/重启 Docker 29 | service: 30 | name: docker 31 | state: restarted 32 | enabled: yes 33 | when: 34 | - configuration_result.changed 35 | - '"active" not in docker_already_running.stdout' 36 | 37 | - block: 38 | - name: 需手动重启 Docker 39 | vars: 40 | msg: | 41 | 节点:{{ inventory_hostname }} ,Docker 处于运行中, 42 | 但 Docker 配置文件 /etc/docker/daemon.json 已更新, 43 | 请在集群安装完成后手动执行下面命令重启该节点 docker 与 kubelet: 44 | systemctl restart docker 45 | systemctl restart kubelet 46 | 47 | 若重启后 kubelet 无法正常启动: 48 | 请确认以下两个文件中 cgroup driver 参数是否设置为 systemd, 49 | 若不是则修改为 systemd 后再次重启 kubelet。 50 | vi /var/lib/kubelet/config.yaml 51 | vi /var/lib/kubelet/kubeadm-flags.env 52 | 在 /var/lib/kubelet/config.yaml 文件中参数名为:cgroupDriver; 53 | 在 /var/lib/kubelet/kubeadm-flags.env 文件中参数名为:--cgroup-driver,若参数不存在请忽略。 54 | 55 | debug: 56 | msg: "{{ msg.split('\n') }}" 57 | 58 | - name: 等待用户查看日志 59 | shell: sleep 60 60 | run_once: true 61 | when: 62 | - configuration_result.changed 63 | - '"active" in docker_already_running.stdout' 64 | 65 | - name: 设置 Docker 开机自启 66 | service: 67 | name: docker 68 | state: started 69 | enabled: yes 70 | 71 | - name: 添加当前用户到 Docker 用户组 72 | user: 73 | name: "{{ ansible_user }}" 74 | groups: docker 75 | append: yes -------------------------------------------------------------------------------- /roles/prepare/container-engine/tasks/docker/debian.yml: -------------------------------------------------------------------------------- 1 | - name: 生成 Docker GPG 公钥 2 | copy: 3 | src: docker.gpg 4 | dest: /tmp/docker.gpg 5 | owner: root 6 | mode: 0644 7 | 8 | - name: 添加 Docker GPG 公钥 9 | shell: apt-key add /tmp/docker.gpg 10 | 11 | - name: 添加 Docker apt 仓库 12 | apt_repository: 13 | repo: "{{ docker_apt_repo }}" 14 | state: present 15 | 16 | - name: 安装 Docker 17 | apt: 18 | name: 19 | - "docker-ce={{ docker_version }}~ce~3-0~{{ ansible_distribution | lower }}" 20 | state: present 21 | allow_unauthenticated: true 22 | when: docker_version is version('18.09', '<') 23 | 24 | - name: 安装 Docker 25 | apt: 26 | name: 27 | - "docker-ce=5:{{ docker_version }}~3-0~{{ ansible_distribution | lower }}-{{ ansible_distribution_release }}" 28 | - "docker-ce-cli=5:{{ docker_version }}~3-0~{{ ansible_distribution | lower }}-{{ ansible_distribution_release }}" 29 | - "containerd.io={{ containerd_version }}" 30 | state: present 31 | allow_unauthenticated: true 32 | when: docker_version is version('18.09', '>=') -------------------------------------------------------------------------------- /roles/prepare/container-engine/tasks/docker/main.yml: -------------------------------------------------------------------------------- 1 | # 系统基础软件环境 2 | - include_tasks: centos.yml 3 | when: ansible_distribution in [ 'CentOS','OracleLinux','RedHat' ] 4 | 5 | - include_tasks: debian.yml 6 | when: ansible_distribution in [ 'Ubuntu','Debian' ] 7 | 8 | # 公共系统参数设置 9 | - include_tasks: common.yml -------------------------------------------------------------------------------- /roles/prepare/container-engine/tasks/main.yml: -------------------------------------------------------------------------------- 1 | - name: 判断 Docker 
是否早已安装 2 | shell: > 3 | systemctl status docker | grep running || echo "not running" 4 | register: docker_already_running 5 | 6 | - name: 判断 containerd 是否早已安装 7 | shell: > 8 | systemctl status containerd | grep running || echo "not running" 9 | register: containerd_already_running 10 | 11 | - include_tasks: docker/main.yml 12 | when: 13 | - container_manager == "docker" 14 | - '"active" not in docker_already_running.stdout' 15 | - '"active" not in containerd_already_running.stdout' 16 | 17 | - include_tasks: containerd/main.yml 18 | when: 19 | - container_manager == "containerd" 20 | - '"active" not in docker_already_running.stdout' 21 | - '"active" not in containerd_already_running.stdout' -------------------------------------------------------------------------------- /roles/prepare/container-engine/templates/containerd/config.toml.j2: -------------------------------------------------------------------------------- 1 | version = 2 2 | root = "{{ containerd_storage_dir }}" 3 | state = "{{ containerd_state_dir }}" 4 | oom_score = {{ containerd_oom_score }} 5 | 6 | [grpc] 7 | max_recv_message_size = {{ containerd_grpc_max_recv_message_size | default(16777216) }} 8 | max_send_message_size = {{ containerd_grpc_max_send_message_size | default(16777216) }} 9 | 10 | [debug] 11 | level = "{{ containerd_debug_level | default('info') }}" 12 | 13 | [metrics] 14 | address = "{{ containerd_metrics_address | default('') }}" 15 | grpc_histogram = {{ containerd_metrics_grpc_histogram | default(false) | lower }} 16 | 17 | [plugins] 18 | [plugins."io.containerd.grpc.v1.cri"] 19 | sandbox_image = "{{ pod_infra_container_image }}" 20 | max_container_log_line_size = {{ containerd_max_container_log_line_size }} 21 | [plugins."io.containerd.grpc.v1.cri".containerd] 22 | default_runtime_name = "{{ containerd_default_runtime | default('runc') }}" 23 | snapshotter = "{{ containerd_snapshotter | default('overlayfs') }}" 24 | [plugins."io.containerd.grpc.v1.cri".containerd.runtimes] 25 | {% for runtime in containerd_runtimes %} 26 | [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.{{ runtime.name }}] 27 | runtime_type = "{{ runtime.type }}" 28 | runtime_engine = "{{ runtime.engine }}" 29 | runtime_root = "{{ runtime.root }}" 30 | [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.{{ runtime.name }}.options] 31 | {% for key, value in runtime.options.items() %} 32 | {{ key }} = {{ value }} 33 | {% endfor %} 34 | {% endfor %} 35 | [plugins."io.containerd.grpc.v1.cri".registry] 36 | config_path = "{{ containerd_registries_config_dir | default('/etc/containerd/certs.d') }}" 37 | 38 | {% if containerd_extra_args is defined %} 39 | {{ containerd_extra_args }} 40 | {% endif %} -------------------------------------------------------------------------------- /roles/prepare/container-engine/templates/containerd/crictl.yaml.j2: -------------------------------------------------------------------------------- 1 | runtime-endpoint: unix://{{ cri_socket }} 2 | image-endpoint: unix://{{ cri_socket }} 3 | timeout: 30 4 | debug: false 5 | -------------------------------------------------------------------------------- /roles/prepare/container-engine/templates/containerd/hosts.toml.j2: -------------------------------------------------------------------------------- 1 | server = "https://{{ item }}" 2 | 3 | [host."{{ containerd_registries[item] }}"] 4 | skip_verify = true 5 | capabilities = ["pull", "resolve"] -------------------------------------------------------------------------------- 
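The registry tasks in containerd/common.yml above iterate over `containerd_registries`, a mapping from registry name to mirror endpoint, and render one hosts.toml per entry under `containerd_registries_config_dir` (falling back to /etc/containerd/certs.d). A sketch of what those variables might look like; the registry names and mirror URLs below are assumptions for illustration, not the project defaults (which presumably live with the variables role / example variables.yaml).

```yaml
# Hypothetical values, for illustration only.
containerd_registries:
  "docker.io": "https://dockerhub.mirror.example.com"
  "registry.example.com:5000": "http://registry.example.com:5000"

# Optional override; the tasks fall back to /etc/containerd/certs.d when unset.
containerd_registries_config_dir: /etc/containerd/certs.d
```

With the first entry, hosts.toml.j2 would render `server = "https://docker.io"` plus a `[host."https://dockerhub.mirror.example.com"]` section carrying `skip_verify = true` and the pull/resolve capabilities.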
/roles/prepare/container-engine/templates/docker-daemon.json.j2: -------------------------------------------------------------------------------- 1 | { 2 | {% if docker_mirror is defined and docker_mirror != None %} 3 | "registry-mirrors": [{% for registry in docker_mirror %}"{{ registry }}"{% if not loop.last %},{% endif %}{% endfor %}], 4 | {% endif %} 5 | {% if docker_insecure_registries is defined and docker_insecure_registries != None %} 6 | "insecure-registries": [{% for registry in docker_insecure_registries %}"{{ registry }}"{% if not loop.last %},{% endif %}{% endfor %}], 7 | {% endif %} 8 | "max-concurrent-downloads": {{ docker_max_concurrent_downloads | int }}, 9 | "log-driver": "{{ docker_log_driver }}", 10 | "log-level": "{{ docker_log_level }}", 11 | "log-opts": { 12 | "max-size": "{{ docker_log_max_size }}", 13 | "max-file": "{{ docker_log_max_file }}" 14 | }, 15 | "bip": "{{ docker_bip }}", 16 | "data-root": "{{ docker_storage_dir }}", 17 | "exec-opts": ["native.cgroupdriver=systemd"], 18 | "storage-driver": "overlay2", 19 | "storage-opts": [ 20 | "overlay2.override_kernel_check=true" 21 | ] 22 | } -------------------------------------------------------------------------------- /roles/prepare/container-engine/templates/http-proxy.conf.j2: -------------------------------------------------------------------------------- 1 | [Service] 2 | Environment={% if http_proxy is defined %}"HTTP_PROXY={{ http_proxy }}"{% endif %} {% if https_proxy is defined %}"HTTPS_PROXY={{ https_proxy }}"{% endif %} {% if no_proxy is defined %}"NO_PROXY={{ no_proxy }}"{% endif %} 3 | -------------------------------------------------------------------------------- /roles/prepare/kernel/tasks/centos.yml: -------------------------------------------------------------------------------- 1 | - name: 安装 kernel-ml 2 | yum: 3 | name: "{{ kernel_centos }}" 4 | state: present 5 | 6 | - name: 设置默认内核为最新版本 7 | shell: "grub2-set-default 0 && grub2-mkconfig -o /boot/grub2/grub.cfg" 8 | ignore_errors: true 9 | 10 | - name: 获取默认内核版本 11 | shell: "grubby --default-kernel" 12 | ignore_errors: true 13 | 14 | - name: 开启 User namespaces 15 | shell: grubby --args="user_namespace.enable=1" --update-kernel="$(grubby --default-kernel)" 16 | ignore_errors: true -------------------------------------------------------------------------------- /roles/prepare/kernel/tasks/main.yml: -------------------------------------------------------------------------------- 1 | - name: 校验节点系统架构 2 | assert: 3 | that: ansible_machine in [ 'x86_64' ] 4 | msg: "节点:{{ inventory_hostname }} {{ ansible_machine }} 系统指令集不支持使用本脚本升级内核" 5 | 6 | - include_tasks: centos.yml 7 | when: ansible_distribution in [ 'CentOS','OracleLinux','RedHat' ] 8 | 9 | - include_tasks: ubuntu.yml 10 | when: ansible_distribution in [ 'Ubuntu','Debian' ] 11 | 12 | - name: 需手动重启服务器 13 | debug: 14 | msg: > 15 | 节点:{{ inventory_hostname }}, 16 | 内核已升级完成, 17 | 请手动执行 reboot -f 重启该服务器。 -------------------------------------------------------------------------------- /roles/prepare/kernel/tasks/ubuntu.yml: -------------------------------------------------------------------------------- 1 | - name: 升级 linux-base 2 | apt: 3 | deb: "{{ item }}" 4 | with_items: "{{ linux_base }}" 5 | when: ansible_distribution in [ 'Debian' ] 6 | 7 | - name: 升级 libssl 8 | apt: 9 | deb: "{{ item }}" 10 | with_items: "{{ libssl }}" 11 | when: ansible_distribution in [ 'Ubuntu' ] 12 | 13 | - name: 安装 kernel-ml 14 | apt: 15 | deb: "{{ item }}" 16 | with_items: "{{ kernel_ubuntu }}" 17 | 18 | - name: 更新 grub 19 | 
shell: update-grub 20 | ignore_errors: true -------------------------------------------------------------------------------- /roles/prepare/kubernetes/files/kubernetes.gpg: -------------------------------------------------------------------------------- 1 | -----BEGIN PGP PUBLIC KEY BLOCK----- 2 | 3 | mQENBGKItdQBCADWmKTNZEYWgXy73FvKFY5fRro4tGNa4Be4TZW3wZpct9Cj8Ejy 4 | kU7S9EPoJ3EdKpxFltHRu7QbDi6LWSNA4XxwnudQrYGxnxx6Ru1KBHFxHhLfWsvF 5 | cGMwit/znpxtIt9UzqCm2YTEW5NUnzQ4rXYqVQK2FLG4weYJ5bKwkY+ZsnRJpzxd 6 | HGJ0pBiqwkMT8bfQdJymUBown+SeuQ2HEqfjVMsIRe0dweD2PHWeWo9fTXsz1Q5a 7 | biGckyOVyoN9//DgSvLUocUcZsrWvYPaN+o8lXTO3GYFGNVsx069rxarkeCjOpiQ 8 | OWrQmywXISQudcusSgmmgfsRZYW7FDBy5MQrABEBAAG0UVJhcHR1cmUgQXV0b21h 9 | dGljIFNpZ25pbmcgS2V5IChjbG91ZC1yYXB0dXJlLXNpZ25pbmcta2V5LTIwMjIt 10 | MDMtMDctMDhfMDFfMDEucHViKYkBIgQTAQgAFgUCYoi11AkQtT3IDRPt7wUCGwMC 11 | GQEAAMGoB/98QBNIIN3Q2D3aahrfkb6axd55zOwR0tnriuJRoPHoNuorOpCv9aWM 12 | MvQACNWkxsvJxEF8OUbzhSYjAR534RDigjTetjK2i2wKLz/kJjZbuF4ZXMynCm40 13 | eVm1XZqU63U9XR2RxmXppyNpMqQO9LrzGEnNJuh23icaZY6no12axymxcle/+SCm 14 | da8oDAfa0iyA2iyg/eU05buZv54MC6RB13QtS+8vOrKDGr7RYp/VYvQzYWm+ck6D 15 | vlaVX6VB51BkLl23SQknyZIJBVPm8ttU65EyrrgG1jLLHFXDUqJ/RpNKq+PCzWiy 16 | t4uy3AfXK89RczLu3uxiD0CQI0T31u/IuQENBGKItdQBCADIMMJdRcg0Phv7+CrZ 17 | z3xRE8Fbz8AN+YCLigQeH0B9lijxkjAFr+thB0IrOu7ruwNY+mvdP6dAewUur+pJ 18 | aIjEe+4s8JBEFb4BxJfBBPuEbGSxbi4OPEJuwT53TMJMEs7+gIxCCmwioTggTBp6 19 | JzDsT/cdBeyWCusCQwDWpqoYCoUWJLrUQ6dOlI7s6p+iIUNIamtyBCwb4izs27Hd 20 | EpX8gvO9rEdtcb7399HyO3oD4gHgcuFiuZTpvWHdn9WYwPGM6npJNG7crtLnctTR 21 | 0cP9KutSPNzpySeAniHx8L9ebdD9tNPCWC+OtOcGRrcBeEznkYh1C4kzdP1ORm5u 22 | pnknABEBAAGJAR8EGAEIABMFAmKItdQJELU9yA0T7e8FAhsMAABJmAgAhRPk/dFj 23 | 71bU/UTXrkEkZZzE9JzUgan/ttyRrV6QbFZABByf4pYjBj+yLKw3280//JWurKox 24 | 2uzEq1hdXPedRHICRuh1Fjd00otaQ+wGF3kY74zlWivB6Wp6tnL9STQ1oVYBUv7H 25 | hSHoJ5shELyedxxHxurUgFAD+pbFXIiK8cnAHfXTJMcrmPpC+YWEC/DeqIyEcNPk 26 | zRhtRSuERXcq1n+KJvMUAKMD/tezwvujzBaaSWapmdnGmtRjjL7IxUeGamVWOwLQ 27 | bUr+34MwzdeJdcL8fav5LA8Uk0ulyeXdwiAK8FKQsixI+xZvz7HUs8ln4pZwGw/T 28 | pvO9cMkHogtgzZkBDQRgkbezAQgA5GCRx0EKC+rSq1vy25n0fZY8+4m9mlp6OCTt 29 | 1SkLy8I8lDD6av0l1zDp8fI18IFos6T8UGA0SdEkF0vVCydYV0S/zoDJ2QGL2A3l 30 | dowZyrACBHYhv3tapvD+FvaqViXPoTauxTk9d0cxlkcee0nS1kl6NCnmN/K/Zb44 31 | zpk/3LjnJo8JQ0/V2H/0UjvsifwLMjHQK/mWw3kFHfR2CYj3SNOJRmhjNNjIwzJ8 32 | fpqJ3PsueLfmfq8tVrUHc6ELfXR5SD5VdbUfsVeQxx7HowmcbvU1s80pS+cHwQXh 33 | M+0fziM4rxiaVkHSc3ftkA10kYPatl2Fj+WVbUoI1VSYzZW+mQARAQABtFRBcnRp 34 | ZmFjdCBSZWdpc3RyeSBSZXBvc2l0b3J5IFNpZ25lciA8YXJ0aWZhY3QtcmVnaXN0 35 | cnktcmVwb3NpdG9yeS1zaWduZXJAZ29vZ2xlLmNvbT6JAU4EEwEKADgWIQQ1uqCz 36 | Pp6zlvWcqDjAulzm3GMVowUCYJG3swIbAwULCQgHAgYVCgkICwIEFgIDAQIeAQIX 37 | gAAKCRDAulzm3GMVo/ooCADBYeg6wGDHqvbG2dWRuqADK4p1IXhkGxKnu+pyA0Db 38 | GZ4Q8GdsFqoFQuw4DjKpYUJjps5uzOjc5qtnbz8Kt8QtjniPX0Ms40+9nXgU8yz+ 39 | zyaJPTyRTjHS3yC0rFJ5jLIXkLeA1DtI2AF9ilLljiF1yWmd9fUMqETQT2Guas+6 40 | l0u8ByzmPPSA6nx7egLnfBEec4cjsocrXGDHmhgtYNSClpoHsJ4RKtNhWp7TCRpZ 41 | phYtngNBDw9Nhgt++NkBqkcS8I1rJuf06crlNuBGCkRgkZu0HVSKN7oBUnrSq59G 42 | 8jsVhgb7buHx/F1r2ZEU/rvssx9bOchWAanNiU66yb0V 43 | =UL8X 44 | -----END PGP PUBLIC KEY BLOCK----- 45 | -------------------------------------------------------------------------------- /roles/prepare/kubernetes/tasks/centos.yml: -------------------------------------------------------------------------------- 1 | - name: 添加 Kubernetes yum 仓库 2 | yum_repository: 3 | name: kubernetes 4 | file: kubernetes 5 | description: Kubernetes 6 | baseurl: "{{ kubernetes_yum_repo }}" 7 | enabled: no 8 | gpgcheck: no 9 | state: present 10 | 
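Note that the repository above is registered with `enabled: no` and `gpgcheck: no`; it is only switched on per transaction via `enablerepo: kubernetes` in the install task that follows, so it does not affect ordinary yum runs. Both this file and debian.yml consume `kube_version`, `kubernetes_yum_repo` and `kubernetes_apt_repo`, whose defaults are kept in roles/prepare/variables/defaults/main.yml; the values below are only an assumed illustration (a domestic mirror is a common choice), not the shipped defaults.

```yaml
# Assumed values for illustration only.
kube_version: 1.20.15                  # becomes kubectl-1.20.15 (yum) / kubectl=1.20.15-00 (apt)
kubernetes_yum_repo: https://mirrors.aliyun.com/kubernetes/yum/repos/kubernetes-el7-x86_64
kubernetes_apt_repo: deb https://mirrors.aliyun.com/kubernetes/apt/ kubernetes-xenial main
```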
11 | - name: 安装 kubeadm kubelet kubectl 12 | yum: 13 | name: 14 | - "kubectl-{{ kube_version }}" 15 | - "kubelet-{{ kube_version }}" 16 | - "kubeadm-{{ kube_version }}" 17 | state: present 18 | enablerepo: kubernetes -------------------------------------------------------------------------------- /roles/prepare/kubernetes/tasks/debian.yml: -------------------------------------------------------------------------------- 1 | - name: 生成 Kubernetes GPG 公钥 2 | copy: 3 | src: kubernetes.gpg 4 | dest: /tmp/kubernetes.gpg 5 | owner: root 6 | mode: 0644 7 | 8 | - name: 添加 Kubernetes GPG 公钥 9 | shell: apt-key add /tmp/kubernetes.gpg 10 | 11 | - name: 添加 Kubernetes apt 仓库 12 | apt_repository: 13 | repo: "{{ kubernetes_apt_repo }}" 14 | state: present 15 | 16 | - name: 安装 kubeadm kubelet kubectl 17 | apt: 18 | name: 19 | - "kubectl={{ kube_version }}-00" 20 | - "kubelet={{ kube_version }}-00" 21 | - "kubeadm={{ kube_version }}-00" 22 | state: present 23 | allow_unauthenticated: true -------------------------------------------------------------------------------- /roles/prepare/kubernetes/tasks/main.yml: -------------------------------------------------------------------------------- 1 | # 系统基础软件环境 2 | - include_tasks: centos.yml 3 | when: ansible_distribution in [ 'CentOS','OracleLinux','RedHat' ] 4 | 5 | - include_tasks: debian.yml 6 | when: ansible_distribution in [ 'Ubuntu','Debian' ] 7 | 8 | - name: 配置 kubectl 命令行自动补全 9 | shell: kubectl completion bash > /usr/share/bash-completion/completions/kubectl -------------------------------------------------------------------------------- /roles/prepare/variables/tasks/main.yml: -------------------------------------------------------------------------------- 1 | - name: 设置变量 2 | debug: 3 | msg: "Check roles/prepare/variables/defaults/main.yml" -------------------------------------------------------------------------------- /roles/remove/etcd/tasks/main.yml: -------------------------------------------------------------------------------- 1 | - name: 判断 Docker 是否早已安装 2 | shell: > 3 | systemctl status docker | grep running || echo "not running" 4 | register: docker_already_running 5 | 6 | - name: 设置 container_manager_detected 变量 7 | set_fact: 8 | container_manager_detected: >- 9 | {%- if "active" in docker_already_running.stdout -%} 10 | docker 11 | {%- else -%} 12 | containerd 13 | {%- endif -%} 14 | 15 | - name: 校验 etcd 节点数量 16 | run_once: true 17 | assert: 18 | that: groups['etcd']|length > 1 19 | msg: "Etcd 节点数量大于 1 才可进行 etcd 节点移除操作。" 20 | 21 | - name: 校验被移除的 etcd 节点数量 22 | run_once: true 23 | assert: 24 | that: groups['del-etcd']|length == 1 25 | msg: "单次移除的 etcd 节点数量仅能为 1 个节点。" 26 | 27 | - name: 校验将被移除的 etcd 节点是否在原 etcd 组中 28 | run_once: true 29 | assert: 30 | that: "{{ item in groups['etcd'] }}" 31 | msg: "当前节点: {{ item }},并未在 etcd 组中,不需要进行移除操作。" 32 | with_items: "{{ groups['del-etcd'] }}" 33 | 34 | - name: 校验移除 etcd 节点后剩余 etcd 节点数量 35 | run_once: true 36 | assert: 37 | that: "{{ (groups['etcd'] | difference(groups['del-etcd']))|length >= 1 }}" 38 | msg: "移除 etcd 节点后,剩余 etcd 节点数量应大于等于 1。" 39 | 40 | - name: 获取将要移除的 etcd 节点 member id 41 | shell: > 42 | {% if container_manager_detected == 'containerd' %} 43 | ctr -n k8s.io run --net-host --env ETCDCTL_API=3 44 | --mount type=bind,src=/etc/kubernetes/pki/etcd,dst=/etc/kubernetes/pki/etcd,options=rbind:ro 45 | --rm {{ etcd_image }} etcd-list-member 46 | {% elif container_manager_detected == 'docker' %} 47 | docker run --net host -e ETCDCTL_API=3 48 | -v /etc/kubernetes/pki/etcd:/etc/kubernetes/pki/etcd 49 | 
--rm {{ etcd_image }} 50 | {% endif %} 51 | etcdctl member list 52 | --cacert=/etc/kubernetes/pki/etcd/ca.crt 53 | --key=/etc/kubernetes/pki/etcd/healthcheck-client.key 54 | --cert=/etc/kubernetes/pki/etcd/healthcheck-client.crt 55 | --endpoints=https://[127.0.0.1]:2379 | grep {{ inventory_hostname }} | cut -d',' -f1 56 | delegate_to: "{{ (groups['etcd'] | difference(groups['del-etcd']))|first }}" 57 | register: etcd_member_id 58 | 59 | - name: 从 etcd 集群中移除 {{ inventory_hostname }} 节点 60 | shell: > 61 | {% if container_manager_detected == 'containerd' %} 62 | ctr -n k8s.io run --net-host --env ETCDCTL_API=3 63 | --mount type=bind,src=/etc/kubernetes/pki/etcd,dst=/etc/kubernetes/pki/etcd,options=rbind:ro 64 | --rm {{ etcd_image }} etcd-remove-member 65 | {% elif container_manager_detected == 'docker' %} 66 | docker run --net host -e ETCDCTL_API=3 67 | -v /etc/kubernetes/pki/etcd:/etc/kubernetes/pki/etcd 68 | --rm {{ etcd_image }} 69 | {% endif %} 70 | etcdctl member remove {{ etcd_member_id.stdout }} 71 | --cacert=/etc/kubernetes/pki/etcd/ca.crt 72 | --key=/etc/kubernetes/pki/etcd/healthcheck-client.key 73 | --cert=/etc/kubernetes/pki/etcd/healthcheck-client.crt 74 | --endpoints=https://[127.0.0.1]:2379 75 | delegate_to: "{{ (groups['etcd'] | difference(groups['del-etcd']))|first }}" 76 | when: etcd_member_id.stdout != '' 77 | 78 | - name: 删除 etcd mainfest 文件以及数据 79 | file: 80 | name: "{{ item }}" 81 | state: absent 82 | with_items: 83 | - "{{ etcd_data_dir }}" 84 | - /etc/kubernetes/manifests/etcd-external.yaml 85 | 86 | - name: 取消节点原有 etcd 角色标签 87 | shell: kubectl label node {{ inventory_hostname }} node-role.kubernetes.io/etcd- 88 | ignore_errors: true 89 | delegate_to: "{{ groups['kube-master'][0] }}" 90 | when: inventory_hostname in (groups['kube-master'] + groups['new-master'] + groups['kube-worker'] + groups['new-worker']) -------------------------------------------------------------------------------- /roles/remove/master/defaults/main.yml: -------------------------------------------------------------------------------- 1 | # 提权操作 2 | ansible_become: true 3 | 4 | drain_grace_period: 300 5 | drain_timeout: 360s -------------------------------------------------------------------------------- /roles/remove/master/tasks/main.yml: -------------------------------------------------------------------------------- 1 | - name: 校验 master 节点数量 2 | run_once: true 3 | assert: 4 | that: groups['kube-master']|length > 1 5 | msg: "Master 节点数量大于 1 才可进行 master 节点移除操作。" 6 | 7 | - name: 校验将被移除的 master 节点是否在原 master 组中 8 | run_once: true 9 | assert: 10 | that: "{{ item in groups['kube-master'] }}" 11 | msg: "当前节点: {{ item }},并未在 kube-master 组中,不需要进行移除操作。" 12 | with_items: "{{ groups['del-master'] }}" 13 | 14 | - name: 校验移除 master 节点后剩余 master 节点数量 15 | run_once: true 16 | assert: 17 | that: "{{ (groups['kube-master'] | difference(groups['del-master']))|length >= 1 }}" 18 | msg: "移除 master 节点后,剩余 master 节点数量应大于等于 1。" 19 | 20 | - name: 刷新集群 master 节点状态 21 | shell: kubeadm reset phase update-cluster-status 22 | ignore_errors: true 23 | 24 | - name: 删除 master 节点组件 manifest 文件 25 | file: 26 | name: "{{ item }}" 27 | state: absent 28 | with_items: 29 | - /etc/kubernetes/manifests/kube-apiserver.yaml 30 | - /etc/kubernetes/manifests/kube-scheduler.yaml 31 | - /etc/kubernetes/manifests/kube-controller-manager.yaml 32 | 33 | - name: 取消节点原有 master 角色标签 34 | shell: > 35 | kubectl label node {{ inventory_hostname }} node-role.kubernetes.io/control-plane='' --overwrite && 36 | kubectl label node {{ inventory_hostname 
}} node-role.kubernetes.io/master='' --overwrite && 37 | kubectl label node {{ inventory_hostname }} node-role.kubernetes.io/control-plane- && 38 | kubectl label node {{ inventory_hostname }} node-role.kubernetes.io/master- 39 | ignore_errors: true 40 | delegate_to: "{{ (groups['kube-master'] | difference(groups['del-master']))[0] }}" 41 | 42 | - name: 取消 master 节点 taint,使 master 节点可以调度 43 | shell: > 44 | kubectl taint nodes {{inventory_hostname}} node-role.kubernetes.io/master='':NoSchedule --overwrite && 45 | kubectl taint nodes {{inventory_hostname}} node-role.kubernetes.io/master- 46 | ignore_errors: yes 47 | delegate_to: "{{ (groups['kube-master'] | difference(groups['del-master']))[0] }}" 48 | 49 | - name: 添加 worker 角色标签 50 | shell: kubectl label node {{ inventory_hostname }} node-role.kubernetes.io/worker='' --overwrite 51 | ignore_errors: true 52 | delegate_to: "{{ (groups['kube-master'] | difference(groups['del-master']))[0] }}" -------------------------------------------------------------------------------- /roles/remove/node/defaults/main.yml: -------------------------------------------------------------------------------- 1 | # 提权操作 2 | ansible_become: true 3 | 4 | drain_grace_period: 300 5 | drain_timeout: 360s -------------------------------------------------------------------------------- /roles/remove/node/tasks/main.yml: -------------------------------------------------------------------------------- 1 | - name: 校验将被移除的节点是否属于 lb 角色组 2 | run_once: true 3 | assert: 4 | that: "{{ item not in (groups['lb'] )}}" 5 | msg: "当前节点: {{ item }},在 lb 角色组中,请先执行移除 lb 角色操作。。" 6 | with_items: "{{ groups['del-node'] }}" 7 | 8 | - name: 校验将被移除的节点是否属于 etcd 角色组 9 | run_once: true 10 | assert: 11 | that: "{{ item not in (groups['etcd'] + groups['new-etcd'])}}" 12 | msg: "当前节点: {{ item }},在 etcd 角色组中,请先执行移除 etcd 角色操作。" 13 | with_items: "{{ groups['del-node'] }}" 14 | 15 | - name: 校验将被移除的节点是否属于 master 角色组 16 | run_once: true 17 | assert: 18 | that: "{{ item not in (groups['kube-master'] + groups['new-master'])}}" 19 | msg: "当前节点: {{ item }},在 master 角色组中,请先执行移除 master 角色操作。" 20 | with_items: "{{ groups['del-node'] }}" 21 | 22 | - name: "移除节点:{{ inventory_hostname }}" 23 | shell: kubectl delete node {{ inventory_hostname }} 24 | ignore_errors: true 25 | delegate_to: "{{ groups['kube-master'][0] }}" 26 | when: inventory_hostname in (groups['kube-worker'] + groups['new-worker']) -------------------------------------------------------------------------------- /roles/remove/worker/defaults/main.yml: -------------------------------------------------------------------------------- 1 | # 提权操作 2 | ansible_become: true 3 | 4 | drain_grace_period: 300 5 | drain_timeout: 360s -------------------------------------------------------------------------------- /roles/remove/worker/tasks/main.yml: -------------------------------------------------------------------------------- 1 | - name: 校验 worker 节点数量 2 | run_once: true 3 | assert: 4 | that: groups['kube-worker']|length > 1 5 | msg: "Worker 节点数量大于 1 才可进行 worker 节点移除操作。" 6 | 7 | - name: 校验将被移除的 worker 节点是否在原 worker 组中 8 | run_once: true 9 | assert: 10 | that: "{{ item in groups['kube-worker'] }}" 11 | msg: "当前节点: {{ item }},并未在 kube-worker 组中,不需要进行移除操作。" 12 | with_items: "{{ groups['del-worker'] }}" 13 | 14 | - name: 校验移除 worker 节点后剩余 worker 节点数量 15 | run_once: true 16 | assert: 17 | that: "{{ (groups['kube-worker'] | difference(groups['del-worker']))|length >= 1 }}" 18 | msg: "移除 worker 节点后,剩余 worker 节点数量应大于等于 1。" 19 | 20 | - name: "禁止 worker 节点:{{ 
inventory_hostname }} 进行调度" 21 | shell: kubectl cordon {{ inventory_hostname }} 22 | ignore_errors: true 23 | delegate_to: "{{ groups['kube-master'][0] }}" 24 | 25 | - name: "驱逐 worker 节点:{{ inventory_hostname }} 上运行的 Pod" 26 | shell: > 27 | kubectl drain 28 | --force 29 | --ignore-daemonsets 30 | --grace-period {{ drain_grace_period }} 31 | --timeout {{ drain_timeout }} 32 | --delete-local-data {{ inventory_hostname }} 33 | ignore_errors: true 34 | delegate_to: "{{ groups['kube-master'][0] }}" 35 | 36 | - name: 取消节点原有 worker 角色标签 37 | shell: > 38 | kubectl label node {{ inventory_hostname }} node-role.kubernetes.io/worker='' --overwrite && 39 | kubectl label node {{ inventory_hostname }} node-role.kubernetes.io/worker- 40 | ignore_errors: true 41 | delegate_to: "{{ groups['kube-master'][0] }}" -------------------------------------------------------------------------------- /roles/reset/tasks/centos.yml: -------------------------------------------------------------------------------- 1 | - name: 卸载相关软件 2 | yum: 3 | name: 4 | - kubeadm 5 | - kubectl 6 | - kubelet 7 | - docker-ce 8 | - docker-ce-cli 9 | - containerd.io 10 | state: absent -------------------------------------------------------------------------------- /roles/reset/tasks/debian.yml: -------------------------------------------------------------------------------- 1 | - name: 卸载相关软件 2 | apt: 3 | name: 4 | - kubeadm 5 | - kubectl 6 | - kubelet 7 | - docker-ce 8 | - docker-ce-cli 9 | - containerd.io 10 | state: absent -------------------------------------------------------------------------------- /roles/upgrade/files/kubernetes.gpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/open-hand/kubeadm-ha/cb8b769ba6c1edd2595730d38f96be01b2c3861a/roles/upgrade/files/kubernetes.gpg -------------------------------------------------------------------------------- /roles/upgrade/tasks/centos.yml: -------------------------------------------------------------------------------- 1 | - name: 添加 Kubernetes yum 仓库 2 | yum_repository: 3 | name: kubernetes 4 | description: Kubernetes Repository 5 | baseurl: "{{ kubernetes_yum_repo }}" 6 | enabled: no 7 | gpgcheck: no 8 | state: present 9 | 10 | - name: "安装 kubeadm-{{ kube_upgrade_version }}" 11 | yum: 12 | name: 13 | - "kubeadm-{{ kube_upgrade_version }}" 14 | state: present 15 | enablerepo: kubernetes 16 | 17 | - include_tasks: common.yml 18 | when: inventory_hostname in (groups['kube-master'] + groups['new-master'] + groups['kube-worker'] + groups['new-worker']) 19 | 20 | - name: "安装 kubelet-{{ kube_upgrade_version }} kubectl-{{ kube_upgrade_version }}" 21 | yum: 22 | name: 23 | - "kubectl-{{ kube_upgrade_version }}" 24 | - "kubelet-{{ kube_upgrade_version }}" 25 | state: present 26 | enablerepo: kubernetes -------------------------------------------------------------------------------- /roles/upgrade/tasks/common.yml: -------------------------------------------------------------------------------- 1 | - name: 确认 kubeadm 版本 2 | command: "kubeadm version -o short" 3 | register: kubeadm_version_output 4 | 5 | - name: 设置 kubeadm api version 为 v1beta1 6 | set_fact: 7 | kubeadmConfig_api_version: v1beta1 8 | when: 9 | - kubeadm_version_output.stdout is version('v1.13.0', '>=') 10 | - kubeadm_version_output.stdout is version('v1.15.0', '<') 11 | 12 | - name: 设置 kubeadm api version 为 v1beta2 13 | set_fact: 14 | kubeadmConfig_api_version: v1beta2 15 | when: 16 | - kubeadm_version_output.stdout is version('v1.15.0', '>=') 17 | 18 | - 
name: 删除不需要的参数 iptables.max 19 | lineinfile: 20 | path: /etc/kubernetes/kubeadm-config.yaml 21 | regexp: 'max:' 22 | state: absent 23 | 24 | - name: 删除不需要的参数 resourceContainer 25 | lineinfile: 26 | path: /etc/kubernetes/kubeadm-config.yaml 27 | regexp: 'resourceContainer:' 28 | state: absent 29 | 30 | - name: 更新 kubeadm config 版本 31 | lineinfile: 32 | path: /etc/kubernetes/kubeadm-config.yaml 33 | regexp: '^kubernetesVersion' 34 | line: "kubernetesVersion: v{{ kube_upgrade_version }}" 35 | 36 | - name: "迁移 kubeadm config 至 v{{ kube_upgrade_version }} 版本" 37 | shell: > 38 | kubeadm config migrate 39 | --old-config=/etc/kubernetes/kubeadm-config.yaml 40 | --new-config=/etc/kubernetes/kubeadm-config.yaml 41 | when: kubeadmConfig_api_version != "v1beta2" 42 | 43 | - name: "升级第一个 master 节点: {{ inventory_hostname }} 至 v{{ kube_upgrade_version }}" 44 | shell: > 45 | kubeadm upgrade apply --config=/etc/kubernetes/kubeadm-config.yaml --force --ignore-preflight-errors=ImagePull 46 | when: inventory_hostname == groups['kube-master'][0] 47 | 48 | - name: "升级剩余 master 节点: {{ inventory_hostname }} 至 v{{ kube_upgrade_version }}" 49 | shell: > 50 | kubeadm upgrade node 51 | {% if kube_upgrade_version.split('.')[1]|int == 14 %} 52 | experimental-control-plane 53 | {% endif %} 54 | when: 55 | - inventory_hostname != groups['kube-master'][0] 56 | - inventory_hostname in (groups['kube-master'] + groups['new-master']) 57 | 58 | - name: "升级 worker 节点: {{ inventory_hostname }} 至 v{{ kube_upgrade_version }}" 59 | shell: > 60 | kubeadm upgrade node 61 | {% if kube_upgrade_version.split('.')[1]|int == 14 %} 62 | config --kubelet-version v{{ kube_upgrade_version }} 63 | {% endif %} 64 | when: 65 | - inventory_hostname in (groups['kube-worker'] + groups['new-worker']) 66 | - inventory_hostname not in (groups['kube-master'] + groups['new-master']) -------------------------------------------------------------------------------- /roles/upgrade/tasks/debian.yml: -------------------------------------------------------------------------------- 1 | - name: 生成 Kubernetes GPG 公钥 2 | copy: 3 | src: kubernetes.gpg 4 | dest: /tmp/kubernetes.gpg 5 | owner: root 6 | mode: 0644 7 | 8 | - name: 添加 Kubernetes GPG 公钥 9 | shell: apt-key add /tmp/kubernetes.gpg 10 | 11 | - name: 添加 Kubernetes apt 仓库 12 | apt_repository: 13 | repo: "{{ kubernetes_apt_repo }}" 14 | state: present 15 | 16 | - name: "安装 kubeadm-{{ kube_upgrade_version }}" 17 | apt: 18 | name: 19 | - "kubeadm={{ kube_upgrade_version }}-00" 20 | state: present 21 | allow_unauthenticated: true 22 | 23 | - include_tasks: common.yml 24 | when: inventory_hostname in (groups['kube-master'] + groups['new-master'] + groups['kube-worker'] + groups['new-worker']) 25 | 26 | - name: "安装 kubelet-{{ kube_upgrade_version }} kubectl-{{ kube_upgrade_version }}" 27 | apt: 28 | name: 29 | - "kubectl={{ kube_upgrade_version }}-00" 30 | - "kubelet={{ kube_upgrade_version }}-00" 31 | state: present 32 | allow_unauthenticated: true -------------------------------------------------------------------------------- /roles/upgrade/tasks/main.yml: -------------------------------------------------------------------------------- 1 | - include_tasks: centos.yml 2 | when: ansible_distribution in [ 'CentOS','OracleLinux','RedHat' ] 3 | 4 | - include_tasks: debian.yml 5 | when: ansible_distribution in [ 'Ubuntu','Debian' ] 6 | 7 | - name: 重新加载 daemon 8 | systemd: 9 | daemon_reload: yes 10 | 11 | - name: 重新启动 kubelet 12 | service: 13 | name: kubelet 14 | state: restarted 15 | enabled: yes 16 | 17 | - name: 更新 
kubectl 命令行自动补全 18 | shell: kubectl completion bash > /usr/share/bash-completion/completions/kubectl --------------------------------------------------------------------------------
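To tie the upgrade role together: common.yml first rewrites `kubernetesVersion` in /etc/kubernetes/kubeadm-config.yaml (and runs `kubeadm config migrate` when the on-disk config is not yet v1beta2), then performs `kubeadm upgrade apply` on the first master and `kubeadm upgrade node` on the remaining masters and workers, while centos.yml/debian.yml upgrade the kubeadm package before that step and kubelet/kubectl after it. A sketch of the relevant excerpt of the patched kubeadm config; the version value is hypothetical and everything else in the ClusterConfiguration is left untouched.

```yaml
# Hypothetical excerpt of /etc/kubernetes/kubeadm-config.yaml after the lineinfile edit;
# only kubernetesVersion is rewritten by the upgrade role.
apiVersion: kubeadm.k8s.io/v1beta2
kind: ClusterConfiguration
kubernetesVersion: v1.20.15
```

Upgrading kubeadm first means `kubeadm upgrade` runs with the target release's binary; kubelet and kubectl are only upgraded afterwards, and main.yml finishes by reloading systemd, restarting kubelet and refreshing the kubectl bash completion, as shown above.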