├── .gitignore
├── 01.prepare.yml
├── 02.etcd.yml
├── 03.kubectl.yml
├── 04.docker.yml
├── 05.kube-master.yml
├── 06.kube-node.yml
├── 07.calico.yml
├── 07.flannel.yml
├── 11.harbor.yml
├── 20.addnode.yml
├── 90.setup.yml
├── 99.clean.yml
├── LICENSE
├── README.md
├── ansible.cfg
├── bin
│   └── VERSION.md
├── branch.md
├── docs
│   ├── 00-集群规划和基础参数设定.md
│   ├── 01-创建CA证书和环境配置.md
│   ├── 02-安装etcd集群.md
│   ├── 03-配置kubectl命令行工具.md
│   ├── 04-安装docker服务.md
│   ├── 05-安装kube-master节点.md
│   ├── 06-安装kube-node节点.md
│   ├── 07-安装calico网络组件.md
│   ├── 07-安装flannel网络组件.md
│   ├── guide
│   │   ├── dashboard.md
│   │   ├── efk.md
│   │   ├── harbor.md
│   │   ├── heapster.md
│   │   ├── hpa.md
│   │   ├── index.md
│   │   ├── ingress.md
│   │   ├── kubedns.md
│   │   └── networkpolicy.md
│   ├── quickStart.md
│   └── upgrade.md
├── down
│   └── download.sh
├── example
│   ├── hosts.allinone.example
│   ├── hosts.m-masters.example
│   └── hosts.s-master.example
├── manifests
│   ├── dashboard
│   │   ├── kubernetes-dashboard.yaml
│   │   ├── ui-admin-rbac.yaml
│   │   └── ui-read-rbac.yaml
│   ├── heapster
│   │   ├── grafana.yaml
│   │   ├── heapster.yaml
│   │   └── influxdb.yaml
│   ├── ingress
│   │   ├── test-hello.ing.yaml
│   │   ├── traefik-ingress.yaml
│   │   └── traefik-ui.ing.yaml
│   └── kubedns
│       ├── kubedns.yaml
│       └── readme.md
├── pics
│   ├── alipay.png
│   ├── ansible.jpg
│   ├── docker.jpg
│   ├── grafana.png
│   ├── influxdb.png
│   └── kube.jpg
└── roles
    ├── calico
    │   ├── tasks
    │   │   └── main.yml
    │   └── templates
    │       ├── calico-csr.json.j2
    │       ├── calico-rbac.yaml.j2
    │       ├── calico.yaml.j2
    │       └── calicoctl.cfg.j2
    ├── deploy
    │   ├── tasks
    │   │   └── main.yml
    │   └── templates
    │       ├── ca-config.json.j2
    │       ├── ca-csr.json.j2
    │       └── kubedns.yaml.j2
    ├── docker
    │   ├── files
    │   │   ├── daemon.json
    │   │   ├── docker
    │   │   └── docker-tag
    │   ├── tasks
    │   │   └── main.yml
    │   └── templates
    │       └── docker.service.j2
    ├── etcd
    │   ├── tasks
    │   │   └── main.yml
    │   └── templates
    │       ├── etcd-csr.json.j2
    │       └── etcd.service.j2
    ├── flannel
    │   ├── tasks
    │   │   └── main.yml
    │   └── templates
    │       └── kube-flannel.yaml.j2
    ├── harbor
    │   ├── tasks
    │   │   └── main.yml
    │   └── templates
    │       ├── harbor-csr.json.j2
    │       └── harbor.cfg.j2
    ├── kube-master
    │   ├── tasks
    │   │   └── main.yml
    │   └── templates
    │       ├── basic-auth.csv.j2
    │       ├── kube-apiserver.service.j2
    │       ├── kube-controller-manager.service.j2
    │       ├── kube-scheduler.service.j2
    │       ├── kubernetes-csr.json.j2
    │       └── token.csv.j2
    ├── kube-node
    │   ├── tasks
    │   │   └── main.yml
    │   └── templates
    │       ├── cni-default.conf.j2
    │       ├── kube-proxy-csr.json.j2
    │       ├── kube-proxy.service.j2
    │       └── kubelet.service.j2
    ├── kubectl
    │   ├── tasks
    │   │   └── main.yml
    │   └── templates
    │       └── admin-csr.json.j2
    ├── lb
    │   ├── tasks
    │   │   └── main.yml
    │   └── templates
    │       ├── haproxy.cfg.j2
    │       ├── haproxy.service.j2
    │       ├── keepalived-backup.conf.j2
    │       └── keepalived-master.conf.j2
    └── prepare
        ├── files
        │   └── 95-k8s-sysctl.conf
        └── tasks
            └── main.yml

/.gitignore:
--------------------------------------------------------------------------------
1 | down/*
2 | !down/download.sh
3 | bin/*
4 | !bin/VERSION.md
5 | hosts
6 | *.crt
7 | *.pem
8 | roles/prepare/files/ca*
9 | 
--------------------------------------------------------------------------------
/01.prepare.yml:
--------------------------------------------------------------------------------
1 | # 在deploy节点生成CA相关证书,以及kubedns.yaml配置文件
2 | - hosts: deploy
3 |   roles:
4 |   - deploy
5 | 
6 | # 集群节点的公共配置任务
7 | - hosts:
8 |   - kube-cluster
9 |   - deploy
10 |   - etcd
11 |   - lb
12 |   roles:
13 |   - prepare
14 | 
15 | # [可选]多master部署时的负载均衡配置
16 | - hosts: lb
17 |   roles:
18 |   - lb
19 | 
--------------------------------------------------------------------------------
/02.etcd.yml:
--------------------------------------------------------------------------------
1 | - hosts: etcd
2 |   roles:
3 |   - etcd
4 | 
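说明:以上按 01~07 编号的 playbook 对应分步安装的各个步骤,90.setup.yml 是一步安装的入口,99.clean.yml 用于清理集群(详见下文 docs/00-集群规划和基础参数设定.md)。下面给出一个分步执行的示意,仅为示例:假设已按 docs/00 的说明把 example 下的 hosts 模板复制为 /etc/ansible/hosts 并完成修改。

``` bash
# 分步安装示例(顺序与各步骤说明详见 docs/00-集群规划和基础参数设定.md)
cd /etc/ansible
ansible all -m ping                  # 先验证部署节点到各节点的连通性
ansible-playbook 01.prepare.yml
ansible-playbook 02.etcd.yml
ansible-playbook 03.kubectl.yml
ansible-playbook 04.docker.yml
ansible-playbook 05.kube-master.yml
ansible-playbook 06.kube-node.yml
ansible-playbook 07.calico.yml       # 或 ansible-playbook 07.flannel.yml,网络插件二选一
# 也可以一步安装:
# ansible-playbook 90.setup.yml
```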
--------------------------------------------------------------------------------
/03.kubectl.yml:
--------------------------------------------------------------------------------
1 | - hosts:
2 |   - kube-cluster
3 |   - deploy
4 |   roles:
5 |   - kubectl
6 | 
--------------------------------------------------------------------------------
/04.docker.yml:
--------------------------------------------------------------------------------
1 | - hosts: kube-cluster
2 |   gather_facts: True
3 |   roles:
4 |   - docker
5 | 
--------------------------------------------------------------------------------
/05.kube-master.yml:
--------------------------------------------------------------------------------
1 | - hosts: kube-master
2 |   roles:
3 |   - kube-master
4 | 
--------------------------------------------------------------------------------
/06.kube-node.yml:
--------------------------------------------------------------------------------
1 | - hosts: kube-node
2 |   roles:
3 |   - kube-node
4 | 
--------------------------------------------------------------------------------
/07.calico.yml:
--------------------------------------------------------------------------------
1 | - hosts: kube-cluster
2 |   roles:
3 |   - calico
4 | 
--------------------------------------------------------------------------------
/07.flannel.yml:
--------------------------------------------------------------------------------
1 | - hosts: kube-cluster
2 |   roles:
3 |   - flannel
4 | 
--------------------------------------------------------------------------------
/11.harbor.yml:
--------------------------------------------------------------------------------
1 | - hosts: harbor
2 |   roles:
3 |   - prepare
4 |   - docker
5 |   - harbor
6 | 
7 | - hosts: kube-node
8 |   tasks:
9 |   - name: harbor证书目录创建
10 |     file: name=/etc/docker/certs.d/{{ HARBOR_DOMAIN }} state=directory
11 | 
12 |   - name: harbor服务器证书安装
13 |     copy: src={{ base_dir }}/roles/prepare/files/ca.pem dest=/etc/docker/certs.d/{{ HARBOR_DOMAIN }}/ca.crt
14 | 
15 |   # 如果你的环境中有dns服务器,可以跳过hosts文件设置
16 |   - name: 增加harbor的hosts解析
17 |     shell: "sed -i '/{{ HARBOR_DOMAIN }}/d' /etc/hosts && \
18 |            echo {{ HARBOR_IP }} {{ HARBOR_DOMAIN }} >> /etc/hosts"
19 | 
--------------------------------------------------------------------------------
/20.addnode.yml:
--------------------------------------------------------------------------------
1 | - hosts: new-node
2 |   roles:
3 |   - prepare
4 |   - kubectl
5 |   - docker
6 |   - calico
7 |   - kube-node
8 | 
9 | - hosts: deploy
10 |   tasks:
11 |   - name: 批准新增node节点
12 |     shell: "sleep 15 && {{ bin_dir }}/kubectl get csr|grep 'Pending' | awk 'NR>0{print $1}'| xargs {{ bin_dir }}/kubectl certificate approve"
13 |     ignore_errors: true
14 | 
--------------------------------------------------------------------------------
/90.setup.yml:
--------------------------------------------------------------------------------
1 | # 在deploy节点生成CA相关证书,以供整个集群使用
2 | # 以及初始化kubedns.yaml配置文件
3 | - hosts: deploy
4 |   roles:
5 |   - deploy
6 | 
7 | # 集群节点的公共配置任务
8 | - hosts:
9 |   - kube-cluster
10 |   - deploy
11 |   - etcd
12 |   - lb
13 |   roles:
14 |   - prepare
15 | 
16 | # [可选]多master部署时的负载均衡配置
17 | - hosts: lb
18 |   roles:
19 |   - lb
20 | 
21 | # 创建etcd集群
22 | - hosts: etcd
23 |   roles:
24 |   - etcd
25 | 
26 | # kubectl 客户端配置
27 | - hosts:
28 |   - kube-cluster
29 |   - deploy
30 |   roles:
31 |   - kubectl
32 | 
33 | # docker服务安装
34 | - hosts: kube-cluster
35 |   roles:
36 |   - docker
37 | 
38 | # master 节点部署
39 | - hosts: kube-master
40 |   roles:
41 |   - kube-master
42 | 
43 | # node 节点部署
44 | - hosts: kube-node
45 |   roles:
46 |   - kube-node
47 | 
48 | # 集群网络插件部署,只能选择一种安装
49 | - hosts: kube-cluster
50 |   roles:
51 |   - { role: calico, when: "CLUSTER_NETWORK == 'calico'" }
52 |   - { role: flannel, when: "CLUSTER_NETWORK == 'flannel'" }
53 | 
--------------------------------------------------------------------------------
/99.clean.yml:
--------------------------------------------------------------------------------
1 | # 警告:此脚本将清理整个K8S集群,包括所有POD、ETCD数据等
2 | # 请三思后运行此脚本 ansible-playbook 99.clean.yml
3 | 
4 | - hosts:
5 |   - kube-node
6 |   - new-node
7 |   tasks:
8 |   - name: stop kube-node service
9 |     shell: "systemctl stop kubelet kube-proxy"
10 |     ignore_errors: true
11 | 
12 |   - name: umount kubelet 挂载的目录
13 |     shell: "mount | grep '/var/lib/kubelet'| awk '{print $3}'|xargs umount"
14 |     ignore_errors: true
15 | 
16 |   - name: 清理目录和文件
17 |     file: name={{ item }} state=absent
18 |     with_items:
19 |     - "/var/lib/kubelet/"
20 |     - "/var/lib/kube-proxy/"
21 |     - "/etc/kubernetes/"
22 |     - "/etc/systemd/system/kubelet.service"
23 |     - "/etc/systemd/system/kube-proxy.service"
24 |     # - "/root/local/bin/"
25 | 
26 | - hosts: kube-master
27 |   tasks:
28 |   - name: stop kube-master service
29 |     shell: "systemctl stop kube-apiserver kube-controller-manager kube-scheduler"
30 |     ignore_errors: true
31 | 
32 |   - name: 清理目录和文件
33 |     file: name={{ item }} state=absent
34 |     with_items:
35 |     - "/var/run/kubernetes"
36 |     - "/etc/systemd/system/kube-apiserver.service"
37 |     - "/etc/systemd/system/kube-controller-manager.service"
38 |     - "/etc/systemd/system/kube-scheduler.service"
39 |     - "/etc/kubernetes/"
40 | 
41 | - hosts:
42 |   - kube-cluster
43 |   - new-node
44 |   - deploy
45 |   tasks:
46 |   - name: stop docker service
47 |     shell: "systemctl stop docker"
48 |     ignore_errors: true
49 | 
50 |   # 因为calico-kube-controller使用了host网络,相当于使用了docker -net=host,需要
51 |   # 卸载 /var/run/docker/netns/default
52 |   - name: 卸载docker 相关fs1
53 |     mount: path=/var/run/docker/netns/default state=unmounted
54 | 
55 |   - name: 卸载docker 相关fs2
56 |     mount: path=/var/lib/docker/overlay state=unmounted
57 | 
58 |   - name: 清理目录和文件
59 |     file: name={{ item }} state=absent
60 |     with_items:
61 |     - "/etc/cni/"
62 |     - "/root/.kube/"
63 |     - "/run/flannel/"
64 |     - "/etc/calico/"
65 |     - "/var/run/calico/"
66 |     - "/var/log/calico/"
67 |     - "/var/lib/docker/"
68 |     - "/var/run/docker/"
69 |     - "/etc/systemd/system/calico-node.service"
70 |     - "/etc/systemd/system/docker.service"
71 |     - "/etc/systemd/system/docker.service.requires/"
72 | 
73 |   - name: 清理 iptables
74 |     shell: "iptables -F && iptables -X \
75 |            && iptables -F -t nat && iptables -X -t nat \
76 |            && iptables -F -t raw && iptables -X -t raw \
77 |            && iptables -F -t mangle && iptables -X -t mangle"
78 | 
79 |   - name: 清理网络
80 |     shell: "ip link del docker0; \
81 |            ip link del tunl0; \
82 |            ip link del flannel.1; \
83 |            ip link del cni0; \
84 |            ip link del mynet0; \
85 |            systemctl restart networking; \
86 |            systemctl restart network"
87 |     ignore_errors: true
88 | 
89 | - hosts: etcd
90 |   tasks:
91 |   - name: stop etcd service
92 |     shell: systemctl stop etcd
93 |     ignore_errors: true
94 | 
95 |   - name: 清理目录和文件
96 |     file: name={{ item }} state=absent
97 |     with_items:
98 |     - "/var/lib/etcd"
99 |     - "/etc/etcd/"
100 |     - "/etc/systemd/system/etcd.service"
101 | 
102 | - hosts: lb
103 |   tasks:
104 |   - name: stop keepalived service
105 |     shell: systemctl disable keepalived && systemctl stop keepalived
106 |     ignore_errors: true
107 | 
108 |   - name: stop haproxy service
109 |     shell: systemctl disable haproxy && systemctl stop haproxy
110 |     ignore_errors: true
111 | 
112 |   - name:
清理LB 配置文件目录 113 | file: name={{ item }} state=absent 114 | with_items: 115 | - "/etc/haproxy" 116 | - "/etc/keepalived" 117 | ignore_errors: true 118 | 119 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, and 10 | distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by the copyright 13 | owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all other entities 16 | that control, are controlled by, or are under common control with that entity. 17 | For the purposes of this definition, "control" means (i) the power, direct or 18 | indirect, to cause the direction or management of such entity, whether by 19 | contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the 20 | outstanding shares, or (iii) beneficial ownership of such entity. 21 | 22 | "You" (or "Your") shall mean an individual or Legal Entity exercising 23 | permissions granted by this License. 24 | 25 | "Source" form shall mean the preferred form for making modifications, including 26 | but not limited to software source code, documentation source, and configuration 27 | files. 28 | 29 | "Object" form shall mean any form resulting from mechanical transformation or 30 | translation of a Source form, including but not limited to compiled object code, 31 | generated documentation, and conversions to other media types. 32 | 33 | "Work" shall mean the work of authorship, whether in Source or Object form, made 34 | available under the License, as indicated by a copyright notice that is included 35 | in or attached to the work (an example is provided in the Appendix below). 36 | 37 | "Derivative Works" shall mean any work, whether in Source or Object form, that 38 | is based on (or derived from) the Work and for which the editorial revisions, 39 | annotations, elaborations, or other modifications represent, as a whole, an 40 | original work of authorship. For the purposes of this License, Derivative Works 41 | shall not include works that remain separable from, or merely link (or bind by 42 | name) to the interfaces of, the Work and Derivative Works thereof. 43 | 44 | "Contribution" shall mean any work of authorship, including the original version 45 | of the Work and any modifications or additions to that Work or Derivative Works 46 | thereof, that is intentionally submitted to Licensor for inclusion in the Work 47 | by the copyright owner or by an individual or Legal Entity authorized to submit 48 | on behalf of the copyright owner. For the purposes of this definition, 49 | "submitted" means any form of electronic, verbal, or written communication sent 50 | to the Licensor or its representatives, including but not limited to 51 | communication on electronic mailing lists, source code control systems, and 52 | issue tracking systems that are managed by, or on behalf of, the Licensor for 53 | the purpose of discussing and improving the Work, but excluding communication 54 | that is conspicuously marked or otherwise designated in writing by the copyright 55 | owner as "Not a Contribution." 
56 | 57 | "Contributor" shall mean Licensor and any individual or Legal Entity on behalf 58 | of whom a Contribution has been received by Licensor and subsequently 59 | incorporated within the Work. 60 | 61 | 2. Grant of Copyright License. 62 | 63 | Subject to the terms and conditions of this License, each Contributor hereby 64 | grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, 65 | irrevocable copyright license to reproduce, prepare Derivative Works of, 66 | publicly display, publicly perform, sublicense, and distribute the Work and such 67 | Derivative Works in Source or Object form. 68 | 69 | 3. Grant of Patent License. 70 | 71 | Subject to the terms and conditions of this License, each Contributor hereby 72 | grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, 73 | irrevocable (except as stated in this section) patent license to make, have 74 | made, use, offer to sell, sell, import, and otherwise transfer the Work, where 75 | such license applies only to those patent claims licensable by such Contributor 76 | that are necessarily infringed by their Contribution(s) alone or by combination 77 | of their Contribution(s) with the Work to which such Contribution(s) was 78 | submitted. If You institute patent litigation against any entity (including a 79 | cross-claim or counterclaim in a lawsuit) alleging that the Work or a 80 | Contribution incorporated within the Work constitutes direct or contributory 81 | patent infringement, then any patent licenses granted to You under this License 82 | for that Work shall terminate as of the date such litigation is filed. 83 | 84 | 4. Redistribution. 85 | 86 | You may reproduce and distribute copies of the Work or Derivative Works thereof 87 | in any medium, with or without modifications, and in Source or Object form, 88 | provided that You meet the following conditions: 89 | 90 | You must give any other recipients of the Work or Derivative Works a copy of 91 | this License; and 92 | You must cause any modified files to carry prominent notices stating that You 93 | changed the files; and 94 | You must retain, in the Source form of any Derivative Works that You distribute, 95 | all copyright, patent, trademark, and attribution notices from the Source form 96 | of the Work, excluding those notices that do not pertain to any part of the 97 | Derivative Works; and 98 | If the Work includes a "NOTICE" text file as part of its distribution, then any 99 | Derivative Works that You distribute must include a readable copy of the 100 | attribution notices contained within such NOTICE file, excluding those notices 101 | that do not pertain to any part of the Derivative Works, in at least one of the 102 | following places: within a NOTICE text file distributed as part of the 103 | Derivative Works; within the Source form or documentation, if provided along 104 | with the Derivative Works; or, within a display generated by the Derivative 105 | Works, if and wherever such third-party notices normally appear. The contents of 106 | the NOTICE file are for informational purposes only and do not modify the 107 | License. You may add Your own attribution notices within Derivative Works that 108 | You distribute, alongside or as an addendum to the NOTICE text from the Work, 109 | provided that such additional attribution notices cannot be construed as 110 | modifying the License. 
111 | You may add Your own copyright statement to Your modifications and may provide 112 | additional or different license terms and conditions for use, reproduction, or 113 | distribution of Your modifications, or for any such Derivative Works as a whole, 114 | provided Your use, reproduction, and distribution of the Work otherwise complies 115 | with the conditions stated in this License. 116 | 117 | 5. Submission of Contributions. 118 | 119 | Unless You explicitly state otherwise, any Contribution intentionally submitted 120 | for inclusion in the Work by You to the Licensor shall be under the terms and 121 | conditions of this License, without any additional terms or conditions. 122 | Notwithstanding the above, nothing herein shall supersede or modify the terms of 123 | any separate license agreement you may have executed with Licensor regarding 124 | such Contributions. 125 | 126 | 6. Trademarks. 127 | 128 | This License does not grant permission to use the trade names, trademarks, 129 | service marks, or product names of the Licensor, except as required for 130 | reasonable and customary use in describing the origin of the Work and 131 | reproducing the content of the NOTICE file. 132 | 133 | 7. Disclaimer of Warranty. 134 | 135 | Unless required by applicable law or agreed to in writing, Licensor provides the 136 | Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, 137 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, 138 | including, without limitation, any warranties or conditions of TITLE, 139 | NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are 140 | solely responsible for determining the appropriateness of using or 141 | redistributing the Work and assume any risks associated with Your exercise of 142 | permissions under this License. 143 | 144 | 8. Limitation of Liability. 145 | 146 | In no event and under no legal theory, whether in tort (including negligence), 147 | contract, or otherwise, unless required by applicable law (such as deliberate 148 | and grossly negligent acts) or agreed to in writing, shall any Contributor be 149 | liable to You for damages, including any direct, indirect, special, incidental, 150 | or consequential damages of any character arising as a result of this License or 151 | out of the use or inability to use the Work (including but not limited to 152 | damages for loss of goodwill, work stoppage, computer failure or malfunction, or 153 | any and all other commercial damages or losses), even if such Contributor has 154 | been advised of the possibility of such damages. 155 | 156 | 9. Accepting Warranty or Additional Liability. 157 | 158 | While redistributing the Work or Derivative Works thereof, You may choose to 159 | offer, and charge a fee for, acceptance of support, warranty, indemnity, or 160 | other liability obligations and/or rights consistent with this License. However, 161 | in accepting such obligations, You may act only on Your own behalf and on Your 162 | sole responsibility, not on behalf of any other Contributor, and only if You 163 | agree to indemnify, defend, and hold each Contributor harmless for any liability 164 | incurred by, or claims asserted against, such Contributor by reason of your 165 | accepting any such warranty or additional liability. 
166 | 167 | END OF TERMS AND CONDITIONS 168 | 169 | APPENDIX: How to apply the Apache License to your work 170 | 171 | To apply the Apache License to your work, attach the following boilerplate 172 | notice, with the fields enclosed by brackets "{}" replaced with your own 173 | identifying information. (Don't include the brackets!) The text should be 174 | enclosed in the appropriate comment syntax for the file format. We also 175 | recommend that a file or class name and description of purpose be included on 176 | the same "printed page" as the copyright notice for easier identification within 177 | third-party archives. 178 | 179 | Copyright 2017 jmgao 180 | 181 | Licensed under the Apache License, Version 2.0 (the "License"); 182 | you may not use this file except in compliance with the License. 183 | You may obtain a copy of the License at 184 | 185 | http://www.apache.org/licenses/LICENSE-2.0 186 | 187 | Unless required by applicable law or agreed to in writing, software 188 | distributed under the License is distributed on an "AS IS" BASIS, 189 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 190 | See the License for the specific language governing permissions and 191 | limitations under the License. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 利用Ansible部署kubernetes集群 2 | 3 | ![docker](./pics/docker.jpg) ![kube](./pics/kube.jpg) ![ansible](./pics/ansible.jpg) 4 | 5 | 本系列文档致力于提供快速部署高可用`k8s`集群的工具,并且也努力成为`k8s`实践、使用的参考书;基于二进制方式部署和利用`ansible-playbook`实现自动化:既提供一键安装脚本,也可以分步执行安装各个组件,同时讲解每一步主要参数配置和注意事项;二进制方式部署有助于理解系统各组件的交互原理和熟悉组件启动参数,有助于快速排查解决实际问题。 6 | 7 | **集群特性:`TLS` 双向认证、`RBAC` 授权、多`Master`高可用、支持`Network Policy`** 8 | 9 | **注意:** 为提高集群网络插件安装的灵活性,使用`DaemonSet Pod`方式运行网络插件,目前支持`Calico` `flannel`可选 10 | 11 | 文档基于`Ubuntu 16.04/CentOS 7`,其他系统需要读者自行替换部分命令;由于使用经验有限和简化脚本考虑,已经尽量避免`ansible-playbook`的高级特性和复杂逻辑。 12 | 13 | 你可能需要掌握基本`kubernetes` `docker` `linux shell` 知识,关于`ansible`建议阅读 [ansible超快入门](http://weiweidefeng.blog.51cto.com/1957995/1895261) 基本够用。 14 | 15 | 请阅读[项目分支说明](branch.md),欢迎提`Issues`和`PRs`参与维护项目。 16 | 17 | ## 组件版本 18 | 19 | 1. kubernetes v1.9.1 20 | 1. etcd v3.2.13 21 | 1. docker 17.12.0-ce 22 | 1. calico/node v2.6.5 23 | 1. flannel v0.9.1 24 | 25 | + 附:集群用到的所有二进制文件已打包好供下载 [https://pan.baidu.com/s/1c4RFaA](https://pan.baidu.com/s/1c4RFaA) 26 | + 注:`Kubernetes v1.8.x` 版本请切换到项目分支 `v1.8`, 若你需要从v1.8 升级至 v1.9,请参考 [升级注意](docs/upgrade.md) 27 | 28 | ## 快速指南 29 | 30 | 单机快速体验k8s集群的测试、开发环境--[AllinOne部署](docs/quickStart.md);在国内的网络环境下要比官方的minikube方便、简单很多。 31 | 32 | ## 安装步骤 33 | 34 | 1. [集群规划和基础参数设定](docs/00-集群规划和基础参数设定.md) 35 | 1. [创建CA证书和环境配置](docs/01-创建CA证书和环境配置.md) 36 | 1. [安装etcd集群](docs/02-安装etcd集群.md) 37 | 1. [配置kubectl命令行工具](docs/03-配置kubectl命令行工具.md) 38 | 1. [安装docker服务](docs/04-安装docker服务.md) 39 | 1. [安装kube-master节点](docs/05-安装kube-master节点.md) 40 | 1. [安装kube-node节点](docs/06-安装kube-node节点.md) 41 | 1. [安装calico网络组件](docs/07-安装calico网络组件.md) 42 | 1. [安装flannel网络组件](docs/07-安装flannel网络组件.md) 43 | 44 | ## 使用指南 45 | 46 | - 常用插件部署 [kubedns](docs/guide/kubedns.md) [dashboard](docs/guide/dashboard.md) [heapster](docs/guide/heapster.md) [ingress](docs/guide/ingress.md) [efk](docs/guide/efk.md) [harbor](docs/guide/harbor.md) 47 | - K8S 特性实验 [HPA](docs/guide/hpa.md) [NetworkPolicy](docs/guide/networkpolicy.md) 48 | - 集群运维指南 49 | - 应用部署实践 50 | 51 | 请根据这份 [目录](docs/guide/index.md) 阅读你所感兴趣的内容,尚在更新中... 52 | 53 | ## 参考阅读 54 | 55 | 1. 
建议阅读 [rootsongjc-Kubernetes指南](https://github.com/rootsongjc/kubernetes-handbook) 原理和实践指南。 56 | 1. 建议阅读 [feisky-Kubernetes指南](https://github.com/feiskyer/kubernetes-handbook/blob/master/SUMMARY.md) 原理和部署章节。 57 | 1. 建议阅读 [opsnull-安装教程](https://github.com/opsnull/follow-me-install-kubernetes-cluster) 二进制手工部署。 58 | 59 | ## 版权 60 | 61 | Copyright 2017 gjmzj (jmgaozz@163.com) 62 | 63 | Apache License 2.0,详情见 [LICENSE](LICENSE) 文件。 64 | 65 | 如果觉得这份文档对你有帮助,请支付宝扫描下方的二维码进行捐赠,谢谢! 66 | 67 | ![donate](./pics/alipay.png) 68 | -------------------------------------------------------------------------------- /ansible.cfg: -------------------------------------------------------------------------------- 1 | # config file for ansible -- http://ansible.com/ 2 | # ============================================== 3 | 4 | # nearly all parameters can be overridden in ansible-playbook 5 | # or with command line flags. ansible will read ANSIBLE_CONFIG, 6 | # ansible.cfg in the current working directory, .ansible.cfg in 7 | # the home directory or /etc/ansible/ansible.cfg, whichever it 8 | # finds first 9 | 10 | [defaults] 11 | 12 | # some basic default values... 13 | 14 | #inventory = /etc/ansible/hosts 15 | #library = /usr/share/my_modules/ 16 | #remote_tmp = $HOME/.ansible/tmp 17 | #forks = 5 18 | #poll_interval = 15 19 | #sudo_user = root 20 | #ask_sudo_pass = True 21 | #ask_pass = True 22 | #transport = smart 23 | #remote_port = 22 24 | #module_lang = C 25 | 26 | # plays will gather facts by default, which contain information about 27 | # the remote system. 28 | # 29 | # smart - gather by default, but don't regather if already gathered 30 | # implicit - gather by default, turn off with gather_facts: False 31 | # explicit - do not gather by default, must say gather_facts: True 32 | gathering = explicit 33 | 34 | # additional paths to search for roles in, colon separated 35 | roles_path = /etc/ansible/roles 36 | 37 | # uncomment this to disable SSH key host checking 38 | #host_key_checking = False 39 | 40 | # change the default callback 41 | #stdout_callback = skippy 42 | # enable additional callbacks 43 | #callback_whitelist = timer, mail 44 | 45 | # change this for alternative sudo implementations 46 | #sudo_exe = sudo 47 | 48 | # What flags to pass to sudo 49 | # WARNING: leaving out the defaults might create unexpected behaviours 50 | #sudo_flags = -H -S -n 51 | 52 | # SSH timeout 53 | #timeout = 10 54 | 55 | # default user to use for playbooks if user is not specified 56 | # (/usr/bin/ansible will use current user as default) 57 | #remote_user = root 58 | 59 | # logging is off by default unless this path is defined 60 | # if so defined, consider logrotate 61 | #log_path = /var/log/ansible.log 62 | 63 | # default module name for /usr/bin/ansible 64 | #module_name = command 65 | 66 | # use this shell for commands executed under sudo 67 | # you may need to change this to bin/bash in rare instances 68 | # if sudo is constrained 69 | #executable = /bin/sh 70 | 71 | # if inventory variables overlap, does the higher precedence one win 72 | # or are hash values merged together? The default is 'replace' but 73 | # this can also be set to 'merge'. 74 | #hash_behaviour = replace 75 | 76 | # by default, variables from roles will be visible in the global variable 77 | # scope. 
To prevent this, the following option can be enabled, and only 78 | # tasks and handlers within the role will see the variables there 79 | #private_role_vars = yes 80 | 81 | # list any Jinja2 extensions to enable here: 82 | #jinja2_extensions = jinja2.ext.do,jinja2.ext.i18n 83 | 84 | # if set, always use this private key file for authentication, same as 85 | # if passing --private-key to ansible or ansible-playbook 86 | #private_key_file = /path/to/file 87 | 88 | # format of string {{ ansible_managed }} available within Jinja2 89 | # templates indicates to users editing templates files will be replaced. 90 | # replacing {file}, {host} and {uid} and strftime codes with proper values. 91 | #ansible_managed = Ansible managed: {file} modified on %Y-%m-%d %H:%M:%S by {uid} on {host} 92 | # This short version is better used in templates as it won't flag the file as changed every run. 93 | #ansible_managed = Ansible managed: {file} on {host} 94 | 95 | # by default, ansible-playbook will display "Skipping [host]" if it determines a task 96 | # should not be run on a host. Set this to "False" if you don't want to see these "Skipping" 97 | # messages. NOTE: the task header will still be shown regardless of whether or not the 98 | # task is skipped. 99 | #display_skipped_hosts = True 100 | 101 | # by default (as of 1.3), Ansible will raise errors when attempting to dereference 102 | # Jinja2 variables that are not set in templates or action lines. Uncomment this line 103 | # to revert the behavior to pre-1.3. 104 | #error_on_undefined_vars = False 105 | 106 | # by default (as of 1.6), Ansible may display warnings based on the configuration of the 107 | # system running ansible itself. This may include warnings about 3rd party packages or 108 | # other conditions that should be resolved if possible. 109 | # to disable these warnings, set the following value to False: 110 | #system_warnings = True 111 | 112 | # by default (as of 1.4), Ansible may display deprecation warnings for language 113 | # features that should no longer be used and will be removed in future versions. 114 | # to disable these warnings, set the following value to False: 115 | #deprecation_warnings = True 116 | 117 | # (as of 1.8), Ansible can optionally warn when usage of the shell and 118 | # command module appear to be simplified by using a default Ansible module 119 | # instead. These warnings can be silenced by adjusting the following 120 | # setting or adding warn=yes or warn=no to the end of the command line 121 | # parameter string. This will for example suggest using the git module 122 | # instead of shelling out to the git command. 123 | # command_warnings = False 124 | 125 | 126 | # set plugin path directories here, separate with colons 127 | #action_plugins = /usr/share/ansible/plugins/action 128 | #callback_plugins = /usr/share/ansible/plugins/callback 129 | #connection_plugins = /usr/share/ansible/plugins/connection 130 | #lookup_plugins = /usr/share/ansible/plugins/lookup 131 | #vars_plugins = /usr/share/ansible/plugins/vars 132 | #filter_plugins = /usr/share/ansible/plugins/filter 133 | #test_plugins = /usr/share/ansible/plugins/test 134 | 135 | # by default callbacks are not loaded for /bin/ansible, enable this if you 136 | # want, for example, a notification or logging callback to also apply to 137 | # /bin/ansible runs 138 | #bin_ansible_callbacks = False 139 | 140 | 141 | # don't like cows? that's unfortunate. 
142 | # set to 1 if you don't want cowsay support or export ANSIBLE_NOCOWS=1 143 | #nocows = 1 144 | 145 | # set which cowsay stencil you'd like to use by default. When set to 'random', 146 | # a random stencil will be selected for each task. The selection will be filtered 147 | # against the `cow_whitelist` option below. 148 | #cow_selection = default 149 | #cow_selection = random 150 | 151 | # when using the 'random' option for cowsay, stencils will be restricted to this list. 152 | # it should be formatted as a comma-separated list with no spaces between names. 153 | # NOTE: line continuations here are for formatting purposes only, as the INI parser 154 | # in python does not support them. 155 | #cow_whitelist=bud-frogs,bunny,cheese,daemon,default,dragon,elephant-in-snake,elephant,eyes,\ 156 | # hellokitty,kitty,luke-koala,meow,milk,moofasa,moose,ren,sheep,small,stegosaurus,\ 157 | # stimpy,supermilker,three-eyes,turkey,turtle,tux,udder,vader-koala,vader,www 158 | 159 | # don't like colors either? 160 | # set to 1 if you don't want colors, or export ANSIBLE_NOCOLOR=1 161 | #nocolor = 1 162 | 163 | # if set to a persistent type (not 'memory', for example 'redis') fact values 164 | # from previous runs in Ansible will be stored. This may be useful when 165 | # wanting to use, for example, IP information from one group of servers 166 | # without having to talk to them in the same playbook run to get their 167 | # current IP information. 168 | #fact_caching = memory 169 | 170 | 171 | # retry files 172 | # When a playbook fails by default a .retry file will be created in ~/ 173 | # You can disable this feature by setting retry_files_enabled to False 174 | # and you can change the location of the files by setting retry_files_save_path 175 | 176 | #retry_files_enabled = False 177 | #retry_files_save_path = ~/.ansible-retry 178 | 179 | 180 | # prevents logging of task data, off by default 181 | #no_log = False 182 | 183 | # prevents logging of tasks, but only on the targets, data is still logged on the master/controller 184 | #no_target_syslog = False 185 | 186 | # controls the compression level of variables sent to 187 | # worker processes. At the default of 0, no compression 188 | # is used. This value must be an integer from 0 to 9. 189 | #var_compression_level = 9 190 | 191 | [privilege_escalation] 192 | #become=True 193 | #become_method=sudo 194 | #become_user=root 195 | #become_ask_pass=False 196 | 197 | [paramiko_connection] 198 | 199 | # uncomment this line to cause the paramiko connection plugin to not record new host 200 | # keys encountered. Increases performance on new host additions. Setting works independently of the 201 | # host key checking setting above. 202 | #record_host_keys=False 203 | 204 | # by default, Ansible requests a pseudo-terminal for commands executed under sudo. Uncomment this 205 | # line to disable this behaviour. 206 | #pty=False 207 | 208 | [ssh_connection] 209 | 210 | # ssh arguments to use 211 | # Leaving off ControlPersist will result in poor performance, so use 212 | # paramiko on older platforms rather than removing it 213 | #ssh_args = -o ControlMaster=auto -o ControlPersist=60s 214 | 215 | # The path to use for the ControlPath sockets. This defaults to 216 | # "%(directory)s/ansible-ssh-%%h-%%p-%%r", however on some systems with 217 | # very long hostnames or very long path names (caused by long user names or 218 | # deeply nested home directories) this can exceed the character limit on 219 | # file socket names (108 characters for most platforms). 
In that case, you 220 | # may wish to shorten the string below. 221 | # 222 | # Example: 223 | # control_path = %(directory)s/%%h-%%r 224 | #control_path = %(directory)s/ansible-ssh-%%h-%%p-%%r 225 | 226 | # Enabling pipelining reduces the number of SSH operations required to 227 | # execute a module on the remote server. This can result in a significant 228 | # performance improvement when enabled, however when using "sudo:" you must 229 | # first disable 'requiretty' in /etc/sudoers 230 | # 231 | # By default, this option is disabled to preserve compatibility with 232 | # sudoers configurations that have requiretty (the default on many distros). 233 | # 234 | #pipelining = False 235 | 236 | # if True, make ansible use scp if the connection type is ssh 237 | # (default is sftp) 238 | #scp_if_ssh = True 239 | 240 | # if False, sftp will not use batch mode to transfer files. This may cause some 241 | # types of file transfer failures impossible to catch however, and should 242 | # only be disabled if your sftp version has problems with batch mode 243 | #sftp_batch_mode = False 244 | 245 | [accelerate] 246 | #accelerate_port = 5099 247 | #accelerate_timeout = 30 248 | #accelerate_connect_timeout = 5.0 249 | 250 | # The daemon timeout is measured in minutes. This time is measured 251 | # from the last activity to the accelerate daemon. 252 | #accelerate_daemon_timeout = 30 253 | 254 | # If set to yes, accelerate_multi_key will allow multiple 255 | # private keys to be uploaded to it, though each user must 256 | # have access to the system via SSH to add a new key. The default 257 | # is "no". 258 | #accelerate_multi_key = yes 259 | 260 | [selinux] 261 | # file systems that require special treatment when dealing with security context 262 | # the default behaviour that copies the existing context or uses the user default 263 | # needs to be changed to use the file system dependent context. 
264 | #special_context_filesystems=nfs,vboxsf,fuse,ramfs 265 | -------------------------------------------------------------------------------- /bin/VERSION.md: -------------------------------------------------------------------------------- 1 | # 主要组件版本 2 | 3 | + kubernetes v1.9.1 4 | + etcd v3.2.13 5 | + docker 17.12.0-ce 6 | -------------------------------------------------------------------------------- /branch.md: -------------------------------------------------------------------------------- 1 | ## 项目分支说明 2 | 3 | 目前项目分支为 `master` `v1.9` `v1.8`,说明如下: 4 | 5 | - `master` 分支将尽量使用最新版k8s和相关组件,网络使用`DaemonSet Pod`方式安装,目前提供`calico` `flannel` 可选 6 | - `v1.9` 分支将尽量使用k8s v1.9的最新小版本和相关组件,使用`systemd service`方式安装 `calico`网络 7 | - `v1.8` 分支将尽量使用k8s v1.8的最新小版本和相关组件,使用`systemd service`方式安装 `calico`网络 8 | -------------------------------------------------------------------------------- /docs/00-集群规划和基础参数设定.md: -------------------------------------------------------------------------------- 1 | ## 00-集群规划和基础参数设定.md 2 | 3 | 多节点高可用集群部署步骤与[AllinOne部署](quickStart.md)基本一致,增加LB 负载均衡部署步骤。 4 | 5 | ## 高可用集群所需节点配置如下: 6 | + 部署节点 x1 : 运行这份 ansible 脚本的节点 7 | + etcd节点 x3 : 注意etcd集群必须是1,3,5,7...奇数个节点 8 | + master节点 x2 : 根据实际集群规模可以增加节点数,需要额外规划一个master VIP(虚地址) 9 | + lb节点 x2 : 负载均衡节点两个,安装 haproxy+keepalived 10 | + node节点 x3 : 真正应用负载的节点,根据需要增加机器配置和节点数 11 | 12 | 生产环境使用建议一个节点只是一个角色,避免性能瓶颈问题,这里演示环境将节点绑定多个角色。项目预定义了3个例子,请修改后完成适合你的集群规划。 13 | 14 | + [单节点](../example/hosts.allinone.example) 15 | + [单主多节点](../example/hosts.s-master.example) 16 | + [多主多节点](../example/hosts.m-masters.example) 17 | 18 | ## 集群所用到的参数举例如下: 19 | ``` bash 20 | # ---------集群主要参数--------------- 21 | #集群 MASTER IP, 需要负载均衡,一般为VIP地址 22 | MASTER_IP="192.168.1.10" 23 | KUBE_APISERVER="https://192.168.1.10:8443" 24 | 25 | #pause镜像地址 26 | POD_INFRA_CONTAINER_IMAGE=mirrorgooglecontainers/pause-amd64:3.0 27 | 28 | #TLS Bootstrapping 使用的 Token,使用 head -c 16 /dev/urandom | od -An -t x | tr -d ' ' 生成 29 | BOOTSTRAP_TOKEN="c30302226d4b810e08731702d3890f50" 30 | 31 | # 集群网络插件,目前支持calico和flannel 32 | CLUSTER_NETWORK="calico" 33 | 34 | # 部分calico相关配置,更全配置可以去roles/calico/templates/calico.yaml.j2自定义 35 | # 设置 CALICO_IPV4POOL_IPIP=“off”,可以提高网络性能,条件限制详见 05.安装calico网络组件.md 36 | CALICO_IPV4POOL_IPIP="always" 37 | # 设置 calico-node使用的host IP,bgp邻居通过该地址建立,可手动指定端口"interface=eth0"或使用>如下自动发现 38 | IP_AUTODETECTION_METHOD="can-reach=223.5.5.5" 39 | 40 | # 部分flannel配置,详见roles/flannel/templates/kube-flannel.yaml.j2 41 | FLANNEL_BACKEND="vxlan" 42 | 43 | # 服务网段 (Service CIDR),部署前路由不可达,部署后集群内使用 IP:Port 可达 44 | SERVICE_CIDR="10.68.0.0/16" 45 | 46 | # POD 网段 (Cluster CIDR),部署前路由不可达,**部署后**路由可达 47 | CLUSTER_CIDR="172.20.0.0/16" 48 | 49 | # 服务端口范围 (NodePort Range) 50 | NODE_PORT_RANGE="20000-40000" 51 | 52 | # kubernetes 服务 IP (预分配,一般是 SERVICE_CIDR 中第一个IP) 53 | CLUSTER_KUBERNETES_SVC_IP="10.68.0.1" 54 | 55 | # 集群 DNS 服务 IP (从 SERVICE_CIDR 中预分配) 56 | CLUSTER_DNS_SVC_IP="10.68.0.2" 57 | 58 | # 集群 DNS 域名 59 | CLUSTER_DNS_DOMAIN="cluster.local." 
60 | 61 | # etcd 集群间通信的IP和端口, **根据实际 etcd 集群成员设置** 62 | ETCD_NODES="etcd1=https://192.168.1.1:2380,etcd2=https://192.168.1.2:2380,etcd3=https://192.168.1.3:2380" 63 | 64 | # etcd 集群服务地址列表, **根据实际 etcd 集群成员设置** 65 | ETCD_ENDPOINTS="https://192.168.1.1:2379,https://192.168.1.2:2379,https://192.168.1.3:2379" 66 | 67 | # 集群basic auth 使用的用户名和密码 68 | BASIC_AUTH_USER="admin" 69 | BASIC_AUTH_PASS="test1234" 70 | 71 | # ---------附加参数-------------------- 72 | #默认二进制文件目录 73 | bin_dir="/root/local/bin" 74 | 75 | #证书目录 76 | ca_dir="/etc/kubernetes/ssl" 77 | 78 | #部署目录,即 ansible 工作目录,建议不要修改 79 | base_dir="/etc/ansible" 80 | 81 | #私有仓库 harbor服务器 (域名或者IP) 82 | #HARBOR_IP="192.168.1.8" 83 | #HARBOR_DOMAIN="harbor.yourdomain.com" 84 | ``` 85 | 86 | + 请事先规划好使用何种网络插件(calico flannel),并配置对应网络插件的参数 87 | 88 | ## 部署步骤 89 | 90 | 按照[多主多节点](../example/hosts.m-masters.example)示例的节点配置,至少准备4台虚机,测试搭建一个多主高可用集群。 91 | 92 | ### 1.基础系统配置 93 | 94 | + 推荐内存2G/硬盘20G以上 95 | + 最小化安装`Ubuntu 16.04 server`或者`CentOS 7 Minimal` 96 | + 配置基础网络、更新源、SSH登陆等 97 | 98 | ### 2.在每个节点安装依赖工具 99 | 100 | Ubuntu 16.04 请执行以下脚本: 101 | 102 | ``` bash 103 | # 文档中脚本默认均以root用户执行 104 | apt-get update && apt-get upgrade -y && apt-get dist-upgrade -y 105 | # 删除不要的默认安装 106 | apt-get purge ufw lxd lxd-client lxcfs lxc-common 107 | # 安装python2 108 | apt-get install python2.7 109 | # Ubuntu16.04可能需要配置以下软连接 110 | ln -s /usr/bin/python2.7 /usr/bin/python 111 | ``` 112 | CentOS 7 请执行以下脚本: 113 | 114 | ``` bash 115 | # 文档中脚本默认均以root用户执行 116 | # 安装 epel 源并更新 117 | yum install epel-release -y 118 | yum update 119 | # 删除不要的默认安装 120 | yum erase firewalld firewalld-filesystem python-firewall -y 121 | # 安装python 122 | yum install python -y 123 | ``` 124 | ### 3.在deploy节点安装及准备ansible 125 | 126 | ``` bash 127 | # Ubuntu 16.04 128 | apt-get install git python-pip -y 129 | # CentOS 7 130 | yum install git python-pip -y 131 | # pip安装ansible(国内如果安装太慢可以直接用pip阿里云加速) 132 | #pip install pip --upgrade 133 | #pip install ansible 134 | pip install pip --upgrade -i http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com 135 | pip install --no-cache-dir ansible -i http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com 136 | ``` 137 | ### 4.在deploy节点配置免密码登陆 138 | 139 | ``` bash 140 | ssh-keygen -t rsa -b 2048 回车 回车 回车 141 | ssh-copy-id $IPs #$IPs为所有节点地址包括自身,按照提示输入yes 和root密码 142 | ``` 143 | ### 5.在deploy节点编排k8s安装 144 | 145 | ``` bash 146 | # 下载项目文件 147 | git clone https://github.com/gjmzj/kubeasz.git 148 | mv kubeasz /etc/ansible 149 | # 下载已打包好的binaries,并且解压缩到/etc/ansible/bin目录 150 | # 国内请从我分享的百度云链接下载 https://pan.baidu.com/s/1c4RFaA 151 | # 如果你有合适网络环境也可以按照/down/download.sh自行从官网下载各种tar包到 ./down目录,并执行download.sh 152 | tar zxvf k8s.191.tar.gz 153 | mv bin/* /etc/ansible/bin 154 | cd /etc/ansible 155 | cp example/hosts.m-masters.example hosts 156 | # 根据上文实际规划修改此hosts文件 157 | vi hosts 158 | ``` 159 | + 验证ansible安装 160 | 161 | 在deploy 节点使用如下命令 162 | 163 | ``` bash 164 | ansible all -m ping 165 | ``` 166 | 如果配置正确可以看到类似输出: 167 | 168 | ``` text 169 | 192.168.1.42 | SUCCESS => { 170 | "changed": false, 171 | "failed": false, 172 | "ping": "pong" 173 | } 174 | 192.168.1.43 | SUCCESS => { 175 | "changed": false, 176 | "failed": false, 177 | "ping": "pong" 178 | } 179 | 192.168.1.44 | SUCCESS => { 180 | "changed": false, 181 | "failed": false, 182 | "ping": "pong" 183 | } 184 | ``` 185 | + 开始安装集群,请阅读每步安装讲解后执行分步安装 186 | 187 | ``` bash 188 | #ansible-playbook 01.prepare.yml 189 | ###注意,如果后台进程有yum在运行,该命令会无限等待,不会超时退出并打印出错误,这个是Bug,需要手工将yum进程杀死。因为该任务里面需要将firewalld, 
firewalld-system,python-firewall三个组件使用yum命令删除掉,yum命令的缺陷。 190 | 191 | #ansible-playbook 02.etcd.yml 192 | #ansible-playbook 03.kubectl.yml 193 | #ansible-playbook 04.docker.yml 194 | #ansible-playbook 05.kube-master.yml 195 | #ansible-playbook 06.kube-node.yml 196 | #ansible-playbook 07.calico.yml 或者 ansible-playbook 07.flannel.yml 只能选择一种网络插件 197 | #ansible-playbook 90.setup.yml # 一步安装 198 | ``` 199 | 200 | [前一篇](quickStart.md) -- [后一篇](01-创建CA证书和环境配置.md) 201 | -------------------------------------------------------------------------------- /docs/01-创建CA证书和环境配置.md: -------------------------------------------------------------------------------- 1 | # 01-创建CA证书和环境配置.md 2 | 3 | 本步骤[01.prepare.yml](../01.prepare.yml)主要完成CA证书创建、分发、环境变量、负载均衡配置等。 4 | 5 | ### 创建 CA 证书和秘钥 6 | ``` bash 7 | roles/deploy 8 | ├── tasks 9 | │   └── main.yml 10 | └── templates 11 | ├── ca-config.json.j2 12 | └── ca-csr.json.j2 13 | ``` 14 | kubernetes 系统各组件需要使用 TLS 证书对通信进行加密,使用 CloudFlare 的 PKI 工具集生成自签名的CA证书,用来签名后续创建的其它 TLS 证书。[参考阅读](https://coreos.com/os/docs/latest/generate-self-signed-certificates.html) 15 | 16 | 根据认证对象可以将证书分成三类:服务器证书,客户端证书,对等证书 `peer cert`(表示既是`server cert`又是`client cert`),在kubernetes 集群中需要的证书种类如下: 17 | 18 | + `etcd` 节点需要标识自己监听服务的server cert,也需要client cert与`etcd`集群其他节点交互,当然可以分别指定2个证书,这里为简化使用一个peer 证书 19 | + `kube-apiserver` 需要标识apiserver服务的server cert,也需要client cert 从而操作`etcd`集群,这里为简化使用一个peer 证书 20 | + `kubectl` `calico` `kube-proxy` 只需要 client cert,因此证书请求中 hosts 字段可以为空 21 | + `kubelet` 证书比较特殊,不是手动生成,它由node节点`TLS BootStrap` 向`apiserver`请求,由master节点的`controller-manager` 自动签发,包含一个client cert 和一个server cert 22 | 23 | 请在另外窗口打开[roles/deploy/tasks/main.yml](../roles/deploy/tasks/main.yml) 文件,对照看以下讲解内容。 24 | 25 | #### 创建 CA 配置文件 [ca-config.json.j2](../roles/deploy/templates/ca-config.json.j2) 26 | ``` bash 27 | { 28 | "signing": { 29 | "default": { 30 | "expiry": "87600h" 31 | }, 32 | "profiles": { 33 | "kubernetes": { 34 | "usages": [ 35 | "signing", 36 | "key encipherment", 37 | "server auth", 38 | "client auth" 39 | ], 40 | "expiry": "87600h" 41 | } 42 | } 43 | } 44 | } 45 | ``` 46 | + `ca-config.json`:可以定义多个 profiles,分别指定不同的过期时间、使用场景等参数;这里为了方便使用 `kubernetes` 这个profile 签发三种不同类型证书 47 | + `signing`:表示该证书可用于签名其它证书;生成的 ca.pem 证书中 `CA=TRUE`; 48 | + `server auth`:表示 client 可以用该 CA 对 server 提供的证书进行验证; 49 | + `client auth`:表示 server 可以用该 CA 对 client 提供的证书进行验证; 50 | 51 | #### 创建 CA 证书签名请求 [ca-csr.json.j2](../roles/deploy/templates/ca-csr.json.j2) 52 | ``` bash 53 | { 54 | "CN": "kubernetes", 55 | "key": { 56 | "algo": "rsa", 57 | "size": 2048 58 | }, 59 | "names": [ 60 | { 61 | "C": "CN", 62 | "ST": "HangZhou", 63 | "L": "XS", 64 | "O": "k8s", 65 | "OU": "System" 66 | } 67 | ] 68 | } 69 | ``` 70 | 71 | #### 生成CA 证书和私钥 72 | ``` bash 73 | cfssl gencert -initca ca-csr.json | cfssljson -bare ca 74 | ``` 75 | + 注意整个集群只能有一个CA证书和配置文件,所以下一步要分发给每一个节点,包括calico/node也需要使用,`ansible` 角色(role) `prepare` 会完成CA 证书分发,所以把ca 证书相关先复制到 `roles/prepare/files/` 76 | 77 | #### 准备分发 CA证书 78 | 79 | ``` bash 80 | - name: 准备分发 CA证书 81 | copy: src={{ ca_dir }}/{{ item }} dest={{ base_dir }}/roles/prepare/files/{{ item }} force=no 82 | with_items: 83 | - ca.pem 84 | - ca-key.pem 85 | - ca.csr 86 | - ca-config.json 87 | ``` 88 | + force=no 保证整个安装的幂等性,如果已经生成过CA证书,就使用已经存在的CA,可以多次运行 `ansible-playbook 90.setup.yml` 89 | + 如果确实需要更新CA 证书,删除/roles/prepare/files/ca* 可以使用新CA 证书 90 | 91 | ### kubedns.yaml 配置生成 92 | 93 | + kubedns.yaml文件中部分参数(CLUSTER_DNS_SVC_IP, CLUSTER_DNS_DOMAIN)根据hosts文件设置而定,因此需要用template模块替换参数 94 | + 运行本步骤后,在 manifests/kubedns目录下生成 
kubedns.yaml 文件,以供后续部署时使用 95 | 96 | ``` bash 97 | roles/prepare/ 98 | ├── files 99 | │   ├── 95-k8s-sysctl.conf 100 | │   ├── ca-config.json 101 | │   ├── ca.csr 102 | │   ├── ca-csr.json 103 | │   ├── ca-key.pem 104 | │   └── ca.pem 105 | └── tasks 106 | └── main.yml 107 | ``` 108 | 请在另外窗口打开[roles/prepare/tasks/main.yml](../roles/prepare/tasks/main.yml) 文件,比较简单直观 109 | 110 | 1. 首先创建一些基础文件目录 111 | 1. 修改环境变量,把{{ bin_dir }} 添加到$PATH,需要重新登陆 shell生效 112 | 1. 把证书工具 CFSSL下发到指定节点 113 | 1. 把CA 证书相关下发到指定节点的 {{ ca_dir }} 目录 114 | 1. 最后设置基础操作系统软件和系统参数,请阅读脚本中的注释内容 115 | 116 | ### LB 负载均衡部署 117 | ``` bash 118 | roles/lb 119 | ├── tasks 120 | │   └── main.yml 121 | └── templates 122 | ├── haproxy.cfg.j2 123 | ├── keepalived-backup.conf.j2 124 | └── keepalived-master.conf.j2 125 | ``` 126 | 127 | Haproxy支持四层和七层负载,稳定性好,根据官方文档,HAProxy可以跑满10Gbps-New benchmark of HAProxy at 10 Gbps using Myricom's 10GbE NICs (Myri-10G PCI-Express),这个作为软件级负载均衡,也是比较惊人的;另外,openstack高可用也有用haproxy的。 128 | 129 | keepalived观其名可知,保持存活,它是基于VRRP协议保证所谓的高可用或热备的,这里用来预防haproxy的单点故障。 130 | 131 | keepalived与haproxy配合,实现master的高可用过程如下: 132 | 133 | + 1.keepalived利用vrrp协议生成一个虚拟地址(VIP),正常情况下VIP存活在keepalive的主节点,当主节点故障时,VIP能够漂移到keepalived的备节点,保障VIP地址可用性。 134 | + 2.在keepalived的主备节点都配置相同haproxy负载配置,并且监听客户端请求在VIP的地址上,保障随时都有一个haproxy负载均衡在正常工作。并且keepalived启用对haproxy进程的存活检测,一旦主节点haproxy进程故障,VIP也能切换到备节点,从而让备节点的haproxy进行负载工作。 135 | + 3.在haproxy的配置中配置多个后端真实kube-apiserver的endpoints,并启用存活监测后端kube-apiserver,如果一个kube-apiserver故障,haproxy会将其剔除负载池。 136 | 137 | 请在另外窗口打开[roles/lb/tasks/main.yml](../roles/lb/tasks/main.yml) 文件,对照看以下讲解内容。 138 | 139 | #### 安装haproxy 140 | 141 | + 使用apt源安装 142 | 143 | #### 配置haproxy [haproxy.cfg.j2](../roles/lb/templates/haproxy.cfg.j2) 144 | ``` bash 145 | global 146 | log /dev/log local0 147 | log /dev/log local1 notice 148 | chroot /var/lib/haproxy 149 | stats socket /run/haproxy/admin.sock mode 660 level admin 150 | stats timeout 30s 151 | user haproxy 152 | group haproxy 153 | daemon 154 | nbproc 1 155 | 156 | defaults 157 | log global 158 | timeout connect 5000 159 | timeout client 50000 160 | timeout server 50000 161 | 162 | listen kube-master 163 | bind 0.0.0.0:{{ MASTER_PORT }} 164 | mode tcp 165 | option tcplog 166 | balance source 167 | server s1 {{ LB_EP1 }} check inter 10000 fall 2 rise 2 weight 1 168 | server s2 {{ LB_EP2 }} check inter 10000 fall 2 rise 2 weight 1 169 | ``` 170 | 如果用apt安装的话,可以在/usr/share/doc/haproxy目录下找到配置指南configuration.txt.gz,全局和默认配置这里不展开,关注`listen` 代理设置模块,各项配置说明: 171 | + 名称 kube-master 172 | + bind 监听客户端请求的地址/端口,保证监听master的VIP地址和端口,{{ MASTER_PORT }}与hosts里面设置对应 173 | + mode 选择四层负载模式 (当然你也可以选择七层负载,请查阅指南,适当调整) 174 | + balance 选择负载算法 (负载算法也有很多供选择) 175 | + server 配置master节点真实的endpoits,必须与 [hosts文件](../example/hosts.m-masters.example)对应设置 176 | 177 | #### 安装keepalived 178 | 179 | + 使用apt源安装 180 | 181 | #### 配置keepalived主节点 [keepalived-master.conf.j2](../roles/lb/templates/keepalived-master.conf.j2) 182 | ``` bash 183 | global_defs { 184 | router_id lb-master 185 | } 186 | 187 | vrrp_script check-haproxy { 188 | script "killall -0 haproxy" 189 | interval 5 190 | weight -30 191 | } 192 | 193 | vrrp_instance VI-kube-master { 194 | state MASTER 195 | priority 120 196 | dont_track_primary 197 | interface {{ LB_IF }} 198 | virtual_router_id 51 199 | advert_int 3 200 | track_script { 201 | check-haproxy 202 | } 203 | virtual_ipaddress { 204 | {{ MASTER_IP }} 205 | } 206 | } 207 | ``` 208 | + vrrp_script 定义了监测haproxy进程的脚本,利用shell 脚本`killall -0 haproxy` 进行检测进程是否存活,如果进程不存在,根据`weight -30`设置将主节点优先级降低30,这样原先备节点将变成主节点。 
209 | + vrrp_instance 定义了vrrp组,包括优先级、使用端口、router_id、心跳频率、检测脚本、虚拟地址VIP等 210 | + 特别注意 `virtual_router_id` 标识了一个 VRRP组,在同网段下必须唯一,否则出现 `Keepalived_vrrp: bogus VRRP packet received on eth0 !!!`类似报错 211 | 212 | #### 配置keepalived备节点 [keepalived-backup.conf.j2](../roles/lb/templates/keepalived-backup.conf.j2) 213 | ``` bash 214 | global_defs { 215 | router_id lb-backup 216 | } 217 | 218 | vrrp_instance VI-kube-master { 219 | state BACKUP 220 | priority 110 221 | dont_track_primary 222 | interface {{ LB_IF }} 223 | virtual_router_id 51 224 | advert_int 3 225 | virtual_ipaddress { 226 | {{ MASTER_IP }} 227 | } 228 | } 229 | ``` 230 | + 备节点的配置类似主节点,除了优先级和检测脚本,其他如 `virtual_router_id` `advert_int` `virtual_ipaddress`必须与主节点一致 231 | 232 | ### 启动 keepalived 和 haproxy 后验证 233 | 234 | + lb 节点验证,假定 MASTER_PORT=8443 235 | 236 | ``` bash 237 | systemctl status haproxy # 检查进程状态 238 | journalctl -u haproxy # 检查进程日志是否有报错信息 239 | systemctl status keepalived # 检查进程状态 240 | journalctl -u keepalived # 检查进程日志是否有报错信息 241 | netstat -antlp|grep 8443 # 检查tcp端口是否监听 242 | ``` 243 | + 在 keepalived 主节点 244 | 245 | ``` bash 246 | ip a # 检查 master的 VIP地址是否存在 247 | ``` 248 | ### keepalived 主备切换演练 249 | 250 | 1. 尝试关闭 keepalived主节点上的 haproxy进程,然后在keepalived 备节点上查看 master的 VIP地址是否能够漂移过来,并依次检查上一步中的验证项。 251 | 1. 尝试直接关闭 keepalived 主节点系统,检查各验证项。 252 | 253 | 254 | [前一篇](00-集群规划和基础参数设定.md) -- [后一篇](02-安装etcd集群.md) 255 | -------------------------------------------------------------------------------- /docs/02-安装etcd集群.md: -------------------------------------------------------------------------------- 1 | ## 02-安装etcd集群.md 2 | 3 | ``` bash 4 | roles/etcd 5 | ├── tasks 6 | │   └── main.yml 7 | └── templates 8 | ├── etcd-csr.json.j2 9 | └── etcd.service.j2 10 | ``` 11 | kuberntes 系统使用 etcd 存储所有数据,是最重要的组件之一,注意 etcd集群只能有奇数个节点(1,3,5...),本文档使用3个节点做集群。 12 | 13 | 请在另外窗口打开[roles/etcd/tasks/main.yml](../roles/etcd/tasks/main.yml) 文件,对照看以下讲解内容。 14 | 15 | ### 下载etcd/etcdctl 二进制文件、创建证书目录 16 | 17 | ### 创建etcd证书请求 [etcd-csr.json.j2](../roles/etcd/templates/etcd-csr.json.j2) 18 | 19 | ``` bash 20 | { 21 | "CN": "etcd", 22 | "hosts": [ 23 | "127.0.0.1", 24 | "{{ NODE_IP }}" 25 | ], 26 | "key": { 27 | "algo": "rsa", 28 | "size": 2048 29 | }, 30 | "names": [ 31 | { 32 | "C": "CN", 33 | "ST": "HangZhou", 34 | "L": "XS", 35 | "O": "k8s", 36 | "OU": "System" 37 | } 38 | ] 39 | } 40 | ``` 41 | + hosts 字段指定授权使用该证书的 etcd 节点 IP 42 | 43 | ### 创建证书和私钥 44 | 45 | ``` bash 46 | cd /etc/etcd/ssl && {{ bin_dir }}/cfssl gencert \ 47 | -ca={{ ca_dir }}/ca.pem \ 48 | -ca-key={{ ca_dir }}/ca-key.pem \ 49 | -config={{ ca_dir }}/ca-config.json \ 50 | -profile=kubernetes etcd-csr.json | {{ bin_dir }}/cfssljson -bare etcd 51 | ``` 52 | 53 | ### 创建etcd 服务文件 [etcd.service.j2](../roles/etcd/templates/etcd.service.j2) 54 | 55 | 先创建工作目录 /var/lib/etcd/ 56 | 57 | ``` bash 58 | [Unit] 59 | Description=Etcd Server 60 | After=network.target 61 | After=network-online.target 62 | Wants=network-online.target 63 | Documentation=https://github.com/coreos 64 | 65 | [Service] 66 | Type=notify 67 | WorkingDirectory=/var/lib/etcd/ 68 | ExecStart={{ bin_dir }}/etcd \ 69 | --name={{ NODE_NAME }} \ 70 | --cert-file=/etc/etcd/ssl/etcd.pem \ 71 | --key-file=/etc/etcd/ssl/etcd-key.pem \ 72 | --peer-cert-file=/etc/etcd/ssl/etcd.pem \ 73 | --peer-key-file=/etc/etcd/ssl/etcd-key.pem \ 74 | --trusted-ca-file={{ ca_dir }}/ca.pem \ 75 | --peer-trusted-ca-file={{ ca_dir }}/ca.pem \ 76 | --initial-advertise-peer-urls=https://{{ NODE_IP }}:2380 \ 77 | --listen-peer-urls=https://{{ NODE_IP }}:2380 \ 78 | 
--listen-client-urls=https://{{ NODE_IP }}:2379,http://127.0.0.1:2379 \ 79 | --advertise-client-urls=https://{{ NODE_IP }}:2379 \ 80 | --initial-cluster-token=etcd-cluster-0 \ 81 | --initial-cluster={{ ETCD_NODES }} \ 82 | --initial-cluster-state=new \ 83 | --data-dir=/var/lib/etcd 84 | Restart=on-failure 85 | RestartSec=5 86 | LimitNOFILE=65536 87 | 88 | [Install] 89 | WantedBy=multi-user.target 90 | ``` 91 | + 完整参数列表请使用 `etcd --help` 查询 92 | + 注意etcd 即需要服务器证书也需要客户端证书,这里为方便使用一个peer 证书代替两个证书,更多证书相关请阅读 [01-创建CA证书和环境配置.md](01-创建CA证书和环境配置.md) 93 | + 注意{{ }} 中的参数与ansible hosts文件中设置对应 94 | + `--initial-cluster-state` 值为 `new` 时,`--name` 的参数值必须位于 `--initial-cluster` 列表中; 95 | 96 | ### 启动etcd服务 97 | 98 | ``` bash 99 | systemctl daemon-reload && systemctl enable etcd && systemctl start etcd 100 | ``` 101 | 102 | ### 验证etcd集群状态 103 | 104 | + systemctl status etcd 查看服务状态 105 | + journalctl -u etcd 查看运行日志 106 | + 在任一 etcd 集群节点上执行如下命令 107 | 108 | ``` bash 109 | # 根据hosts中配置设置shell变量 $NODE_IPS 110 | export NODE_IPS="192.168.1.1 192.168.1.2 192.168.1.3" 111 | $ for ip in ${NODE_IPS}; do 112 | ETCDCTL_API=3 /root/local/bin/etcdctl \ 113 | --endpoints=https://${ip}:2379 \ 114 | --cacert=/etc/kubernetes/ssl/ca.pem \ 115 | --cert=/etc/etcd/ssl/etcd.pem \ 116 | --key=/etc/etcd/ssl/etcd-key.pem \ 117 | endpoint health; done 118 | ``` 119 | 预期结果: 120 | 121 | ``` text 122 | https://192.168.1.1:2379 is healthy: successfully committed proposal: took = 2.210885ms 123 | https://192.168.1.2:2379 is healthy: successfully committed proposal: took = 2.784043ms 124 | https://192.168.1.3:2379 is healthy: successfully committed proposal: took = 3.275709ms 125 | ``` 126 | 三台 etcd 的输出均为 healthy 时表示集群服务正常。 127 | 128 | 129 | [前一篇](01-创建CA证书和环境配置.md) -- [后一篇](03-配置kubectl命令行工具.md) 130 | -------------------------------------------------------------------------------- /docs/03-配置kubectl命令行工具.md: -------------------------------------------------------------------------------- 1 | ## 03-配置kubectl命令行工具.md 2 | 3 | kubectl使用~/.kube/config 配置文件与kube-apiserver进行交互,且拥有完全权限[可配置],因此尽量避免安装在不必要的节点上,这里为了演示方便,将它安装在master/node/deploy节点。 4 | `cat ~/.kube/config`可以看到配置文件包含 kube-apiserver 地址、证书、用户名等信息。 5 | 6 | ``` bash 7 | roles/kubectl 8 | ├── tasks 9 | │   └── main.yml 10 | └── templates 11 | └── admin-csr.json.j2 12 | ``` 13 | 请在另外窗口打开[roles/kubectl/tasks/main.yml](../roles/kubectl/tasks/main.yml) 文件,对照看以下讲解内容。 14 | 15 | ### 准备kubectl使用的admin 证书签名请求 [admin-csr.json.j2](../roles/kubectl/templates/admin-csr.json.j2) 16 | 17 | ``` bash 18 | { 19 | "CN": "admin", 20 | "hosts": [], 21 | "key": { 22 | "algo": "rsa", 23 | "size": 2048 24 | }, 25 | "names": [ 26 | { 27 | "C": "CN", 28 | "ST": "HangZhou", 29 | "L": "XS", 30 | "O": "system:masters", 31 | "OU": "System" 32 | } 33 | ] 34 | } 35 | 36 | ``` 37 | + 后续我们在安装`master`节点时候会启用 `RBAC`特性,它在v1.8.x中已是稳定版本,推荐[RBAC官方文档](https://kubernetes.io/docs/admin/authorization/rbac/) 38 | + 证书请求中 `O` 指定该证书的 Group 为 `system:masters`,而 `RBAC` 预定义的 `ClusterRoleBinding` 将 Group `system:masters` 与 ClusterRole `cluster-admin` 绑定,这就赋予了kubectl**所有集群权限** 39 | 40 | kubectl get clusterrolebinding cluster-admin -o yaml 41 | 42 | ``` bash 43 | apiVersion: rbac.authorization.k8s.io/v1 44 | kind: ClusterRoleBinding 45 | metadata: 46 | annotations: 47 | rbac.authorization.kubernetes.io/autoupdate: "true" 48 | creationTimestamp: 2017-11-30T01:33:10Z 49 | labels: 50 | kubernetes.io/bootstrapping: rbac-defaults 51 | name: cluster-admin 52 | resourceVersion: "76" 53 | selfLink: 
/apis/rbac.authorization.k8s.io/v1/clusterrolebindings/cluster-admin 54 | uid: 6c9dd451-d56e-11e7-8ed6-525400103a5d 55 | roleRef: 56 | apiGroup: rbac.authorization.k8s.io 57 | kind: ClusterRole 58 | name: cluster-admin 59 | subjects: 60 | - apiGroup: rbac.authorization.k8s.io 61 | kind: Group 62 | name: system:masters 63 | ``` 64 | ### 创建admin 证书和私钥 65 | 66 | ``` bash 67 | cd {{ ca_dir }} && {{ bin_dir }}/cfssl gencert \ 68 | -ca={{ ca_dir }}/ca.pem \ 69 | -ca-key={{ ca_dir }}/ca-key.pem \ 70 | -config={{ ca_dir }}/ca-config.json \ 71 | -profile=kubernetes admin-csr.json | {{ bin_dir }}/cfssljson -bare admin 72 | ``` 73 | ### 创建 kubectl kubeconfig 文件 74 | 75 | #### 设置集群参数,指定CA证书和apiserver地址 76 | 77 | ``` bash 78 | {{ bin_dir }}/kubectl config set-cluster kubernetes \ 79 | --certificate-authority={{ ca_dir }}/ca.pem \ 80 | --embed-certs=true \ 81 | --server={{ KUBE_APISERVER }} 82 | ``` 83 | 84 | #### 设置客户端认证参数,指定使用admin证书和私钥 85 | 86 | ``` bash 87 | {{ bin_dir }}/kubectl config set-credentials admin \ 88 | --client-certificate={{ ca_dir }}/admin.pem \ 89 | --embed-certs=true \ 90 | --client-key={{ ca_dir }}/admin-key.pem 91 | ``` 92 | 93 | #### 设置上下文参数,说明使用cluster集群和用户admin 94 | 95 | ``` bash 96 | {{ bin_dir }}/kubectl config set-context kubernetes \ 97 | --cluster=kubernetes --user=admin 98 | ``` 99 | 100 | #### 选择默认上下文 101 | 102 | ``` bash 103 | {{ bin_dir }}/kubectl config use-context kubernetes 104 | ``` 105 | + 注意{{ }}中参数与ansible hosts文件中设置对应 106 | + 以上生成的 kubeconfig 自动保存到 ~/.kube/config 文件 107 | 108 | 109 | [前一篇](02-安装etcd集群.md) -- [后一篇](04-安装docker服务.md) 110 | -------------------------------------------------------------------------------- /docs/04-安装docker服务.md: -------------------------------------------------------------------------------- 1 | ## 04-安装docker服务.md 2 | 3 | ``` bash 4 | roles/docker/ 5 | ├── files 6 | │   ├── daemon.json 7 | │   ├── docker 8 | │   └── docker-tag 9 | ├── tasks 10 | │   └── main.yml 11 | └── templates 12 | └── docker.service.j2 13 | ``` 14 | 15 | 请在另外窗口打开[roles/docker/tasks/main.yml](../roles/docker/tasks/main.yml) 文件,对照看以下讲解内容。 16 | 17 | ### 创建docker的systemd unit文件 18 | 19 | ``` bash 20 | [Unit] 21 | Description=Docker Application Container Engine 22 | Documentation=http://docs.docker.io 23 | 24 | [Service] 25 | Environment="PATH={{ bin_dir }}:/bin:/sbin:/usr/bin:/usr/sbin" 26 | ExecStart={{ bin_dir }}/dockerd --log-level=error 27 | ExecStartPost=/sbin/iptables -I FORWARD -s 0.0.0.0/0 -j ACCEPT 28 | ExecReload=/bin/kill -s HUP $MAINPID 29 | Restart=on-failure 30 | RestartSec=5 31 | LimitNOFILE=infinity 32 | LimitNPROC=infinity 33 | LimitCORE=infinity 34 | Delegate=yes 35 | KillMode=process 36 | 37 | [Install] 38 | WantedBy=multi-user.target 39 | ``` 40 | + dockerd 运行时会调用其它 docker 命令,如 docker-proxy,所以需要将 docker 命令所在的目录加到 PATH 环境变量中; 41 | + docker 从 1.13 版本开始,将`iptables` 的`filter` 表的`FORWARD` 链的默认策略设置为`DROP`,从而导致 ping 其它 Node 上的 Pod IP 失败,因此必须在 `filter` 表的`FORWARD` 链增加一条默认允许规则 `iptables -I FORWARD -s 0.0.0.0/0 -j ACCEPT` 42 | + 运行`dockerd --help` 查看所有可以可配置参数,确保默认开启 `--iptables` 和 `--ip-masq` 选项 43 | 44 | ### 配置国内镜像加速 45 | 46 | 众所周知从国内下载docker官方仓库镜像非常缓慢,所以对于k8s集群来说配置镜像加速非常重要,配置 `/etc/docker/daemon.json` 47 | 48 | ``` bash 49 | { 50 | "registry-mirrors": ["https://registry.docker-cn.com"], 51 | "max-concurrent-downloads": 6 52 | } 53 | ``` 54 | 55 | 这将在后续部署calico下载 calico/node:v2.6.2镜像和kubedns/heapster/dashboard镜像时起到重要加速效果。 56 | 57 | 由于K8S的官方镜像存放在`gcr.io`仓库,因此这个镜像加速对K8S的官方镜像没有效果;好在`Docker Hub`上有很多K8S镜像的转存,而`Docker Hub`上的镜像是国内可以加速的。 58 | 
这里推荐[mirrorgooglecontainers](https://hub.docker.com/u/mirrorgooglecontainers/)几乎能找到所有K8S相关的google镜像,而且更新及时,感谢维护者的辛勤付出!后文将看到部署附加组件时基本都是用他们的镜像。 59 | 60 | 当然对于企业内部应用的docker镜像,想要在K8S平台运行的话,特别是结合开发`CI/CD` 流程,肯定是需要部署私有镜像仓库的,后续会简单提到 `Harbor`的部署。 61 | 62 | ### 清理 iptables 63 | 64 | 因为后续`calico`网络、`kube-proxy`等将大量使用 iptables规则,安装前清空所有`iptables`策略规则;常见发行版`Ubuntu`的 `ufw` 和 `CentOS`的 `firewalld`等基于`iptables`的防火墙最好直接卸载,避免不必要的冲突。 65 | 66 | ``` bash 67 | iptables -F && iptables -X \ 68 | && iptables -F -t nat && iptables -X -t nat \ 69 | && iptables -F -t raw && iptables -X -t raw \ 70 | && iptables -F -t mangle && iptables -X -t mangle 71 | ``` 72 | + calico 网络支持 `network-policy`,使用的`calico-kube-controllers` 会使用到`iptables` 所有的四个表 `filter` `nat` `raw` `mangle`,所以一并清理 73 | 74 | ### 启动 docker 略 75 | 76 | ### 可选-安装docker查询镜像 tag的小工具 77 | 78 | docker官方目前没有提供在命令行直接查询某个镜像的tag信息的方式,网上找来一个脚本工具,使用很方便。 79 | 80 | ``` bash 81 | > docker-tag library/ubuntu 82 | "14.04" 83 | "16.04" 84 | "17.04" 85 | "latest" 86 | "trusty" 87 | "trusty-20171117" 88 | "xenial" 89 | "xenial-20171114" 90 | "zesty" 91 | "zesty-20171114" 92 | >docker-tag mirrorgooglecontainers/kubernetes-dashboard-amd64 93 | "v0.1.0" 94 | "v1.0.0" 95 | "v1.0.0-beta1" 96 | "v1.0.1" 97 | "v1.1.0-beta1" 98 | "v1.1.0-beta2" 99 | "v1.1.0-beta3" 100 | "v1.7.0" 101 | "v1.7.1" 102 | "v1.8.0" 103 | ``` 104 | + 需要先apt安装轻量JSON处理程序 `jq` 105 | + 然后下载脚本即可使用 106 | + 脚本很简单,就一行命令如下 107 | 108 | ``` bash 109 | #!/bin/bash 110 | curl -s -S "https://registry.hub.docker.com/v2/repositories/$@/tags/" | jq '."results"[]["name"]' |sort 111 | ``` 112 | + 对于 CentOS7 安装 `jq` 稍微费力一点,需要启用 `EPEL` 源 113 | 114 | ``` bash 115 | wget http://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm 116 | rpm -ivh epel-release-latest-7.noarch.rpm 117 | yum install jq 118 | ``` 119 | 120 | ### 验证 121 | 122 | 运行`ansible-playbook 04.docker.yml` 成功后可以验证 123 | 124 | ``` bash 125 | systemctl status docker # 服务状态 126 | journalctl -u docker # 运行日志 127 | docker version 128 | docker info 129 | ``` 130 | `iptables-save|grep FORWARD` 查看 iptables filter表 FORWARD链,最后要有一个 `-A FORWARD -j ACCEPT` 保底允许规则 131 | 132 | ``` bash 133 | iptables-save|grep FORWARD 134 | :FORWARD ACCEPT [0:0] 135 | :FORWARD DROP [0:0] 136 | -A FORWARD -j DOCKER-USER 137 | -A FORWARD -j DOCKER-ISOLATION 138 | -A FORWARD -o docker0 -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT 139 | -A FORWARD -o docker0 -j DOCKER 140 | -A FORWARD -i docker0 ! 
-o docker0 -j ACCEPT 141 | -A FORWARD -i docker0 -o docker0 -j ACCEPT 142 | -A FORWARD -j ACCEPT 143 | ``` 144 | 145 | [前一篇](03-配置kubectl命令行工具.md) -- [后一篇](05-安装kube-master节点.md) 146 | -------------------------------------------------------------------------------- /docs/05-安装kube-master节点.md: -------------------------------------------------------------------------------- 1 | ## 05-安装kube-master节点.md 2 | 3 | 部署master节点包含三个组件`apiserver` `scheduler` `controller-manager`,其中: 4 | 5 | - apiserver提供集群管理的REST API接口,包括认证授权、数据校验以及集群状态变更等 6 | - 只有API Server才直接操作etcd 7 | - 其他模块通过API Server查询或修改数据 8 | - 提供其他模块之间的数据交互和通信的枢纽 9 | - scheduler负责分配调度Pod到集群内的node节点 10 | - 监听kube-apiserver,查询还未分配Node的Pod 11 | - 根据调度策略为这些Pod分配节点 12 | - controller-manager由一系列的控制器组成,它通过apiserver监控整个集群的状态,并确保集群处于预期的工作状态 13 | 14 | master节点的高可用主要就是实现apiserver组件的高可用,在之前部署lb节点时候已经配置haproxy对它进行负载均衡。 15 | 16 | ``` text 17 | roles/kube-master/ 18 | ├── tasks 19 | │   └── main.yml 20 | └── templates 21 | ├── basic-auth.csv.j2 22 | ├── kube-apiserver.service.j2 23 | ├── kube-controller-manager.service.j2 24 | ├── kubernetes-csr.json.j2 25 | ├── kube-scheduler.service.j2 26 | └── token.csv.j2 27 | ``` 28 | 29 | 请在另外窗口打开[roles/kube-master/tasks/main.yml](../roles/kube-master/tasks/main.yml) 文件,对照看以下讲解内容。 30 | 31 | ### 创建 kubernetes 证书签名请求 32 | 33 | ``` bash 34 | { 35 | "CN": "kubernetes", 36 | "hosts": [ 37 | "127.0.0.1", 38 | "{{ MASTER_IP }}", 39 | "{{ NODE_IP }}", 40 | "{{ CLUSTER_KUBERNETES_SVC_IP }}", 41 | "kubernetes", 42 | "kubernetes.default", 43 | "kubernetes.default.svc", 44 | "kubernetes.default.svc.cluster", 45 | "kubernetes.default.svc.cluster.local" 46 | ], 47 | "key": { 48 | "algo": "rsa", 49 | "size": 2048 50 | }, 51 | "names": [ 52 | { 53 | "C": "CN", 54 | "ST": "HangZhou", 55 | "L": "XS", 56 | "O": "k8s", 57 | "OU": "System" 58 | } 59 | ] 60 | } 61 | ``` 62 | - kubernetes 证书既是服务器证书,同时apiserver又作为客户端证书去访问etcd 集群;作为服务器证书需要设置hosts 指定使用该证书的IP 或域名列表,需要注意的是: 63 | - 多主高可用集群需要把master VIP地址 {{ MASTER_IP }} 也添加进去 64 | - `kubectl get svc` 将看到集群中由api-server 创建的默认服务 `kubernetes`,因此也要把 `kubernetes` 服务名和各个服务域名也添加进去 65 | - 注意所有{{ }}变量与ansible hosts中设置的对应关系 66 | 67 | ### 创建 token 认证配置 68 | 69 | 因为手动为每个node节点配置TLS认证比较麻烦,后续apiserver会开启 experimental-bootstrap-token-auth 特性,利用 kubelet启动时的 token信息与此处token认证匹配认证,然后自动为 node颁发证书 70 | 71 | ``` bash 72 | {{ BOOTSTRAP_TOKEN }},kubelet-bootstrap,10001,"system:kubelet-bootstrap" 73 | ``` 74 | 75 | ### 创建基础用户名/密码认证配置 76 | 77 | 可选,为后续使用基础认证的场景做准备,如实现dashboard 用不同用户名登陆绑定不同的权限,后续更新dashboard的实践文档。 78 | 79 | ### 创建apiserver的服务配置文件 80 | 81 | ``` bash 82 | [Unit] 83 | Description=Kubernetes API Server 84 | Documentation=https://github.com/GoogleCloudPlatform/kubernetes 85 | After=network.target 86 | 87 | [Service] 88 | ExecStart={{ bin_dir }}/kube-apiserver \ 89 | --admission-control=NamespaceLifecycle,LimitRanger,ServiceAccount,DefaultStorageClass,ResourceQuota,NodeRestriction \ 90 | --bind-address={{ NODE_IP }} \ 91 | --insecure-bind-address=127.0.0.1 \ 92 | --authorization-mode=Node,RBAC \ 93 | --runtime-config=rbac.authorization.k8s.io/v1 \ 94 | --kubelet-https=true \ 95 | --anonymous-auth=false \ 96 | --basic-auth-file={{ ca_dir }}/basic-auth.csv \ 97 | --enable-bootstrap-token-auth \ 98 | --token-auth-file={{ ca_dir }}/token.csv \ 99 | --service-cluster-ip-range={{ SERVICE_CIDR }} \ 100 | --service-node-port-range={{ NODE_PORT_RANGE }} \ 101 | --tls-cert-file={{ ca_dir }}/kubernetes.pem \ 102 | --tls-private-key-file={{ ca_dir }}/kubernetes-key.pem \ 103 | --client-ca-file={{ ca_dir }}/ca.pem \ 104 | 
--service-account-key-file={{ ca_dir }}/ca-key.pem \ 105 | --etcd-cafile={{ ca_dir }}/ca.pem \ 106 | --etcd-certfile={{ ca_dir }}/kubernetes.pem \ 107 | --etcd-keyfile={{ ca_dir }}/kubernetes-key.pem \ 108 | --etcd-servers={{ ETCD_ENDPOINTS }} \ 109 | --enable-swagger-ui=true \ 110 | --allow-privileged=true \ 111 | --audit-log-maxage=30 \ 112 | --audit-log-maxbackup=3 \ 113 | --audit-log-maxsize=100 \ 114 | --audit-log-path=/var/lib/audit.log \ 115 | --event-ttl=1h \ 116 | --v=2 117 | Restart=on-failure 118 | RestartSec=5 119 | Type=notify 120 | LimitNOFILE=65536 121 | 122 | [Install] 123 | WantedBy=multi-user.target 124 | ``` 125 | + Kubernetes 对 API 访问需要依次经过认证、授权和准入控制(admission controll),认证解决用户是谁的问题,授权解决用户能做什么的问题,Admission Control则是资源管理方面的作用。 126 | + 支持同时提供https(默认监听在6443端口)和http API(默认监听在127.0.0.1的8080端口),其中http API是非安全接口,不做任何认证授权机制,kube-scheduler、kube-controller-manager 一般和 kube-apiserver 部署在同一台机器上,它们使用非安全端口和 kube-apiserver通信; 其他集群外部就使用HTTPS访问 apiserver 127 | + 关于authorization-mode=Node,RBAC v1.7+支持Node授权,配合NodeRestriction准入控制来限制kubelet仅可访问node、endpoint、pod、service以及secret、configmap、PV和PVC等相关的资源;需要注意的是v1.7中Node 授权是默认开启的,v1.8中需要显式配置开启,否则 Node无法正常工作 128 | + 缺省情况下 kubernetes 对象保存在 etcd /registry 路径下,可以通过 --etcd-prefix 参数进行调整 129 | + 详细参数配置请参考`kube-apiserver --help`,关于认证、授权和准入控制请[阅读](https://github.com/feiskyer/kubernetes-handbook/blob/master/components/apiserver.md) 130 | 131 | ### 创建controller-manager 的服务文件 132 | 133 | ``` bash 134 | [Unit] 135 | Description=Kubernetes Controller Manager 136 | Documentation=https://github.com/GoogleCloudPlatform/kubernetes 137 | 138 | [Service] 139 | ExecStart={{ bin_dir }}/kube-controller-manager \ 140 | --address=127.0.0.1 \ 141 | --master=http://127.0.0.1:8080 \ 142 | --allocate-node-cidrs=true \ 143 | --service-cluster-ip-range={{ SERVICE_CIDR }} \ 144 | --cluster-cidr={{ CLUSTER_CIDR }} \ 145 | --cluster-name=kubernetes \ 146 | --cluster-signing-cert-file={{ ca_dir }}/ca.pem \ 147 | --cluster-signing-key-file={{ ca_dir }}/ca-key.pem \ 148 | --service-account-private-key-file={{ ca_dir }}/ca-key.pem \ 149 | --root-ca-file={{ ca_dir }}/ca.pem \ 150 | --leader-elect=true \ 151 | --v=2 152 | Restart=on-failure 153 | RestartSec=5 154 | 155 | [Install] 156 | WantedBy=multi-user.target 157 | ``` 158 | + --address 值必须为 127.0.0.1,因为当前 kube-apiserver 期望 scheduler 和 controller-manager 在同一台机器 159 | + --master=http://127.0.0.1:8080 使用非安全 8080 端口与 kube-apiserver 通信 160 | + --cluster-cidr 指定 Cluster 中 Pod 的 CIDR 范围,该网段在各 Node 间必须路由可达(calico 实现) 161 | + --service-cluster-ip-range 参数指定 Cluster 中 Service 的CIDR范围,必须和 kube-apiserver 中的参数一致 162 | + --cluster-signing-* 指定的证书和私钥文件用来签名为 TLS BootStrap 创建的证书和私钥 163 | + --root-ca-file 用来对 kube-apiserver 证书进行校验,指定该参数后,才会在Pod 容器的 ServiceAccount 中放置该 CA 证书文件 164 | + --leader-elect=true 使用多节点选主的方式选择主节点。只有主节点才会启动所有控制器,而其他从节点则仅执行选主算法 165 | 166 | ### 创建scheduler 的服务文件 167 | 168 | ``` bash 169 | [Unit] 170 | Description=Kubernetes Scheduler 171 | Documentation=https://github.com/GoogleCloudPlatform/kubernetes 172 | 173 | [Service] 174 | ExecStart={{ bin_dir }}/kube-scheduler \ 175 | --address=127.0.0.1 \ 176 | --master=http://127.0.0.1:8080 \ 177 | --leader-elect=true \ 178 | --v=2 179 | Restart=on-failure 180 | RestartSec=5 181 | 182 | [Install] 183 | WantedBy=multi-user.target 184 | ``` 185 | 186 | + --address 同样值必须为 127.0.0.1 187 | + --master=http://127.0.0.1:8080 使用非安全 8080 端口与 kube-apiserver 通信 188 | + --leader-elect=true 部署多台机器组成的 master 集群时选举产生一个处于工作状态的 kube-controller-manager 进程 189 | 190 | ### master 集群的验证 191 | 192 | 运行 
`ansible-playbook 06.kube-master.yml` 成功后,验证 master节点的主要组件: 193 | 194 | ``` bash 195 | # 查看进程状态 196 | systemctl status kube-apiserver 197 | systemctl status kube-controller-manager 198 | systemctl status kube-scheduler 199 | # 查看进程运行日志 200 | journalctl -u kube-apiserver 201 | journalctl -u kube-controller-manager 202 | journalctl -u kube-scheduler 203 | ``` 204 | 执行 `kubectl get componentstatus` 可以看到 205 | 206 | ``` bash 207 | NAME STATUS MESSAGE ERROR 208 | scheduler Healthy ok 209 | controller-manager Healthy ok 210 | etcd-0 Healthy {"health": "true"} 211 | etcd-2 Healthy {"health": "true"} 212 | etcd-1 Healthy {"health": "true"} 213 | ``` 214 | 215 | [前一篇](04-安装docker服务.md) -- [后一篇](06-安装kube-node节点.md) 216 | -------------------------------------------------------------------------------- /docs/06-安装kube-node节点.md: -------------------------------------------------------------------------------- 1 | ## 06-安装kube-node节点.md 2 | 3 | `kube-node` 是集群中承载应用的节点,前置条件需要先部署好`kube-master`节点(因为需要操作`用户角色绑定`、`批准kubelet TLS 证书请求`等),它需要部署如下组件: 4 | 5 | + docker:运行容器 6 | + calico: 配置容器网络 (或者 flannel) 7 | + kubelet: kube-node上最主要的组件 8 | + kube-proxy: 发布应用服务与负载均衡 9 | 10 | ``` bash 11 | roles/kube-node 12 | ├── tasks 13 | │   └── main.yml 14 | └── templates 15 | ├── cni-default.conf.j2 16 | ├── kubelet.service.j2 17 | ├── kube-proxy-csr.json.j2 18 | └── kube-proxy.service.j2 19 | ``` 20 | 21 | 请在另外窗口打开[roles/kube-node/tasks/main.yml](../roles/kube-node/tasks/main.yml) 文件,对照看以下讲解内容。 22 | 23 | ### 创建角色绑定 24 | 25 | kubelet 启动时向 kube-apiserver 发送 TLS bootstrapping 请求,需要先将 bootstrap token 文件中的 kubelet-bootstrap 用户赋予 system:node-bootstrapper 角色,然后 kubelet 才有权限创建认证请求 26 | 27 | ``` bash 28 | # 增加15秒延时是为了等待上一步kube-master 启动完全 29 | "sleep 15 && {{ bin_dir }}/kubectl create clusterrolebinding kubelet-bootstrap \ 30 | --clusterrole=system:node-bootstrapper --user=kubelet-bootstrap" 31 | ``` 32 | 33 | ### 创建 bootstrapping kubeconfig 文件 34 | 35 | ``` bash 36 | #设置集群参数 37 | shell: "{{ bin_dir }}/kubectl config set-cluster kubernetes \ 38 | --certificate-authority={{ ca_dir }}/ca.pem \ 39 | --embed-certs=true \ 40 | --server={{ KUBE_APISERVER }} \ 41 | --kubeconfig=bootstrap.kubeconfig" 42 | #设置客户端认证参数 43 | shell: "{{ bin_dir }}/kubectl config set-credentials kubelet-bootstrap \ 44 | --token={{ BOOTSTRAP_TOKEN }} \ 45 | --kubeconfig=bootstrap.kubeconfig" 46 | #设置上下文参数 47 | shell: "{{ bin_dir }}/kubectl config set-context default \ 48 | --cluster=kubernetes \ 49 | --user=kubelet-bootstrap \ 50 | --kubeconfig=bootstrap.kubeconfig" 51 | #选择默认上下文 52 | shell: "{{ bin_dir }}/kubectl config use-context default --kubeconfig=bootstrap.kubeconfig" 53 | ``` 54 | + 注意 kubelet bootstrapping认证时是靠 token的,后续由 `master`为其生成证书和私钥 55 | + 以上生成的bootstrap.kubeconfig配置文件需要移动到/etc/kubernetes/目录下,后续在kubelet启动参数中指定该目录下的 bootstrap.kubeconfig 56 | 57 | ### 创建cni 基础网络插件配置文件 58 | 59 | 因为后续需要用 `DaemonSet Pod`方式运行k8s网络插件,所以kubelet.server服务必须开启cni相关参数,并且提供cni网络配置文件 60 | 61 | ### 创建 kubelet 的服务文件 62 | 63 | + 必须先创建工作目录 `/var/lib/kubelet` 64 | 65 | ``` bash 66 | [Unit] 67 | Description=Kubernetes Kubelet 68 | Documentation=https://github.com/GoogleCloudPlatform/kubernetes 69 | After=docker.service 70 | Requires=docker.service 71 | 72 | [Service] 73 | WorkingDirectory=/var/lib/kubelet 74 | #--pod-infra-container-image=registry.access.redhat.com/rhel7/pod-infrastructure:latest 75 | ExecStart={{ bin_dir }}/kubelet \ 76 | --address={{ NODE_IP }} \ 77 | --hostname-override={{ NODE_IP }} \ 78 | --pod-infra-container-image={{ POD_INFRA_CONTAINER_IMAGE }} \ 79 | 
--experimental-bootstrap-kubeconfig=/etc/kubernetes/bootstrap.kubeconfig \ 80 | --kubeconfig=/etc/kubernetes/kubelet.kubeconfig \ 81 | --cert-dir={{ ca_dir }} \ 82 | --network-plugin=cni \ 83 | --cni-conf-dir=/etc/cni/net.d \ 84 | --cni-bin-dir={{ bin_dir }} \ 85 | --cluster-dns={{ CLUSTER_DNS_SVC_IP }} \ 86 | --cluster-domain={{ CLUSTER_DNS_DOMAIN }} \ 87 | --hairpin-mode hairpin-veth \ 88 | --allow-privileged=true \ 89 | --fail-swap-on=false \ 90 | --logtostderr=true \ 91 | --v=2 92 | #kubelet cAdvisor 默认在所有接口监听 4194 端口的请求, 以下iptables限制内网访问 93 | ExecStartPost=/sbin/iptables -A INPUT -s 10.0.0.0/8 -p tcp --dport 4194 -j ACCEPT 94 | ExecStartPost=/sbin/iptables -A INPUT -s 172.16.0.0/12 -p tcp --dport 4194 -j ACCEPT 95 | ExecStartPost=/sbin/iptables -A INPUT -s 192.168.0.0/16 -p tcp --dport 4194 -j ACCEPT 96 | ExecStartPost=/sbin/iptables -A INPUT -p tcp --dport 4194 -j DROP 97 | Restart=on-failure 98 | RestartSec=5 99 | 100 | [Install] 101 | WantedBy=multi-user.target 102 | ``` 103 | + --pod-infra-container-image 指定`基础容器`的镜像,负责创建Pod 内部共享的网络、文件系统等,这个基础容器非常重要:**K8S每一个运行的 POD里面必然包含这个基础容器**,如果它没有运行起来那么你的POD 肯定创建不了,kubelet日志里面会看到类似 ` FailedCreatePodSandBox` 错误,本项目集群常见 `SandBox` 容器起不来有两个原因:a. pause镜像没有下载到 b. calico/node 容器还没有正常运行,可用`docker ps -a` 验证 104 | + --experimental-bootstrap-kubeconfig 指向 bootstrap kubeconfig 文件,kubelet 使用该文件中的用户名和 token 向 kube-apiserver 发送 TLS Bootstrapping 请求 105 | + --cluster-dns 指定 kubedns 的 Service IP(可以先分配,后续创建 kubedns 服务时指定该 IP),--cluster-domain 指定域名后缀,这两个参数同时指定后才会生效; 106 | + --network-plugin=cni --cni-conf-dir=/etc/cni/net.d --cni-bin-dir={{ bin_dir }} 为使用cni 网络,并调用calico管理网络所需的配置 107 | + --fail-swap-on=false K8S 1.8需显示禁用这个,否则服务不能启动 108 | 109 | ### 批准kubelet 的 TLS 证书请求 110 | 111 | ``` bash 112 | sleep 15 && {{ bin_dir }}/kubectl get csr|grep 'Pending' | awk 'NR>0{print $1}'| xargs {{ bin_dir }}/kubectl certificate approve 113 | ``` 114 | + 增加15秒延时等待kubelet启动 115 | + `kubectl get csr |grep 'Pending'` 找出待批准的 TLS请求 116 | + `kubectl certificate approve` 批准请求 117 | 118 | ### 创建 kube-proxy 证书请求 119 | 120 | ``` bash 121 | { 122 | "CN": "system:kube-proxy", 123 | "hosts": [], 124 | "key": { 125 | "algo": "rsa", 126 | "size": 2048 127 | }, 128 | "names": [ 129 | { 130 | "C": "CN", 131 | "ST": "HangZhou", 132 | "L": "XS", 133 | "O": "k8s", 134 | "OU": "System" 135 | } 136 | ] 137 | } 138 | ``` 139 | + CN 指定该证书的 User 为 system:kube-proxy,预定义的 ClusterRoleBinding system:node-proxier 将User system:kube-proxy 与 Role system:node-proxier 绑定,授予了调用 kube-apiserver Proxy 相关 API 的权限; 140 | + kube-proxy 使用客户端证书可以不指定hosts 字段 141 | 142 | ### 创建 kube-proxy kubeconfig 文件 143 | 144 | ``` bash 145 | #设置集群参数 146 | shell: "{{ bin_dir }}/kubectl config set-cluster kubernetes \ 147 | --certificate-authority={{ ca_dir }}/ca.pem \ 148 | --embed-certs=true \ 149 | --server={{ KUBE_APISERVER }} \ 150 | --kubeconfig=kube-proxy.kubeconfig" 151 | #设置客户端认证参数 152 | shell: "{{ bin_dir }}/kubectl config set-credentials kube-proxy \ 153 | --client-certificate={{ ca_dir }}/kube-proxy.pem \ 154 | --client-key={{ ca_dir }}/kube-proxy-key.pem \ 155 | --embed-certs=true \ 156 | --kubeconfig=kube-proxy.kubeconfig" 157 | #设置上下文参数 158 | shell: "{{ bin_dir }}/kubectl config set-context default \ 159 | --cluster=kubernetes \ 160 | --user=kube-proxy \ 161 | --kubeconfig=kube-proxy.kubeconfig" 162 | #选择默认上下文 163 | shell: "{{ bin_dir }}/kubectl config use-context default --kubeconfig=kube-proxy.kubeconfig" 164 | ``` 165 | + 生成的kube-proxy.kubeconfig 配置文件需要移动到/etc/kubernetes/目录,后续kube-proxy服务启动参数里面需要指定 166 | 167 | ### 创建 
kube-proxy服务文件 168 | 169 | ``` bash 170 | [Unit] 171 | Description=Kubernetes Kube-Proxy Server 172 | Documentation=https://github.com/GoogleCloudPlatform/kubernetes 173 | After=network.target 174 | 175 | [Service] 176 | WorkingDirectory=/var/lib/kube-proxy 177 | ExecStart={{ bin_dir }}/kube-proxy \ 178 | --bind-address={{ NODE_IP }} \ 179 | --hostname-override={{ NODE_IP }} \ 180 | --kubeconfig=/etc/kubernetes/kube-proxy.kubeconfig \ 181 | --logtostderr=true \ 182 | --v=2 183 | Restart=on-failure 184 | RestartSec=5 185 | LimitNOFILE=65536 186 | 187 | [Install] 188 | WantedBy=multi-user.target 189 | ``` 190 | 191 | + --hostname-override 参数值必须与 kubelet 的值一致,否则 kube-proxy 启动后会找不到该 Node,从而不会创建任何 iptables 规则 192 | + 特别注意:kube-proxy 根据 --cluster-cidr 判断集群内部和外部流量,指定 --cluster-cidr 或 --masquerade-all 选项后 kube-proxy 才会对访问 Service IP 的请求做 SNAT;但是这个特性与calico 实现 network policy冲突,所以如果要用 network policy,这两个选项都不要指定。 193 | 194 | ### 验证 node 状态 195 | 196 | ``` bash 197 | systemctl status kubelet # 查看状态 198 | systemctl status kube-proxy 199 | journalctl -u kubelet # 查看日志 200 | journalctl -u kube-proxy 201 | ``` 202 | 运行 `kubectl get node` 可以看到类似 203 | 204 | ``` bash 205 | NAME STATUS ROLES AGE VERSION 206 | 192.168.1.42 Ready 2d v1.9.0 207 | 192.168.1.43 Ready 2d v1.9.0 208 | 192.168.1.44 Ready 2d v1.9.0 209 | ``` 210 | 211 | 212 | [前一篇](05-安装kube-master节点.md) -- [后一篇](07-安装calico网络组件.md) 213 | -------------------------------------------------------------------------------- /docs/07-安装calico网络组件.md: -------------------------------------------------------------------------------- 1 | ## 07-安装calico网络组件.md 2 | 3 | 推荐阅读[feiskyer-kubernetes指南](https://github.com/feiskyer/kubernetes-handbook) 网络相关内容 4 | 5 | 首先回顾下K8S网络设计原则,在配置集群网络插件或者实践K8S 应用/服务部署请时刻想到这些原则: 6 | 7 | - 1.每个Pod都拥有一个独立IP地址,Pod内所有容器共享一个网络命名空间 8 | - 2.集群内所有Pod都在一个直接连通的扁平网络中,可通过IP直接访问 9 | - 所有容器之间无需NAT就可以直接互相访问 10 | - 所有Node和所有容器之间无需NAT就可以直接互相访问 11 | - 容器自己看到的IP跟其他容器看到的一样 12 | - 3.Service cluster IP尽可在集群内部访问,外部请求需要通过NodePort、LoadBalance或者Ingress来访问 13 | 14 | `Container Network Interface (CNI)`是目前CNCF主推的网络模型,它由两部分组成: 15 | 16 | - CNI Plugin负责给容器配置网络,它包括两个基本的接口 17 | - 配置网络: AddNetwork(net *NetworkConfig, rt *RuntimeConf) (types.Result, error) 18 | - 清理网络: DelNetwork(net *NetworkConfig, rt *RuntimeConf) error 19 | - IPAM Plugin负责给容器分配IP地址 20 | 21 | Kubernetes Pod的网络是这样创建的: 22 | - 0.每个Pod除了创建时指定的容器外,都有一个kubelet启动时指定的`基础容器`,比如:`mirrorgooglecontainers/pause-amd64` `registry.access.redhat.com/rhel7/pod-infrastructure` 23 | - 1.首先 kubelet创建`基础容器`生成network namespace 24 | - 2.然后 kubelet调用网络CNI driver,由它根据配置调用具体的CNI 插件 25 | - 3.然后 CNI 插件给`基础容器`配置网络 26 | - 4.最后 Pod 中其他的容器共享使用`基础容器`的网络 27 | 28 | 本文档基于CNI driver 调用calico 插件来配置kubernetes的网络,常用CNI插件有 `flannel` `calico` `weave`等等,这些插件各有优势,也在互相借鉴学习优点,比如:在所有node节点都在一个二层网络时候,flannel提供hostgw实现,避免vxlan实现的udp封装开销,估计是目前最高效的;calico也针对L3 Fabric,推出了IPinIP的选项,利用了GRE隧道封装;因此这些插件都能适合很多实际应用场景,这里选择calico,主要考虑它支持 `kubernetes network policy`。 29 | 30 | 推荐阅读[calico kubernetes guide](https://docs.projectcalico.org/v2.6/getting-started/kubernetes/) 31 | 32 | calico-node需要在所有master节点和node节点安装 33 | 34 | ``` bash 35 | roles/calico/ 36 | ├── tasks 37 | │   └── main.yml 38 | └── templates 39 | ├── calico-csr.json.j2 40 | ├── calicoctl.cfg.j2 41 | ├── calico-rbac.yaml.j2 42 | └── calico.yaml.j2 43 | ``` 44 | 请在另外窗口打开[roles/calico/tasks/main.yml](../roles/calico/tasks/main.yml) 文件,对照看以下讲解内容。 45 | 46 | ### 创建calico 证书申请 47 | 48 | ``` bash 49 | { 50 | "CN": "calico", 51 | "hosts": [], 52 | "key": { 53 | "algo": "rsa", 54 | "size": 2048 55 | }, 56 | 
"names": [ 57 | { 58 | "C": "CN", 59 | "ST": "HangZhou", 60 | "L": "XS", 61 | "O": "k8s", 62 | "OU": "System" 63 | } 64 | ] 65 | } 66 | ``` 67 | - calico 使用客户端证书,所以hosts字段可以为空;后续可以看到calico证书用在四个地方: 68 | - calico/node 这个docker 容器运行时访问 etcd 使用证书 69 | - cni 配置文件中,cni 插件需要访问 etcd 使用证书 70 | - calicoctl 操作集群网络时访问 etcd 使用证书 71 | - calico/kube-controllers 同步集群网络策略时访问 etcd 使用证书 72 | 73 | ### 创建 calico DaemonSet yaml文件和rbac 文件 74 | 75 | 请对照 roles/calico/templates/calico.yaml.j2文件注释和以下注意内容 76 | 77 | + 详细配置参数请参考[calico官方文档](https://docs.projectcalico.org/v2.6/reference/node/configuration) 78 | + calico-node是以docker容器运行在host上的,因此需要把之前的证书目录 /etc/calico/ssl挂载到容器中 79 | + 配置ETCD_ENDPOINTS 、CA、证书等,所有{{ }}变量与ansible hosts文件中设置对应 80 | + 配置集群POD网络 CALICO_IPV4POOL_CIDR={{ CLUSTER_CIDR }} 81 | + **重要**本K8S集群运行在同网段kvm虚机上,虚机间没有网络ACL限制,因此可以设置`CALICO_IPV4POOL_IPIP=off`,如果你的主机位于不同网段,或者运行在公有云上需要打开这个选项 `CALICO_IPV4POOL_IPIP=always` 82 | + 配置FELIX_DEFAULTENDPOINTTOHOSTACTION=ACCEPT 默认允许Pod到Node的网络流量,更多[felix配置选项](https://docs.projectcalico.org/v2.6/reference/felix/configuration) 83 | 84 | ### 安装calico 网络 85 | 86 | + 安装之前必须确保`kube-master`和`kube-node`节点已经成功部署 87 | + 只需要在任意装有kubectl客户端的节点运行 `kubectl create `安装即可,脚本中选取`NODE_ID=node1`节点安装 88 | + 等待15s后(视网络拉取calico相关镜像速度),calico 网络插件安装完成,删除之前kube-node安装时默认cni网络配置 89 | 90 | ### [可选]配置calicoctl工具 [calicoctl.cfg.j2](roles/calico/templates/calicoctl.cfg.j2) 91 | 92 | ``` bash 93 | apiVersion: v1 94 | kind: calicoApiConfig 95 | metadata: 96 | spec: 97 | datastoreType: "etcdv2" 98 | etcdEndpoints: {{ ETCD_ENDPOINTS }} 99 | etcdKeyFile: /etc/calico/ssl/calico-key.pem 100 | etcdCertFile: /etc/calico/ssl/calico.pem 101 | etcdCACertFile: /etc/calico/ssl/ca.pem 102 | ``` 103 | 104 | ### 验证calico网络 105 | 106 | 执行calico安装成功后可以验证如下:(需要等待镜像下载完成,有时候即便上一步已经配置了docker国内加速,还是可能比较慢,请确认以下容器运行起来以后,再执行后续验证步骤) 107 | 108 | ``` bash 109 | kubectl get pod --all-namespaces 110 | NAMESPACE NAME READY STATUS RESTARTS AGE 111 | kube-system calico-kube-controllers-5c6b98d9df-xj2n4 1/1 Running 0 1m 112 | kube-system calico-node-4hr52 2/2 Running 0 1m 113 | kube-system calico-node-8ctc2 2/2 Running 0 1m 114 | kube-system calico-node-9t8md 2/2 Running 0 1m 115 | ``` 116 | 117 | **查看网卡和路由信息** 118 | 119 | 先在集群创建几个测试pod: `kubectl run test --image=busybox --replicas=3 sleep 30000` 120 | 121 | ``` bash 122 | # 查看网卡信息 123 | ip a 124 | ``` 125 | 126 | + 可以看到包含类似cali1cxxx的网卡,是calico为测试pod生成的 127 | + tunl0网卡现在不用管,是默认生成的,当开启IPIP 特性时使用的隧道 128 | 129 | ``` bash 130 | # 查看路由 131 | route -n 132 | Kernel IP routing table 133 | Destination Gateway Genmask Flags Metric Ref Use Iface 134 | 0.0.0.0 192.168.1.1 0.0.0.0 UG 0 0 0 ens3 135 | 192.168.1.0 0.0.0.0 255.255.255.0 U 0 0 0 ens3 136 | 172.17.0.0 0.0.0.0 255.255.0.0 U 0 0 0 docker0 137 | 172.20.3.64 192.168.1.34 255.255.255.192 UG 0 0 0 ens3 138 | 172.20.33.128 0.0.0.0 255.255.255.192 U 0 0 0 * 139 | 172.20.33.129 0.0.0.0 255.255.255.255 UH 0 0 0 caliccc295a6d4f 140 | 172.20.104.0 192.168.1.35 255.255.255.192 UG 0 0 0 ens3 141 | 172.20.166.128 192.168.1.63 255.255.255.192 UG 0 0 0 ens3 142 | ``` 143 | 144 | **查看所有calico节点状态** 145 | 146 | ``` bash 147 | calicoctl node status 148 | Calico process is running. 
149 | 150 | IPv4 BGP status 151 | +--------------+-------------------+-------+----------+-------------+ 152 | | PEER ADDRESS | PEER TYPE | STATE | SINCE | INFO | 153 | +--------------+-------------------+-------+----------+-------------+ 154 | | 192.168.1.34 | node-to-node mesh | up | 12:34:00 | Established | 155 | | 192.168.1.35 | node-to-node mesh | up | 12:34:00 | Established | 156 | | 192.168.1.63 | node-to-node mesh | up | 12:34:01 | Established | 157 | +--------------+-------------------+-------+----------+-------------+ 158 | ``` 159 | 160 | **BGP 协议是通过TCP 连接来建立邻居的,因此可以用netstat 命令验证 BGP Peer** 161 | 162 | ``` bash 163 | netstat -antlp|grep ESTABLISHED|grep 179 164 | tcp 0 0 192.168.1.66:179 192.168.1.35:41316 ESTABLISHED 28479/bird 165 | tcp 0 0 192.168.1.66:179 192.168.1.34:40243 ESTABLISHED 28479/bird 166 | tcp 0 0 192.168.1.66:179 192.168.1.63:48979 ESTABLISHED 28479/bird 167 | ``` 168 | 169 | **查看集群ipPool情况** 170 | 171 | ``` bash 172 | calicoctl get ipPool -o yaml 173 | - apiVersion: v1 174 | kind: ipPool 175 | metadata: 176 | cidr: 172.20.0.0/16 177 | spec: 178 | nat-outgoing: true 179 | ``` 180 | 181 | [前一篇](06-安装kube-node节点.md) -- [后一篇]() 182 | -------------------------------------------------------------------------------- /docs/07-安装flannel网络组件.md: -------------------------------------------------------------------------------- 1 | ## 07-安装flannel网络组件.md 2 | 3 | ** 注意: ** 只需选择安装`calico` `flannel`其中之一,如果你已经安装了`calico`,请跳过此步骤。 4 | 5 | 关于k8s网络设计和CNI Plugin的介绍请阅读[安装calico](07-安装calico网络组件.md)中相关内容。 6 | 7 | `Flannel`是最早应用到k8s集群的网络插件之一,简单高效,且提供多个后端`backend`模式供选择;本文介绍以`DaemonSet Pod`方式集成到k8s集群,需要在所有master节点和node节点安装。 8 | 9 | ``` text 10 | roles/flannel/ 11 | ├── tasks 12 | │   └── main.yml 13 | └── templates 14 | └── kube-flannel.yaml.j2 15 | ``` 16 | 17 | 请在另外窗口打开[roles/flannel/tasks/main.yml](../roles/flannel/tasks/main.yml) 文件,对照看以下讲解内容。 18 | 19 | ### 下载基础cni 插件 20 | 21 | 请到CNI 插件最新[release](https://github.com/containernetworking/plugins/releases)页面下载[cni-v0.6.0.tgz](https://github.com/containernetworking/plugins/releases/download/v0.6.0/cni-v0.6.0.tgz),解压后里面有很多插件,选择如下几个复制到项目 `bin`目录下 22 | 23 | - flannel用到的插件 24 | - bridge 25 | - flannel 26 | - host-local 27 | - loopback 28 | - portmap 29 | 30 | Flannel CNI 插件的配置文件可以包含多个`plugin` 或由其调用其他`plugin`;`Flannel DaemonSet Pod`运行以后会生成`/run/flannel/subnet.env `文件,例如: 31 | 32 | ``` bash 33 | FLANNEL_NETWORK=10.1.0.0/16 34 | FLANNEL_SUBNET=10.1.17.1/24 35 | FLANNEL_MTU=1472 36 | FLANNEL_IPMASQ=true 37 | ``` 38 | 然后它利用这个文件信息去配置和调用`bridge`插件来生成容器网络,调用`host-local`来管理`IP`地址,例如: 39 | 40 | ``` bash 41 | { 42 | "name": "mynet", 43 | "type": "bridge", 44 | "mtu": 1472, 45 | "ipMasq": false, 46 | "isGateway": true, 47 | "ipam": { 48 | "type": "host-local", 49 | "subnet": "10.1.17.0/24" 50 | } 51 | } 52 | ``` 53 | - 更多相关介绍请阅读: 54 | - [flannel kubernetes 集成](https://github.com/coreos/flannel/blob/master/Documentation/kubernetes.md) 55 | - [flannel cni 插件](https://github.com/containernetworking/plugins/tree/master/plugins/meta/flannel) 56 | - [更多 cni 插件](https://github.com/containernetworking/plugins) 57 | 58 | ### 准备`Flannel DaemonSet` yaml配置文件 59 | 60 | 请阅读 `roles/flannel/templates/kube-flannel.yaml.j2` 内容,注意: 61 | 62 | + 本安装方式,flannel使用apiserver 存储数据,而不是 etcd 63 | + 配置相关RBAC 权限和 `service account` 64 | + 配置`ConfigMap`包含 CNI配置和 flannel配置(指定backend等),和`hosts`文件中相关设置对应 65 | + `DaemonSet Pod`包含两个容器,一个容器运行flannel本身,另一个init容器部署cni 配置文件 66 | + 为方便国内加速使用镜像 `jmgao1983/flannel:v0.9.1-amd64` (官方镜像在docker-hub上的转存) 67 | 68 | ### 安装 flannel网络 69 | 70 | + 
安装之前必须确保kube-master和kube-node节点已经成功部署 71 | + 只需要在任意装有kubectl客户端的节点运行 kubectl create安装即可,脚本中选取NODE_ID=node1节点安装 72 | + 等待15s后(视网络拉取相关镜像速度),flannel 网络插件安装完成,删除之前kube-node安装时默认cni网络配置 73 | 74 | ### 验证flannel网络 75 | 76 | 执行flannel安装成功后可以验证如下:(需要等待镜像下载完成,有时候即便上一步已经配置了docker国内加速,还是可能比较慢,请确认以下容器运行起来以后,再执行后续验证步骤) 77 | 78 | ``` bash 79 | # kubectl get pod --all-namespaces 80 | NAMESPACE NAME READY STATUS RESTARTS AGE 81 | kube-system kube-flannel-ds-m8mzm 1/1 Running 0 3m 82 | kube-system kube-flannel-ds-mnj6j 1/1 Running 0 3m 83 | kube-system kube-flannel-ds-mxn6k 1/1 Running 0 3m 84 | ``` 85 | 在集群创建几个测试pod: `kubectl run test --image=busybox --replicas=3 sleep 30000` 86 | 87 | ``` bash 88 | # kubectl get pod --all-namespaces -o wide|head -n 4 89 | NAMESPACE NAME READY STATUS RESTARTS AGE IP NODE 90 | default busy-5956b54c8b-ld4gb 1/1 Running 0 9m 172.20.2.7 192.168.1.1 91 | default busy-5956b54c8b-lj9l9 1/1 Running 0 9m 172.20.1.5 192.168.1.2 92 | default busy-5956b54c8b-wwpkz 1/1 Running 0 9m 172.20.0.6 192.168.1.3 93 | 94 | # 查看路由 95 | # ip route 96 | default via 192.168.1.254 dev ens3 onlink 97 | 192.168.1.0/24 dev ens3 proto kernel scope link src 192.168.1.1 98 | 172.17.0.0/16 dev docker0 proto kernel scope link src 172.17.0.1 linkdown 99 | 172.20.0.0/24 via 192.168.1.3 dev ens3 100 | 172.20.1.0/24 via 192.168.1.2 dev ens3 101 | 172.20.2.0/24 dev cni0 proto kernel scope link src 172.20.2.1 102 | ``` 103 | 现在各节点上分配 ping 这三个POD网段地址,确保能通: 104 | 105 | ``` bash 106 | ping 172.20.2.7 107 | ping 172.20.1.5 108 | ping 172.20.0.6 109 | ``` 110 | 111 | [前一篇](06-安装kube-node节点.md) -- [后一篇]() 112 | -------------------------------------------------------------------------------- /docs/guide/dashboard.md: -------------------------------------------------------------------------------- 1 | ## dashboard 2 | 3 | 本文档基于 dashboard 1.6.3版本,目前 dashboard 已出 1.8.0版本,在安全控制方面已有很大改进,后续更新新版本说明。 4 | 5 | ### 部署 6 | 7 | 配置文件参考 `https://github.com/kubernetes/kubernetes` 项目目录 `kubernetes/cluster/addons/dashboard` 8 | 9 | 安装很简单 `kubectl create -f manifests/dashboard/`,主要讲解一些注意事项 10 | 11 | 请在另外窗口打开 [kubernetes-dashboard.yaml](../../manifests/dashboard/kubernetes-dashboard.yaml) 12 | 13 | + 由于 kube-apiserver 启用了 RBAC授权,dashboard使用的 ServiceAccount `kubernetes-dashboard` 必须有相应的权限去访问apiserver(在新版本1.8.0中,该访问权限已按最小化方式授权),在1.6.3 版本,先粗放一点,把`kubernetes-dashboard` 与 集群角色 `cluster-admin` 绑定,这样dashboard就拥有了所有访问apiserver的权限。 14 | + 开发测试环境为了方便配置dashboard-service时候,指定了 `NodePort`方式暴露服务,这样集群外部可以使用 `http://NodeIP:NodePort` 方式直接访问 dashboard,生产环境建议关闭该访问途径。 15 | 16 | ### 验证 17 | 18 | ``` bash 19 | # 查看pod 运行状态 20 | kubectl get pod -n kube-system | grep dashboard 21 | kubernetes-dashboard-86bd8778bf-w4974 1/1 Running 0 12h 22 | # 查看dashboard service 23 | kubectl get svc -n kube-system|grep dashboard 24 | kubernetes-dashboard NodePort 10.68.7.67 80:5452/TCP 12h 25 | # 查看集群服务 26 | kubectl cluster-info|grep dashboard 27 | kubernetes-dashboard is running at https://192.168.1.10:6443/api/v1/namespaces/kube-system/services/kubernetes-dashboard/proxy 28 | # 查看pod 运行日志,关注有没有错误 29 | kubectl logs kubernetes-dashboard-86bd8778bf-w4974 -n kube-system 30 | ``` 31 | 32 | ### 访问 33 | 34 | 因为dashboard 作为k8s 原生UI,能够展示各种资源信息,甚至可以有修改、增加、删除权限,所以有必要对访问进行认证和控制,本项目预置部署的集群有以下安全设置:详见 [apiserver配置模板](../../roles/kube-master/templates/kube-apiserver.service.j2) 35 | 36 | + 启用 `TLS认证` `RBAC授权`等安全特性 37 | + 关闭 apiserver非安全端口8080的外部访问`--insecure-bind-address=127.0.0.1` 38 | + 关闭匿名认证`--anonymous-auth=false` 39 | + 补充启用基本密码认证 
`--basic-auth-file=/etc/kubernetes/ssl/basic-auth.csv`,[密码文件模板](../../roles/kube-master/templates/basic-auth.csv.j2)中按照每行(密码,用户名,序号)的格式,可以定义多个用户 40 | 41 | #### 1. 临时访问:使用 `http://NodeIP:NodePort` 方式直接访问 dashboard,生产环境建议关闭该途径 42 | 43 | #### 2. 用户+密码访问:安全性比证书方式差点,务必保管好密码文件`basic-auth.csv` 44 | 45 | - 这里演示两种权限,使用admin 登陆dashboard拥有所有权限,使用readonly 登陆后仅查看权限,首先在 master节点文件 `/etc/kubernetes/ssl/basic-auth.csv` 确认用户名和密码,如果要增加或者修改用户,修改保存该文件后记得逐个重启你的master 节点 46 | - 为了演示用户密码访问,如果你已经完成证书访问方式,你可以在浏览器删除证书,或者访问时候浏览器询问你证书时不选证书 47 | - 2.1 设置用户admin 的RBAC 权限,如下运行配置文件 `kubectl create -f ui-admin-rbac.yaml` 48 | 49 | ``` bash 50 | kind: ClusterRole 51 | apiVersion: rbac.authorization.k8s.io/v1 52 | metadata: 53 | name: ui-admin 54 | rules: 55 | - apiGroups: 56 | - "" 57 | resources: 58 | - services 59 | - services/proxy 60 | verbs: 61 | - '*' 62 | 63 | --- 64 | apiVersion: rbac.authorization.k8s.io/v1 65 | kind: RoleBinding 66 | metadata: 67 | name: ui-admin-binding 68 | namespace: kube-system 69 | roleRef: 70 | apiGroup: rbac.authorization.k8s.io 71 | kind: ClusterRole 72 | name: ui-admin 73 | subjects: 74 | - apiGroup: rbac.authorization.k8s.io 75 | kind: User 76 | name: admin 77 | ``` 78 | - 2.2 设置用户readonly 的RBAC 权限,如下运行配置文件 `kubectl create -f ui-read-rbac.yaml` 79 | 80 | ``` bash 81 | kind: ClusterRole 82 | apiVersion: rbac.authorization.k8s.io/v1 83 | metadata: 84 | name: ui-read 85 | rules: 86 | - apiGroups: 87 | - "" 88 | resources: 89 | - services 90 | - services/proxy 91 | verbs: 92 | - get 93 | - list 94 | - watch 95 | 96 | --- 97 | apiVersion: rbac.authorization.k8s.io/v1 98 | kind: RoleBinding 99 | metadata: 100 | name: ui-read-binding 101 | namespace: kube-system 102 | roleRef: 103 | apiGroup: rbac.authorization.k8s.io 104 | kind: ClusterRole 105 | name: ui-read 106 | subjects: 107 | - apiGroup: rbac.authorization.k8s.io 108 | kind: User 109 | name: readonly 110 | ``` 111 | - 2.3 访问 `https://x.x.x.x:6443/api/v1/namespaces/kube-system/services/kubernetes-dashboard/proxy` 使用 admin登陆拥有所有权限,比如删除某个部署;使用 readonly登陆只有查看权限,尝试删除某个部署会提示错误 `forbidden: User \"readonly\" cannot delete services/proxy in the namespace \"kube-system\"` 112 | 113 | #### 3. 
证书访问:最安全的方式,配置较复杂 114 | - 使用集群CA 生成客户端证书,可以根据需要生成权限不同的证书,这里为了演示直接使用 kubectl使用的证书和key(在03.kubectl.yml阶段生成),该证书拥有所有权限 115 | - 指定格式导出该证书,进入`/etc/kubernetes/ssl`目录,使用命令`openssl pkcs12 -export -in admin.pem -inkey admin-key.pem -out kube-admin.p12` 提示输入证书密码和确认密码,可以用密码再增加一层保护,也可以直接回车跳过,完成后目录下多了 `kube-admin.p12`文件,将它分发给授权的用户 116 | - 用户将 `kube-admin.p12` 双击导入证书即可,`IE` 和`Chrome` 中输入`https://x.x.x.x:6443/api/v1/namespaces/kube-system/services/kubernetes-dashboard/proxy` 或者 `https://x.x.x.x:6443/ui` 即可访问。补充:最新firefox需要在浏览器中单独导入 [选项] - [隐私与安全] - [证书/查看证书] - [您的证书] 页面点击 [导入] 该证书 117 | 118 | ### 小结 119 | 120 | + dashboard 版本 1.6.3 访问控制实现较复杂,文档中给出的例子也有助于你理解 RBAC的灵活控制能力,当然最好去[官方文档](https://kubernetes.io/docs/admin/authorization/rbac/)学习一下,这块篇幅不长 121 | + 由于还未部署 Heapster 插件,当前 dashboard 不能展示 Pod、Nodes 的 CPU、内存等 metric 图形,后续部署 heapster后自然能够看到 122 | + 本文中的权限设置仅供演示用,生产环境请在此基础上修改成适合你安全需求的方式 123 | 124 | [前一篇](kubedns.md) -- [目录](index.md) -- [后一篇](heapster.md) 125 | -------------------------------------------------------------------------------- /docs/guide/efk.md: -------------------------------------------------------------------------------- 1 | ## EFK 2 | -------------------------------------------------------------------------------- /docs/guide/harbor.md: -------------------------------------------------------------------------------- 1 | ## harbor 2 | 3 | Habor是由VMWare中国团队开源的容器镜像仓库。事实上,Habor是在Docker Registry上进行了相应的企业级扩展,从而获得了更加广泛的应用,这些新的企业级特性包括:管理用户界面,基于角色的访问控制 ,水平扩展,同步,AD/LDAP集成以及审计日志等。本文档仅说明部署单个基础harbor服务的步骤。 4 | 5 | ### 安装步骤 6 | 7 | 1. 在deploy节点下载最新的 [docker-compose](https://github.com/docker/compose/releases) 二进制文件,改名后把它放到项目 `/etc/ansible/bin`目录下,后续版本会一起打包进百度云盘`k8s.xxx.tar.gz`文件中,可以省略该步骤 8 | 9 | ``` bash 10 | wget https://github.com/docker/compose/releases/download/1.18.0/docker-compose-Linux-x86_64 11 | mv docker-compose-Linux-x86_64 /etc/ansible/bin/docker-compose 12 | ``` 13 | 2. 在deploy节点下载最新的 [harbor](https://github.com/vmware/harbor/releases) 离线安装包,把它放到项目 `/etc/ansible/down` 目录下,也可以从分享的百度云盘下载 14 | 15 | 3. 在deploy节点编辑/etc/ansible/hosts文件,可以参考 `example`目录下的模板,修改部分举例如下 16 | 17 | ``` bash 18 | # 如果启用harbor,请配置后面harbor相关参数 19 | [harbor] 20 | 192.168.1.8 NODE_IP="192.168.1.8" 21 | 22 | #私有仓库 harbor服务器 (域名或者IP) 23 | HARBOR_IP="192.168.1.8" 24 | HARBOR_DOMAIN="harbor.test.com" 25 | ``` 26 | 27 | 4. 在deploy节点执行 `cd /etc/ansible && ansible-playbook 11.harbor.yml`,完成harbor安装 28 | 29 | ### 安装讲解 30 | 31 | 根据 `11.harbor.yml`文件,harbor节点需要以下步骤: 32 | 33 | 1. role `prepare` 基础系统环境准备 34 | 1. role `docker` 安装docker 35 | 1. role `harbor` 安装harbor 36 | 37 | `kube-node`节点在harbor部署完之后,需要配置harbor的证书,并可以在hosts里面添加harbor的域名解析,如果你的环境中有dns服务器,可以跳过hosts文件设置 38 | 39 | 请在另外窗口打开 [roles/harbor/tasks/main.yml](../../roles/harbor/tasks/main.yml),对照以下讲解 40 | 41 | 1. 下载docker-compose可执行文件到$PATH目录 42 | 1. 自注册变量result判断是否已经安装harbor,避免重复安装问题 43 | 1. 解压harbor离线安装包到指定目录 44 | 1. 导入harbor所需 docker images 45 | 1. 创建harbor证书和私钥(复用集群的CA证书) 46 | 1. 修改harbor.cfg配置文件 47 | 1. 启动harbor安装脚本 48 | 49 | ### 验证harbor 50 | 51 | 1. 在harbor节点使用`docker ps -a` 查看harbor容器组件运行情况 52 | 1. 
浏览器访问harbor节点的IP地址 `https://{{ NODE_IP }}`,使用账号 admin 和 密码 Harbor12345 (harbor.cfg 配置文件中的默认)登陆系统 53 | 54 | ### 在k8s集群使用harbor 55 | 56 | admin用户web登陆后可以方便的创建项目,并指定项目属性(公开或者私有);然后创建用户,并在项目`成员`选项中选择用户和权限; 57 | 58 | #### 镜像上传 59 | 60 | 在node上使用harbor私有镜像仓库首先需要在指定目录配置harbor的CA证书,详见 `11.harbor.yml`文件。 61 | 62 | 使用docker客户端登陆`harbor.test.com`,然后把镜像tag成 `harbor.test.com/$项目名/$镜像名:$TAG` 之后,即可使用docker push 上传 63 | 64 | ``` bash 65 | docker login harbor.test.com 66 | Username: 67 | Password: 68 | Login Succeeded 69 | docker tag busybox:latest harbor.test.com/library/busybox:latest 70 | docker push harbor.test.com/library/busybox:latest 71 | The push refers to a repository [harbor.test.com/library/busybox] 72 | 0271b8eebde3: Pushed 73 | latest: digest: sha256:91ef6c1c52b166be02645b8efee30d1ee65362024f7da41c404681561734c465 size: 527 74 | ``` 75 | #### k8s中使用harbor 76 | 77 | 1. 如果镜像保存在harbor中的公开项目中,那么只需要在yaml文件中简单指定harbor私有镜像即可,例如 78 | 79 | ``` bash 80 | apiVersion: v1 81 | kind: Pod 82 | metadata: 83 | name: test-busybox 84 | spec: 85 | containers: 86 | - name: test-busybox 87 | image: harbor.test.com/xxx/busybox:latest 88 | imagePullPolicy: Always 89 | ``` 90 | 91 | 2. 如果镜像保存在harbor中的私有项目中,那么yaml文件中使用该私有项目的镜像需要指定`imagePullSecrets`,例如 92 | 93 | ``` bash 94 | apiVersion: v1 95 | kind: Pod 96 | metadata: 97 | name: test-busybox 98 | spec: 99 | containers: 100 | - name: test-busybox 101 | image: harbor.test.com/xxx/busybox:latest 102 | imagePullPolicy: Always 103 | imagePullSecrets: 104 | - name: harborKey1 105 | ``` 106 | 其中 `harborKey1`可以用以下两种方式生成: 107 | 108 | + 1.使用 `kubectl create secret docker-registry harborkey1 --docker-server=harbor.test.com --docker-username=admin --docker-password=Harbor12345 --docker-email=team@test.com` 109 | + 2.使用yaml配置文件生成 110 | 111 | ``` bash 112 | //harborkey1.yaml 113 | apiVersion: v1 114 | kind: Secret 115 | metadata: 116 | name: harborkey1 117 | namespace: default 118 | data: 119 | .dockerconfigjson: {base64 -w 0 ~/.docker/config.json} 120 | type: kubernetes.io/dockerconfigjson 121 | ``` 122 | 前面docker login会在~/.docker下面创建一个config.json文件保存鉴权串,这里secret yaml的.dockerconfigjson后面的数据就是那个json文件的base64编码输出(-w 0让base64输出在单行上,避免折行) 123 | 124 | ### 管理harbor 125 | 126 | + 日志目录 `/var/log/harbor` 127 | + 数据目录 `/data` ,其中最主要是 `/data/database` 和 `/data/registry` 目录,如果你要彻底重新安装harbor,删除这两个目录即可 128 | 129 | 先进入harbor安装目录 `cd /root/local/harbor`,常规操作如下: 130 | 131 | 1. 暂停harbor `docker-compose stop` : docker容器stop,并不删除容器 132 | 2. 恢复harbor `docker-compose start` : 恢复docker容器运行 133 | 3. 停止harbor `docker-compose down -v` : 停止并删除docker容器 134 | 4. 启动harbor `docker-compose up -d` : 启动所有docker容器 135 | 136 | 修改harbor的运行配置,需要如下步骤: 137 | 138 | ``` bash 139 | # 停止 harbor 140 | docker-compose down -v 141 | # 修改配置 142 | vim harbor.cfg 143 | # 执行./prepare已更新配置到docker-compose.yml文件 144 | ./prepare 145 | # 启动 harbor 146 | docker-compose up -d 147 | ``` 148 | #### harbor 升级 149 | 150 | 以下步骤基于harbor 1.1.2 版本升级到 1.2.2版本 151 | 152 | ``` bash 153 | # 进入harbor解压缩后的目录,停止harbor 154 | cd /root/local/harbor 155 | docker-compose down 156 | 157 | # 备份这个目录 158 | cd .. 
159 | mkdir -p /backup && mv harbor /backup/harbor 160 | 161 | # 下载更新的离线安装包,并解压 162 | tar zxvf harbor-offline-installer-v1.2.2.tgz -C /root/local 163 | 164 | # 使用官方数据库迁移工具,备份数据库,修改数据库连接用户和密码,创建数据库备份目录 165 | # 迁移工具使用docker镜像,镜像tag由待升级到目标harbor版本决定,这里由 1.1.2升级到1.2.2,所以使用 tag 1.2 166 | docker pull vmware/harbor-db-migrator:1.2 167 | mkdir -p /backup/db-1.1.2 168 | docker run -it --rm -e DB_USR=root -e DB_PWD=xxxx -v /data/database:/var/lib/mysql -v /backup/db-1.1.2:/harbor-migration/backup vmware/harbor-db-migrator:1.2 backup 169 | 170 | # 因为新老版本数据库结构不一样,需要数据库migration 171 | docker run -it --rm -e DB_USR=root -e DB_PWD=xxxx -v /data/database:/var/lib/mysql vmware/harbor-db-migrator:1.2 up head 172 | 173 | # 修改新版本 harbor.cfg配置,需要保持与老版本相关配置项保持一致,然后执行安装即可 174 | cd /root/local/harbor 175 | vi harbor.cfg 176 | ./install.sh 177 | 178 | [前一篇]() -- [目录](index.md) -- [后一篇]() 179 | -------------------------------------------------------------------------------- /docs/guide/heapster.md: -------------------------------------------------------------------------------- 1 | ## heapster 2 | 3 | `Heapster` 监控整个集群资源的过程:首先kubelet内置的cAdvisor收集本node节点的容器资源占用情况,然后heapster从kubelet提供的api采集节点和容器的资源占用,最后heapster 持久化数据存储到`influxdb`中(也可以是其他的存储后端,Google Cloud Monitoring等)。 4 | 5 | `Grafana` 则通过配置数据源指向上述 `influxdb`,从而界面化显示监控信息。 6 | 7 | ### 部署 8 | 9 | 访问 [heapster release](https://github.com/kubernetes/heapster)页面下载最新 release 1.4.3,参考目录`heapster-1.3.0/deploy/kube-config/influxdb`,因为这个官方release 在k8s1.8.4使用还是有不少问题,请在参考的基础上使用本项目提供的yaml文件 10 | 11 | 1. [grafana](../../manifests/heapster/grafana.yaml) 12 | 1. [heapster](../../manifests/heapster/heapster.yaml) 13 | 1. [influxdb](../../manifests/heapster/influxdb.yaml) 14 | 15 | 安装比较简单 `kubectl create -f /etc/ansible/manifests/heapster/`,主要讲一下注意事项 16 | 17 | #### grafana.yaml配置 18 | 19 | + 修改`heapster-grafana-amd64`镜像,v4.2.0版本修改成 v4.4.3版本,否则 grafana pod无法起来,报`CrashLoopBackOff`错误,详见[ISSUE](https://github.com/kubernetes/heapster/issues/1806) 20 | + 参数`- name: GF_SERVER_ROOT_URL`的设置要根据后续访问grafana的方式确定,如果使用 NodePort方式访问,必须设置成:`value: /`;如果使用apiserver proxy方式,必须设置成`value: /api/v1/namespaces/kube-system/services/monitoring-grafana/proxy/`,注意官方文件中预设的`value: /api/v1/proxy/namespaces/kube-system/services/monitoring-grafana/`已经不适合k8s 1.8.0版本了, 21 | + `kubernetes.io/cluster-service: 'true'` 和 `type: NodePort` 根据上述的访问方式设置,建议使用apiserver 方式,可以增加安全控制 22 | 23 | #### heapster.yaml配置 24 | 25 | + 需要配置 RBAC 把 ServiceAccount `heapster` 与集群预定义的集群角色 `system:heapster` 绑定,这样heapster pod才有相应权限去访问 apiserver 26 | 27 | #### influxdb.yaml配置 28 | 29 | + influxdb 官方建议使用命令行或 HTTP API 接口来查询数据库,从 v1.1.0 版本开始默认关闭 admin UI,这里参考[opsnull](https://github.com/opsnull/follow-me-install-kubernetes-cluster/blob/master/10-%E9%83%A8%E7%BD%B2Heapster%E6%8F%92%E4%BB%B6.md)给出的方法,增加ConfigMap配置,然后挂载到容器中,覆盖默认配置 30 | + 注意influxdb 这个版本只能使用 NodePort方式访问它的admin UI,才能正确连接数据库 31 | 32 | ### 验证 33 | 34 | ``` bash 35 | $ kubectl get pods -n kube-system | grep -E 'heapster|monitoring' 36 | heapster-3273315324-tmxbg 1/1 Running 0 11m 37 | monitoring-grafana-2255110352-94lpn 1/1 Running 0 11m 38 | monitoring-influxdb-884893134-3vb6n 1/1 Running 0 11m 39 | ``` 40 | 扩展检查Pods日志: 41 | ``` bash 42 | $ kubectl logs heapster-3273315324-tmxbg -n kube-system 43 | $ kubectl logs monitoring-grafana-2255110352-94lpn -n kube-system 44 | $ kubectl logs monitoring-influxdb-884893134-3vb6n -n kube-system 45 | ``` 46 | 部署完heapster,使用上一步介绍方法查看kubernets dashboard 界面,就可以看到各 Nodes、Pods 的 CPU、内存、负载等利用率曲线图,如果 dashboard上还无法看到利用率图,使用以下命令重启 dashboard pod: 47 | + 首先删除 
`kubectl scale deploy kubernetes-dashboard --replicas=0 -n kube-system` 48 | + 然后新建 `kubectl scale deploy kubernetes-dashboard --replicas=1 -n kube-system` 49 | 50 | ### 访问 grafana 51 | 52 | #### 1.通过apiserver 访问(建议的方式) 53 | 54 | ``` bash 55 | kubectl cluster-info | grep grafana 56 | monitoring-grafana is running at https://x.x.x.x:6443/api/v1/namespaces/kube-system/services/monitoring-grafana/proxy 57 | ``` 58 | 请参考上一步 [访问dashboard](dashboard.md)同样的方式,使用证书或者密码认证,访问`https://x.x.x.x:6443/api/v1/namespaces/kube-system/services/monitoring-grafana/proxy`即可,如图可以点击[Home]选择查看 `Cluster` `Pods`的监控图形 59 | 60 | ![grafana](../../pics/grafana.png) 61 | 62 | #### 2.通过NodePort 访问 63 | 64 | + 修改 `Service` 允许 type: NodePort 65 | + 修改 `Deployment`中参数`- name: GF_SERVER_ROOT_URL`为 `value: /` 66 | + 如果之前grafana已经运行,使用 `kubectl replace --force -f /etc/ansible/manifests/heapster/grafana.yaml` 重启 grafana插件 67 | 68 | ``` bash 69 | kubectl get svc -n kube-system|grep grafana 70 | monitoring-grafana NodePort 10.68.135.50 80:5855/TCP 11m 71 | ``` 72 | 然后用浏览器访问 http://NodeIP:5855 73 | 74 | ### 访问 influxdb 75 | 76 | 官方建议使用命令行或 HTTP API 接口来查询`influxdb`数据库,如非必要就跳过此步骤 77 | 78 | 目前根据测试 k8s v1.8.4 使用 NodePort 方式访问 admin 界面后才能正常连接数据库 79 | 80 | ``` bash 81 | kubectl get svc -n kube-system|grep influxdb 82 | monitoring-influxdb NodePort 10.68.195.193 8086:3382/TCP,8083:7651/TCP 12h 83 | ``` 84 | + 如上例子,8083是管理页面端口,对外暴露的端口为7651 85 | + 8086 是数据连接端口,对外暴露的端口为3382 86 | 87 | 使用浏览器访问 http://NodeIP:7651,如图在页面的 “Connection Settings” 的 Host 中输入 node IP, Port 中输入 3382(由8086对外暴露的端口),点击 “Save” 即可 88 | 89 | ![influxdb](../../pics/influxdb.png) 90 | 91 | 92 | [前一篇](dashboard.md) -- [目录](index.md) -- [后一篇](ingress.md) 93 | -------------------------------------------------------------------------------- /docs/guide/hpa.md: -------------------------------------------------------------------------------- 1 | ## Horizontal Pod Autoscaling 2 | 3 | 自动水平伸缩,是指运行在k8s上的应用负载(POD),可以根据资源使用率进行自动扩容、缩容;我们知道应用的资源使用率通常都有高峰和低谷,所以k8s的`HPA`特性应运而生;它也是最能体现区别于传统运维的优势之一,不仅能够弹性伸缩,而且完全自动化! 
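后文的基础示例使用 `kubectl autoscale` 命令创建 HPA;等价地,也可以用 yaml 文件声明式地创建 `autoscaling/v1` 版本的 HPA 对象,下面给出一个参考写法(仅为示意,其中 php-apache 与后文基础示例的 Deployment 名称一致,阈值和副本数可按需调整):

``` bash
# hpa-php-apache.yaml(文件名仅为示例)
apiVersion: autoscaling/v1
kind: HorizontalPodAutoscaler
metadata:
  name: php-apache
spec:
  scaleTargetRef:
    # 也可显式指定 apiVersion(视 Deployment 实际使用的 API 版本而定)
    kind: Deployment
    name: php-apache
  minReplicas: 1
  maxReplicas: 10
  targetCPUUtilizationPercentage: 50
```
创建并查看:`kubectl create -f hpa-php-apache.yaml && kubectl get hpa php-apache`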
4 | 5 | 根据 CPU 使用率或自定义 metrics 自动扩展 Pod 数量(支持 replication controller、deployment);k8s1.6版本之前是通过kubelet来获取监控指标,1.6版本之后是通过api server、heapster或者kube-aggregator来获取监控指标。 6 | 7 | ### Metrics支持 8 | 9 | 根据不同版本的API中,HPA autoscale时靠以下指标来判断资源使用率: 10 | - autoscaling/v1: CPU 11 | - autoscaling/v2alpha1 12 | - 内存 13 | - 自定义metrics 14 | - 多metrics组合: 根据每个metric的值计算出scale的值,并将最大的那个指作为扩容的最终结果 15 | 16 | ### 基础示例 17 | 18 | 本实验环境基于k8s 1.8 和 1.9,仅使用`autoscaling/v1` 版本API 19 | 20 | ``` bash 21 | # 创建deploy和service 22 | $ kubectl run php-apache --image=pilchard/hpa-example --requests=cpu=200m --expose --port=80 23 | 24 | # 创建autoscaler 25 | $ kubectl autoscale deploy php-apache --cpu-percent=50 --min=1 --max=10 26 | 27 | # 稍等查看hpa状态 28 | $ kubectl get hpa php-apache 29 | NAME REFERENCE TARGETS MINPODS MAXPODS REPLICAS AGE 30 | php-apache Deployment/php-apache 0% / 50% 1 10 1 1d 31 | 32 | # 增加负载 33 | $ kubectl run --rm -it load-generator --image=busybox /bin/sh 34 | Hit enter for command prompt 35 | $ while true; do wget -q -O- http://php-apache; done; 36 | 37 | # 稍等查看hpa显示负载增加,且副本数目增加为4 38 | $ kubectl get hpa php-apache 39 | NAME REFERENCE TARGETS MINPODS MAXPODS REPLICAS AGE 40 | php-apache Deployment/php-apache 430% / 50% 1 10 4 4m 41 | 42 | # 注意k8s为了避免频繁增删pod,对副本的增加速度有限制 43 | # 实验过程可以看到副本数目从1到4到8到10,大概都需要4~5分钟的缓冲期 44 | $ kubectl get hpa php-apache 45 | NAME REFERENCE TARGETS MINPODS MAXPODS REPLICAS AGE 46 | php-apache Deployment/php-apache 86% / 50% 1 10 8 9m 47 | $ kubectl get hpa php-apache 48 | NAME REFERENCE TARGETS MINPODS MAXPODS REPLICAS AGE 49 | php-apache Deployment/php-apache 52% / 50% 1 10 10 12m 50 | 51 | # 清除负载,CTRL+C 结束上述循环程序,稍后副本数目变回1 52 | $ kubectl get hpa php-apache 53 | NAME REFERENCE TARGETS MINPODS MAXPODS REPLICAS AGE 54 | php-apache Deployment/php-apache 0% / 50% 1 10 1 17m 55 | ``` 56 | 57 | -------------------------------------------------------------------------------- /docs/guide/index.md: -------------------------------------------------------------------------------- 1 | ## 使用指南 2 | 3 | ### 附加组件安装 4 | 5 | - 安装 [kubedns](kubedns.md) 6 | - 安装 [dashboard](dashboard.md) 7 | - 安装 [heapster](heapster.md) 8 | - 安装 [ingress](ingress.md) 9 | - 安装 efk 10 | - 安装 [harbor](harbor.md) 11 | 12 | ### 基础特性演示 13 | 14 | - 自动水平伸缩-基础 [Horizontal Pod Autoscaling](hpa.md) 15 | - 网络安全策略 [Network Policy](networkpolicy.md) 16 | 17 | ### 集群维护指南 18 | 19 | - 集群状态检查 20 | - 集群扩容 21 | - node 节点扩容 22 | - master 节点扩容 23 | - etcd 集群扩容 24 | - 清理集群 25 | 26 | ### 应用实践 27 | 28 | - 官方入门教程 29 | - Django 应用部署 30 | - Java tomcat 应用部署 31 | - NFS StorageClass 动态存储卷实践 32 | 33 | ### 其他 34 | 35 | -------------------------------------------------------------------------------- /docs/guide/ingress.md: -------------------------------------------------------------------------------- 1 | ## Ingress简介 2 | 3 | ingress就是从kubernetes集群外访问集群的入口,将用户的URL请求转发到不同的service上。ingress相当于nginx反向代理服务器,它包括的规则定义就是URL的路由信息;它的实现需要部署`Ingress controller`(比如 [traefik](https://github.com/containous/traefik) [ingress-nginx](https://github.com/kubernetes/ingress-nginx) 等),`Ingress controller`通过apiserver监听ingress和service的变化,并根据规则配置负载均衡并提供访问入口,达到服务发现的作用。 4 | 5 | + 未配置ingress: 6 | 7 | 集群外部 -> NodePort -> K8S Service 8 | 9 | + 配置ingress: 10 | 11 | 集群外部 -> Ingress -> K8S Service 12 | 13 | + 注意:ingress 本身也需要部署`Ingress controller`时暴露`NodePort`让外部访问 14 | 15 | ### 部署 Traefik 16 | 17 | Traefik 提供了一个简单好用 `Ingress controller`,下文基于它讲解一个简单的 ingress部署和测试例子。请查看yaml配置 [traefik-ingress.yaml](../../manifests/ingress/traefik-ingress.yaml),参考[traefik 
官方k8s例子](https://github.com/containous/traefik/tree/master/examples/k8s) 18 | 19 | #### 安装 traefik ingress-controller 20 | 21 | ``` bash 22 | kubectl create -f /etc/ansible/manifests/ingress/traefik-ingress.yaml 23 | ``` 24 | + 注意需要配置 `RBAC`授权 25 | + 注意trafik `Service`中 `80`端口为 traefik ingress-controller的服务端口,`8080`端口为 traefik 的管理WEB界面;为后续配置方便指定`80` 端口暴露`NodePort`端口为 `23456`(对应于在hosts配置中`NODE_PORT_RANGE`范围内可用端口) 26 | 27 | #### 验证 traefik ingress-controller 28 | 29 | ``` bash 30 | # kubectl get deploy -n kube-system traefik-ingress-controller 31 | NAME DESIRED CURRENT UP-TO-DATE AVAILABLE AGE 32 | traefik-ingress-controller 1 1 1 1 4m 33 | 34 | # kubectl get svc -n kube-system traefik-ingress-service 35 | NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE 36 | traefik-ingress-service NodePort 10.68.69.170 80:23456/TCP,8080:34815/TCP 4m 37 | ``` 38 | + 可以看到`traefik-ingress-service` 服务端口`80`暴露的nodePort确实为`23456` 39 | 40 | #### 测试 ingress 41 | 42 | + 首先创建测试用K8S应用,并且该应用服务不用nodePort暴露,而是用ingress方式让外部访问 43 | 44 | ``` bash 45 | kubectl run test-hello --image=nginx --expose --port=80 46 | ## 47 | # kubectl get deploy test-hello 48 | NAME DESIRED CURRENT UP-TO-DATE AVAILABLE AGE 49 | test-hello 1 1 1 1 56s 50 | # kubectl get svc test-hello 51 | NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE 52 | test-hello ClusterIP 10.68.124.115 80/TCP 1m 53 | ``` 54 | + 然后为这个应用创建 ingress,`kubectl create -f /etc/ansible/manifests/ingress/test-hello.ing.yaml` 55 | 56 | ``` bash 57 | # test-hello.ing.yaml内容 58 | apiVersion: extensions/v1beta1 59 | kind: Ingress 60 | metadata: 61 | name: test-hello 62 | spec: 63 | rules: 64 | - host: hello.test.com 65 | http: 66 | paths: 67 | - path: / 68 | backend: 69 | serviceName: test-hello 70 | servicePort: 80 71 | ``` 72 | + 集群内部尝试访问: `curl -H Host:hello.test.com 10.68.124.115` 能够看到欢迎页面 `Welcome to nginx!`;在集群外部尝试访问(假定集群一个NodeIP为 192.168.1.1): `curl -H Host:hello.test.com 192.168.1.1:23456`,也能够看到欢迎页面 `Welcome to nginx!`,说明ingress测试成功 73 | 74 | + 最后我们可以为traefik WEB管理页面也创建一个ingress, `kubectl create -f /etc/ansible/manifests/ingress/traefik-ui.ing.yaml` 75 | 76 | ``` bash 77 | # traefik-ui.ing.yaml内容 78 | --- 79 | apiVersion: extensions/v1beta1 80 | kind: Ingress 81 | metadata: 82 | name: traefik-web-ui 83 | namespace: kube-system 84 | spec: 85 | rules: 86 | - host: traefik-ui.test.com 87 | http: 88 | paths: 89 | - path: / 90 | backend: 91 | serviceName: traefik-ingress-service 92 | servicePort: 8080 93 | ``` 94 | 这样在集群外部可以使用 `curl -H Host:traefik-ui.test.com 192.168.1.1:23456` 尝试访问WEB管理页面,返回 `Found.`说明 traefik-ui的ingress配置生效了。 95 | 96 | ### [可选] 部署`ingress-service`的代理 97 | 98 | 在客户端主机上可以通过修改本机 `hosts` 文件,如上例子,增加两条记录: 99 | 100 | ``` text 101 | 192.168.1.1 hello.test.com 102 | 192.168.1.1 traefik-ui.test.com 103 | ``` 104 | 打开浏览器输入域名 `http://hello.test.com:23456` 和 `http://traefik-ui.test.com:23456` 就可以访问k8s的应用服务了。 105 | 106 | 当然如果你的环境中有类似 nginx/haproxy 等代理,可以做代理转发以去掉 `23456`这个端口,这里以 haproxy演示下。 107 | 108 | 如果你的集群根据本项目部署了高可用方案,那么可以利用`LB` 节点haproxy 来做,当然如果生产环境K8S应用已经部署非常多,建议还是使用独立的 `nginx/haproxy`集群 109 | 110 | 在 LB 主备节点,修改 `/etc/haproxy/haproxy.cfg`类似如下: 111 | 112 | ``` bash 113 | global 114 | log /dev/log local0 115 | log /dev/log local1 notice 116 | chroot /var/lib/haproxy 117 | stats socket /run/haproxy/admin.sock mode 660 level admin 118 | stats timeout 30s 119 | user haproxy 120 | group haproxy 121 | daemon 122 | nbproc 1 123 | 124 | defaults 125 | log global 126 | timeout connect 5000 127 | timeout client 50000 128 | timeout server 50000 129 | 130 | listen kube-master 131 | bind 
0.0.0.0:8443 132 | mode tcp 133 | option tcplog 134 | balance source 135 | # 根据实际kube-master 节点数量增减如下endpoints 136 | server s1 192.168.1.1:6443 check inter 10000 fall 2 rise 2 weight 1 137 | server s2 192.168.1.2:6443 check inter 10000 fall 2 rise 2 weight 1 138 | 139 | listen kube-node 140 | # 先确认 LB节点80端口可用 141 | bind 0.0.0.0:80 142 | mode tcp 143 | option tcplog 144 | balance source 145 | # 根据实际kube-node 节点数量增减如下endpoints 146 | server s1 192.168.1.1:23456 check inter 10000 fall 2 rise 2 weight 1 147 | server s2 192.168.1.2:23456 check inter 10000 fall 2 rise 2 weight 1 148 | server s3 192.168.1.3:23456 check inter 10000 fall 2 rise 2 weight 1 149 | ``` 150 | 修改保存后,重启haproxy服务; 151 | 152 | 这样我们就可以访问集群`master-VIP`的`80`端口,由haproxy代理转发到实际的node节点和nodePort端口上了。这时可以修改客户端本机 `hosts`文件如下:(假定 master-VIP=192.168.1.10) 153 | 154 | ``` text 155 | 192.168.1.10 hello.test.com 156 | 192.168.1.10 traefik-ui.test.com 157 | ``` 158 | 打开浏览器输入域名 `http://hello.test.com` 和 `http://traefik-ui.test.com`可以正常访问。 159 | 160 | 161 | [前一篇](heapster.md) -- [目录](index.md) -- [后一篇](efk.md) 162 | -------------------------------------------------------------------------------- /docs/guide/kubedns.md: -------------------------------------------------------------------------------- 1 | ## 部署 kubedns 2 | 3 | kubedns 是 k8s 集群首先需要部署的,集群中的其他 pods 使用它提供域名解析服务;主要可以解析 `集群服务名` 和 `Pod hostname`; 4 | 5 | 配置文件参考 `https://github.com/kubernetes/kubernetes` 项目目录 `kubernetes/cluster/addons/dns` 6 | 7 | ### 安装 8 | 9 | **kubectl create -f /etc/ansible/manifests/kubedns/[kubedns.yaml](../../manifests/kubedns/kubedns.yaml)** 10 | 11 | + 注意deploy中使用的 serviceAccount `kube-dns`,该预定义的 ClusterRoleBinding system:kube-dns 将 kube-system 命名空间的 kube-dns ServiceAccount 与 system:kube-dns ClusterRole 绑定, 因此POD 具有访问 kube-apiserver DNS 相关 API 的权限; 12 | + 集群 pod默认继承 node的dns 解析,修改 kubelet服务启动参数 --resolv-conf="",可以更改这个特性,详见 kubelet 启动参数 13 | 14 | ### 验证 kubedns 15 | 16 | 新建一个测试nginx服务 17 | 18 | `kubectl run nginx --image=nginx --expose --port=80` 19 | 20 | 确认nginx服务 21 | 22 | ``` bash 23 | kubectl get pod|grep nginx 24 | nginx-7cbc4b4d9c-fl46v 1/1 Running 0 1m 25 | kubectl get svc|grep nginx 26 | nginx ClusterIP 10.68.33.167 80/TCP 1m 27 | ``` 28 | 29 | 测试pod busybox 30 | 31 | ``` bash 32 | kubectl run busybox --rm -it --image=busybox /bin/sh 33 | If you don't see a command prompt, try pressing enter. 34 | / # cat /etc/resolv.conf 35 | nameserver 10.68.0.2 36 | search default.svc.cluster.local. svc.cluster.local. cluster.local. 
37 | options ndots:5 38 | # 测试集群内部服务解析 39 | / # nslookup nginx 40 | Server: 10.68.0.2 41 | Address 1: 10.68.0.2 kube-dns.kube-system.svc.cluster.local 42 | 43 | Name: nginx 44 | Address 1: 10.68.33.167 nginx.default.svc.cluster.local 45 | / # nslookup kubernetes 46 | Server: 10.68.0.2 47 | Address 1: 10.68.0.2 kube-dns.kube-system.svc.cluster.local 48 | 49 | Name: kubernetes 50 | Address 1: 10.68.0.1 kubernetes.default.svc.cluster.local 51 | # 测试外部域名的解析,默认集成node的dns解析 52 | / # nslookup www.baidu.com 53 | Server: 10.68.0.2 54 | Address 1: 10.68.0.2 kube-dns.kube-system.svc.cluster.local 55 | 56 | Name: www.baidu.com 57 | Address 1: 180.97.33.108 58 | Address 2: 180.97.33.107 59 | / # 60 | ``` 61 | 62 | [前一篇](index.md) -- [目录](index.md) -- [后一篇](dashboard.md) 63 | -------------------------------------------------------------------------------- /docs/guide/networkpolicy.md: -------------------------------------------------------------------------------- 1 | ## Network Policy 2 | -------------------------------------------------------------------------------- /docs/quickStart.md: -------------------------------------------------------------------------------- 1 | ## 快速指南 2 | 3 | 以下为基于Ubuntu 16.04/CentOS 7.4 快速体验k8s集群的测试、开发环境--AllinOne部署,觉得比官方的minikube方便、简单很多。 4 | 5 | ### 1.基础系统配置 6 | 7 | + 推荐内存2G/硬盘20G以上 8 | + 最小化安装`Ubuntu 16.04 server`或者`CentOS 7 Minimal` 9 | + 配置基础网络、更新源、SSH登陆等 10 | 11 | ### 2.安装依赖工具 12 | 13 | Ubuntu 16.04 请执行以下脚本: 14 | 15 | ``` bash 16 | # 文档中脚本默认均以root用户执行 17 | apt-get update && apt-get upgrade -y && apt-get dist-upgrade -y 18 | # 删除不要的默认安装 19 | apt-get purge ufw lxd lxd-client lxcfs lxc-common 20 | # 安装依赖工具 21 | apt-get install python2.7 git python-pip 22 | # Ubuntu16.04可能需要配置以下软连接 23 | ln -s /usr/bin/python2.7 /usr/bin/python 24 | ``` 25 | CentOS 7 请执行以下脚本: 26 | 27 | ``` bash 28 | # 文档中脚本默认均以root用户执行 29 | # 安装 epel 源并更新 30 | yum install epel-release -y 31 | yum update 32 | # 删除不要的默认安装 33 | yum erase firewalld firewalld-filesystem python-firewall -y 34 | # 安装依赖工具 35 | yum install git python python-pip -y 36 | ``` 37 | ### 3.ansible安装及准备 38 | 39 | ``` bash 40 | # 安装ansible (国内如果安装太慢可以直接用pip阿里云加速) 41 | #pip install pip --upgrade 42 | #pip install ansible 43 | pip install pip --upgrade -i http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com 44 | pip install --no-cache-dir ansible -i http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com 45 | # 配置ansible ssh密钥登陆 46 | ssh-keygen -t rsa -b 2048 回车 回车 回车 47 | ssh-copy-id $IP #$IP为本虚机地址,按照提示输入yes 和root密码 48 | ``` 49 | ### 4.安装kubernetes集群 50 | ``` bash 51 | git clone https://github.com/gjmzj/kubeasz.git 52 | mv kubeasz /etc/ansible 53 | # 下载已打包好的binaries,并且解压缩到/etc/ansible/bin目录 54 | # 国内请从分享的百度云链接下载 https://pan.baidu.com/s/1c4RFaA 55 | # 如果你有合适网络环境也可以按照/down/download.sh自行从官网下载各种tar包到 ./down目录,并执行download.sh 56 | tar zxvf k8s.191.tar.gz 57 | mv bin/* /etc/ansible/bin 58 | # 配置ansible的hosts文件 59 | cd /etc/ansible 60 | cp example/hosts.allinone.example hosts 61 | 然后根据实际情况修改此hosts文件,所有节点都是本虚机IP 62 | # 采用一步安装或者分步安装 63 | ansible-playbook 90.setup.yml # 一步安装 64 | #ansible-playbook 01.prepare.yml 65 | #ansible-playbook 02.etcd.yml 66 | #ansible-playbook 03.kubectl.yml 67 | #ansible-playbook 04.docker.yml 68 | #ansible-playbook 05.kube-master.yml 69 | #ansible-playbook 06.kube-node.yml 70 | # 网络只可选择calico flannel一种安装 71 | #ansible-playbook 07.calico.yml 72 | #ansible-playbook 07.flannel.yml 73 | ``` 74 | 如果执行成功,k8s集群就安装好了。详细分步讲解请查看项目目录 `/docs` 下相关文档 75 | 76 | ### 5.验证安装 77 | ``` bash 78 | # 
如果提示kubectl: command not found,退出重新ssh登陆一下,环境变量生效即可 79 | kubectl version 80 | kubectl get componentstatus # 可以看到scheduler/controller-manager/etcd等组件 Healthy 81 | kubectl cluster-info # 可以看到kubernetes master(apiserver)组件 running 82 | kubectl get node # 可以看到单 node Ready状态 83 | kubectl get pod --all-namespaces # 可以查看所有集群pod状态 84 | kubectl get svc --all-namespaces # 可以查看所有集群服务状态 85 | ``` 86 | ### 6.安装主要组件 87 | ``` bash 88 | # 安装kubedns 89 | kubectl create -f /etc/ansible/manifests/kubedns 90 | # 安装heapster 91 | kubectl create -f /etc/ansible/manifests/heapster 92 | # 安装dashboard 93 | kubectl create -f /etc/ansible/manifests/dashboard 94 | ``` 95 | + 更新后`dashboard`已经默认关闭非安全端口访问,请使用`https://xx.xx.xx.xx:6443/api/v1/namespaces/kube-system/services/kubernetes-dashboard/proxy`访问,并用默认用户 `admin:test1234` 登陆,更多内容请查阅[dashboard文档](guide/dashboard.md) 96 | 97 | ### 7.清理集群 98 | 99 | 以上步骤创建的K8S开发测试环境请尽情折腾,碰到错误尽量通过查看日志、上网搜索、提交`issues`等方式解决;当然如果是彻底奔溃了,可以清理集群后重新创建。 100 | 101 | 一步清理:`ansible-playbook 99.clean.yml` 102 | -------------------------------------------------------------------------------- /docs/upgrade.md: -------------------------------------------------------------------------------- 1 | ## 升级注意事项 2 | 3 | ### v1.8 >>> v1.9 4 | 5 | + 1.下载最新项目代码 `cd /etc/ansible && git pull origin master` 6 | + 2.下载新的二进制 `k8s.190.tar.gz` 解压并覆盖 `/etc/ansible/bin/` 目录下文件 7 | + 3.更新集群 `cd /etc/ansible && ansible-playbook 90.setup.yml` 8 | + 4.[可选]升级`calico-kube-controllers`相关,在任一node节点执行如下 9 | 10 | ``` bash 11 | cd /root/kube-system/calico 12 | kubectl delete deploy calico-kube-controllers -n kube-system 13 | kubectl create -f calico-kube-controllers.yaml 14 | ``` 15 | 16 | 注1:升级过程会短暂中断集群中已经运行的应用;如果你想要零中断升级,可以在熟悉项目安装原理基础上自行尝试,或者关注后续项目[使用指南]中的文档更新 17 | 18 | 注2:k8s集群v1.8升级v1.9.0,目前测试不用修改任何服务参数,只要替换二进制文件; 19 | -------------------------------------------------------------------------------- /down/download.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #主要组件版本如下 3 | export K8S_VER=v1.9.1 4 | export ETCD_VER=v3.2.13 5 | export DOCKER_VER=17.12.0-ce 6 | export CNI_VER=v0.6.0 7 | export DOCKER_COMPOSE=1.18.0 8 | export HARBOR=v1.2.2 9 | 10 | echo "\n建议直接下载本人打包好的所有必要二进制包k8s-***.all.tar.gz,然后解压到bin目录" 11 | echo "\n建议不使用此脚本,如果你想升级组件或者实验,请通读该脚本,必要时适当修改后使用" 12 | echo "\n注意1:请按照以下链接手动下载二进制包到down目录中" 13 | echo "\n注意2:如果还没有手工下载tar包,请Ctrl-c结束此脚本" 14 | 15 | echo "\n----download k8s binary at:" 16 | echo https://dl.k8s.io/${K8S_VER}/kubernetes-server-linux-amd64.tar.gz 17 | 18 | echo "\n----download etcd binary at:" 19 | echo https://github.com/coreos/etcd/releases/download/${ETCD_VER}/etcd-${ETCD_VER}-linux-amd64.tar.gz 20 | echo https://storage.googleapis.com/etcd/${ETCD_VER}/etcd-${ETCD_VER}-linux-amd64.tar.gz 21 | 22 | echo "\n----download docker binary at:" 23 | echo https://download.docker.com/linux/static/stable/x86_64/docker-${DOCKER_VER}.tgz 24 | 25 | echo "\n----download ca tools at:" 26 | echo https://pkg.cfssl.org/R1.2/cfssl_linux-amd64 27 | echo https://pkg.cfssl.org/R1.2/cfssljson_linux-amd64 28 | echo https://pkg.cfssl.org/R1.2/cfssl-certinfo_linux-amd64 29 | 30 | echo "\n----download docker-compose at:" 31 | echo https://github.com/docker/compose/releases/download/${DOCKER_COMPOSE}/docker-compose-Linux-x86_64 32 | 33 | echo "\n----download harbor-offline-installer at:" 34 | echo https://github.com/vmware/harbor/releases/download/${HARBOR}/harbor-offline-installer-${HARBOR}.tgz 35 | 36 | echo "\n----download cni plugins at:" 37 | echo 
https://github.com/containernetworking/plugins/releases/download/${CNI_VER}/cni-${CNI_VER}.tgz 38 | 39 | sleep 30 40 | 41 | ### 准备证书工具程序 42 | echo "\n准备证书工具程序..." 43 | if [ -f "cfssl_linux-amd64" ]; then 44 | mv cfssl_linux-amd64 ../bin/cfssl 45 | else 46 | echo 请先下载https://pkg.cfssl.org/R1.2/cfssl_linux-amd64 47 | fi 48 | if [ -f "cfssljson_linux-amd64" ]; then 49 | mv cfssljson_linux-amd64 ../bin/cfssljson 50 | else 51 | echo 请先下载https://pkg.cfssl.org/R1.2/cfssljson_linux-amd64 52 | fi 53 | if [ -f "cfssl-certinfo_linux-amd64" ]; then 54 | mv cfssl-certinfo_linux-amd64 ../bin/cfssl-certinfo 55 | else 56 | echo 请先下载https://pkg.cfssl.org/R1.2/cfssl-certinfo_linux-amd64 57 | fi 58 | 59 | ### 准备etcd程序 60 | echo "\n准备etcd二进制程序..." 61 | if [ -f "etcd-${ETCD_VER}-linux-amd64.tar.gz" ]; then 62 | echo "\nextracting etcd binaries..." 63 | tar zxf etcd-${ETCD_VER}-linux-amd64.tar.gz 64 | mv etcd-${ETCD_VER}-linux-amd64/etcd* ../bin 65 | else 66 | echo 请先下载etcd-${ETCD_VER}-linux-amd64.tar.gz 67 | fi 68 | 69 | ### 准备kubernetes程序 70 | echo "\n准备kubernetes二进制程序..." 71 | if [ -f "kubernetes-server-linux-amd64.tar.gz" ]; then 72 | echo "\nextracting kubernetes binaries..." 73 | tar zxf kubernetes-server-linux-amd64.tar.gz 74 | mv kubernetes/server/bin/kube-apiserver ../bin 75 | mv kubernetes/server/bin/kube-controller-manager ../bin 76 | mv kubernetes/server/bin/kubectl ../bin 77 | mv kubernetes/server/bin/kubelet ../bin 78 | mv kubernetes/server/bin/kube-proxy ../bin 79 | mv kubernetes/server/bin/kube-scheduler ../bin 80 | else 81 | echo 请先下载kubernetes-server-linux-amd64.tar.gz 82 | fi 83 | 84 | ### 准备docker程序 85 | echo "\n准备docker二进制程序..." 86 | if [ -f "docker-${DOCKER_VER}.tgz" ]; then 87 | echo "\nextracting docker binaries..." 88 | tar zxf docker-${DOCKER_VER}.tgz 89 | mv docker/docker* ../bin 90 | if [ -f "docker/completion/bash/docker" ]; then 91 | mv -f docker/completion/bash/docker ../roles/docker/files/docker 92 | fi 93 | else 94 | echo 请先下载docker-${DOCKER_VER}.tgz 95 | fi 96 | 97 | ### 准备cni plugins,仅安装flannel需要,安装calico由容器专门下载cni plugins 98 | echo "\n准备cni plugins,仅安装flannel需要,安装calico由容器专门下载cni plugins..." 99 | if [ -f "cni-${CNI_VER}.tgz" ]; then 100 | echo "\nextracting cni plugins binaries..." 
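# Note: the mv commands below assume the cni tarball extracts its plugin
# binaries into the current directory; only the plugins flannel needs
# (bridge/flannel/host-local/loopback/portmap) are copied into ../bin.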
101 | tar zxf cni-${CNI_VER}.tgz 102 | mv bridge ../bin 103 | mv flannel ../bin 104 | mv host-local ../bin 105 | mv loopback ../bin 106 | mv portmap ../bin 107 | else 108 | echo 请先下载cni-${CNI_VER}.tgz 109 | fi 110 | -------------------------------------------------------------------------------- /example/hosts.allinone.example: -------------------------------------------------------------------------------- 1 | # 部署节点:运行ansible 脚本的节点 2 | [deploy] 3 | 192.168.1.1 4 | 5 | # etcd集群请提供如下NODE_NAME、NODE_IP变量 6 | # 请注意etcd集群必须是1,3,5,7...奇数个节点 7 | [etcd] 8 | 192.168.1.1 NODE_NAME=etcd1 NODE_IP="192.168.1.1" 9 | 10 | [kube-master] 11 | 192.168.1.1 NODE_IP="192.168.1.1" 12 | 13 | #确保node节点有变量NODE_ID=node1 14 | [kube-node] 15 | 192.168.1.1 NODE_ID=node1 NODE_IP="192.168.1.1" 16 | 17 | [kube-cluster:children] 18 | kube-node 19 | kube-master 20 | 21 | # 如果启用harbor,请配置后面harbor相关参数 22 | [harbor] 23 | #192.168.1.8 NODE_IP="192.168.1.8" 24 | 25 | # 预留组,后续添加node节点使用 26 | [new-node] 27 | #192.168.1.xx NODE_ID=node6 NODE_IP="192.168.1.xx" 28 | 29 | [all:vars] 30 | # ---------集群主要参数--------------- 31 | #集群 MASTER IP 32 | MASTER_IP="192.168.1.1" 33 | 34 | #集群 APISERVER 35 | KUBE_APISERVER="https://192.168.1.1:6443" 36 | 37 | #pause镜像地址 38 | POD_INFRA_CONTAINER_IMAGE=mirrorgooglecontainers/pause-amd64:3.0 39 | 40 | #TLS Bootstrapping 使用的 Token,使用 head -c 16 /dev/urandom | od -An -t x | tr -d ' ' 生成 41 | BOOTSTRAP_TOKEN="d18f94b5fa585c7123f56803d925d2e7" 42 | 43 | # 集群网络插件,目前支持calico和flannel 44 | CLUSTER_NETWORK="calico" 45 | 46 | # 部分calico相关配置,更全配置可以去roles/calico/templates/calico.yaml.j2自定义 47 | # 设置 CALICO_IPV4POOL_IPIP=“off”,可以提高网络性能,条件限制详见 05.安装calico网络组件.md 48 | CALICO_IPV4POOL_IPIP="always" 49 | # 设置 calico-node使用的host IP,bgp邻居通过该地址建立,可手动指定端口"interface=eth0"或使用如下自动发现 50 | IP_AUTODETECTION_METHOD="can-reach=223.5.5.5" 51 | 52 | # 部分flannel配置,详见roles/flannel/templates/kube-flannel.yaml.j2 53 | FLANNEL_BACKEND="vxlan" 54 | 55 | # 服务网段 (Service CIDR),部署前路由不可达,部署后集群内使用 IP:Port 可达 56 | SERVICE_CIDR="10.68.0.0/16" 57 | 58 | # POD 网段 (Cluster CIDR),部署前路由不可达,**部署后**路由可达 59 | CLUSTER_CIDR="172.20.0.0/16" 60 | 61 | # 服务端口范围 (NodePort Range) 62 | NODE_PORT_RANGE="20000-40000" 63 | 64 | # kubernetes 服务 IP (预分配,一般是 SERVICE_CIDR 中第一个IP) 65 | CLUSTER_KUBERNETES_SVC_IP="10.68.0.1" 66 | 67 | # 集群 DNS 服务 IP (从 SERVICE_CIDR 中预分配) 68 | CLUSTER_DNS_SVC_IP="10.68.0.2" 69 | 70 | # 集群 DNS 域名 71 | CLUSTER_DNS_DOMAIN="cluster.local." 
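# Note: CLUSTER_DNS_SVC_IP is pre-allocated from SERVICE_CIDR above; together with
# CLUSTER_DNS_DOMAIN it is rendered into roles/deploy/templates/kubedns.yaml.j2 when
# 01.prepare.yml runs, so adjust these values before running the playbooks.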
72 | 73 | # etcd 集群间通信的IP和端口, **根据实际 etcd 集群成员设置** 74 | ETCD_NODES="etcd1=https://192.168.1.1:2380" 75 | 76 | # etcd 集群服务地址列表, **根据实际 etcd 集群成员设置** 77 | ETCD_ENDPOINTS="https://192.168.1.1:2379" 78 | 79 | # 集群basic auth 使用的用户名和密码 80 | BASIC_AUTH_USER="admin" 81 | BASIC_AUTH_PASS="test1234" 82 | 83 | # ---------附加参数-------------------- 84 | #默认二进制文件目录 85 | bin_dir="/root/local/bin" 86 | 87 | #证书目录 88 | ca_dir="/etc/kubernetes/ssl" 89 | 90 | #部署目录,即 ansible 工作目录 91 | base_dir="/etc/ansible" 92 | 93 | #私有仓库 harbor服务器 (域名或者IP) 94 | #HARBOR_IP="192.168.1.8" 95 | #HARBOR_DOMAIN="harbor.yourdomain.com" 96 | -------------------------------------------------------------------------------- /example/hosts.m-masters.example: -------------------------------------------------------------------------------- 1 | # 部署节点:运行这份 ansible 脚本的节点 2 | [deploy] 3 | 192.168.1.1 4 | 5 | # etcd集群请提供如下NODE_NAME、NODE_IP变量 6 | # 请注意etcd集群必须是1,3,5,7...奇数个节点 7 | [etcd] 8 | 192.168.1.1 NODE_NAME=etcd1 NODE_IP="192.168.1.1" 9 | 192.168.1.2 NODE_NAME=etcd2 NODE_IP="192.168.1.2" 10 | 192.168.1.3 NODE_NAME=etcd3 NODE_IP="192.168.1.3" 11 | 12 | [kube-master] 13 | 192.168.1.1 NODE_IP="192.168.1.1" 14 | 192.168.1.2 NODE_IP="192.168.1.2" 15 | 16 | # 负载均衡至少两个节点,安装 haproxy+keepalived 17 | # 根据master节点数量同步修改roles/lb/templates/haproxy.cfg.j2 18 | [lb] 19 | 192.168.1.1 LB_IF="eth0" LB_ROLE=backup 20 | 192.168.1.2 LB_IF="eth0" LB_ROLE=master 21 | [lb:vars] 22 | LB_EP1="192.168.1.1:6443" # api-server 实际成员地址端口 23 | LB_EP2="192.168.1.2:6443" # api-server 实际成员地址端口 24 | MASTER_IP="192.168.1.10" # api-server 虚地址 25 | MASTER_PORT="8443" # api-server 服务端口 26 | 27 | #确保node节点有变量NODE_ID=node1 28 | [kube-node] 29 | 192.168.1.2 NODE_ID=node1 NODE_IP="192.168.1.2" 30 | 192.168.1.3 NODE_ID=node2 NODE_IP="192.168.1.3" 31 | 192.168.1.4 NODE_ID=node3 NODE_IP="192.168.1.4" 32 | 33 | [kube-cluster:children] 34 | kube-node 35 | kube-master 36 | 37 | # 如果启用harbor,请配置后面harbor相关参数 38 | [harbor] 39 | #192.168.1.8 NODE_IP="192.168.1.8" 40 | 41 | # 预留组,后续添加node节点使用 42 | [new-node] 43 | #192.168.1.xx NODE_ID=node6 NODE_IP="192.168.1.xx" 44 | #192.168.1.xx NODE_ID=node7 NODE_IP="192.168.1.xx" 45 | 46 | [all:vars] 47 | # ---------集群主要参数--------------- 48 | #集群 MASTER IP, 需要负载均衡,一般为VIP地址 49 | MASTER_IP="192.168.1.10" 50 | KUBE_APISERVER="https://192.168.1.10:8443" 51 | 52 | #pause镜像地址 53 | POD_INFRA_CONTAINER_IMAGE=mirrorgooglecontainers/pause-amd64:3.0 54 | 55 | #TLS Bootstrapping 使用的 Token,使用 head -c 16 /dev/urandom | od -An -t x | tr -d ' ' 生成 56 | BOOTSTRAP_TOKEN="c30302226d4b810e08731702d3890f50" 57 | 58 | # 集群网络插件,目前支持calico和flannel 59 | CLUSTER_NETWORK="calico" 60 | 61 | # 部分calico相关配置,更全配置可以去roles/calico/templates/calico.yaml.j2自定义 62 | # 设置 CALICO_IPV4POOL_IPIP=“off”,可以提高网络性能,条件限制详见 05.安装calico网络组件.md 63 | CALICO_IPV4POOL_IPIP="always" 64 | # 设置 calico-node使用的host IP,bgp邻居通过该地址建立,可手动指定端口"interface=eth0"或使用如下自动发现 65 | IP_AUTODETECTION_METHOD="can-reach=223.5.5.5" 66 | 67 | # 部分flannel配置,详见roles/flannel/templates/kube-flannel.yaml.j2 68 | FLANNEL_BACKEND="vxlan" 69 | 70 | # 服务网段 (Service CIDR),部署前路由不可达,部署后集群内使用 IP:Port 可达 71 | SERVICE_CIDR="10.68.0.0/16" 72 | 73 | # POD 网段 (Cluster CIDR),部署前路由不可达,**部署后**路由可达 74 | CLUSTER_CIDR="172.20.0.0/16" 75 | 76 | # 服务端口范围 (NodePort Range) 77 | NODE_PORT_RANGE="20000-40000" 78 | 79 | # kubernetes 服务 IP (预分配,一般是 SERVICE_CIDR 中第一个IP) 80 | CLUSTER_KUBERNETES_SVC_IP="10.68.0.1" 81 | 82 | # 集群 DNS 服务 IP (从 SERVICE_CIDR 中预分配) 83 | CLUSTER_DNS_SVC_IP="10.68.0.2" 84 | 85 | # 集群 DNS 域名 86 | CLUSTER_DNS_DOMAIN="cluster.local." 
87 | 88 | # etcd 集群间通信的IP和端口, **根据实际 etcd 集群成员设置** 89 | ETCD_NODES="etcd1=https://192.168.1.1:2380,etcd2=https://192.168.1.2:2380,etcd3=https://192.168.1.3:2380" 90 | 91 | # etcd 集群服务地址列表, **根据实际 etcd 集群成员设置** 92 | ETCD_ENDPOINTS="https://192.168.1.1:2379,https://192.168.1.2:2379,https://192.168.1.3:2379" 93 | 94 | # 集群basic auth 使用的用户名和密码 95 | BASIC_AUTH_USER="admin" 96 | BASIC_AUTH_PASS="test1234" 97 | 98 | # ---------附加参数-------------------- 99 | #默认二进制文件目录 100 | bin_dir="/root/local/bin" 101 | 102 | #证书目录 103 | ca_dir="/etc/kubernetes/ssl" 104 | 105 | #部署目录,即 ansible 工作目录,建议不要修改 106 | base_dir="/etc/ansible" 107 | 108 | #私有仓库 harbor服务器 (域名或者IP) 109 | #HARBOR_IP="192.168.1.8" 110 | #HARBOR_DOMAIN="harbor.yourdomain.com" 111 | -------------------------------------------------------------------------------- /example/hosts.s-master.example: -------------------------------------------------------------------------------- 1 | # 部署节点:运行ansible 脚本的节点 2 | [deploy] 3 | 192.168.1.1 4 | 5 | # etcd集群请提供如下NODE_NAME、NODE_IP变量 6 | # 请注意etcd集群必须是1,3,5,7...奇数个节点 7 | [etcd] 8 | 192.168.1.1 NODE_NAME=etcd1 NODE_IP="192.168.1.1" 9 | 192.168.1.2 NODE_NAME=etcd2 NODE_IP="192.168.1.2" 10 | 192.168.1.3 NODE_NAME=etcd3 NODE_IP="192.168.1.3" 11 | 12 | [kube-master] 13 | 192.168.1.1 NODE_IP="192.168.1.1" 14 | 15 | #确保node节点有变量NODE_ID=node1 16 | [kube-node] 17 | 192.168.1.1 NODE_ID=node1 NODE_IP="192.168.1.1" 18 | 192.168.1.2 NODE_ID=node2 NODE_IP="192.168.1.2" 19 | 192.168.1.3 NODE_ID=node3 NODE_IP="192.168.1.3" 20 | 21 | [kube-cluster:children] 22 | kube-node 23 | kube-master 24 | 25 | # 如果启用harbor,请配置后面harbor相关参数 26 | [harbor] 27 | #192.168.1.8 NODE_IP="192.168.1.8" 28 | 29 | # 预留组,后续添加node节点使用 30 | [new-node] 31 | #192.168.1.xx NODE_ID=node6 NODE_IP="192.168.1.xx" 32 | 33 | [all:vars] 34 | # ---------集群主要参数--------------- 35 | #集群 MASTER IP 36 | MASTER_IP="192.168.1.1" 37 | 38 | #集群 APISERVER 39 | KUBE_APISERVER="https://192.168.1.1:6443" 40 | 41 | #pause镜像地址 42 | POD_INFRA_CONTAINER_IMAGE=mirrorgooglecontainers/pause-amd64:3.0 43 | 44 | #TLS Bootstrapping 使用的 Token,使用 head -c 16 /dev/urandom | od -An -t x | tr -d ' ' 生成 45 | BOOTSTRAP_TOKEN="d18f94b5fa585c7123f56803d925d2e7" 46 | 47 | # 集群网络插件,目前支持calico和flannel 48 | CLUSTER_NETWORK="calico" 49 | 50 | # 部分calico相关配置,更全配置可以去roles/calico/templates/calico.yaml.j2自定义 51 | # 设置 CALICO_IPV4POOL_IPIP=“off”,可以提高网络性能,条件限制详见 05.安装calico网络组件.md 52 | CALICO_IPV4POOL_IPIP="always" 53 | # 设置 calico-node使用的host IP,bgp邻居通过该地址建立,可手动指定端口"interface=eth0"或使用如下自动发现 54 | IP_AUTODETECTION_METHOD="can-reach=223.5.5.5" 55 | 56 | # 部分flannel配置,详见roles/flannel/templates/kube-flannel.yaml.j2 57 | FLANNEL_BACKEND="vxlan" 58 | 59 | # 服务网段 (Service CIDR),部署前路由不可达,部署后集群内使用 IP:Port 可达 60 | SERVICE_CIDR="10.68.0.0/16" 61 | 62 | # POD 网段 (Cluster CIDR),部署前路由不可达,**部署后**路由可达 63 | CLUSTER_CIDR="172.20.0.0/16" 64 | 65 | # 服务端口范围 (NodePort Range) 66 | NODE_PORT_RANGE="20000-40000" 67 | 68 | # kubernetes 服务 IP (预分配,一般是 SERVICE_CIDR 中第一个IP) 69 | CLUSTER_KUBERNETES_SVC_IP="10.68.0.1" 70 | 71 | # 集群 DNS 服务 IP (从 SERVICE_CIDR 中预分配) 72 | CLUSTER_DNS_SVC_IP="10.68.0.2" 73 | 74 | # 集群 DNS 域名 75 | CLUSTER_DNS_DOMAIN="cluster.local." 
76 | 77 | # etcd 集群间通信的IP和端口, **根据实际 etcd 集群成员设置** 78 | ETCD_NODES="etcd1=https://192.168.1.1:2380,etcd2=https://192.168.1.2:2380,etcd3=https://192.168.1.3:2380" 79 | 80 | # etcd 集群服务地址列表, **根据实际 etcd 集群成员设置** 81 | ETCD_ENDPOINTS="https://192.168.1.1:2379,https://192.168.1.2:2379,https://192.168.1.3:2379" 82 | 83 | # 集群basic auth 使用的用户名和密码 84 | BASIC_AUTH_USER="admin" 85 | BASIC_AUTH_PASS="test1234" 86 | 87 | # ---------附加参数-------------------- 88 | #默认二进制文件目录 89 | bin_dir="/root/local/bin" 90 | 91 | #证书目录 92 | ca_dir="/etc/kubernetes/ssl" 93 | 94 | #部署目录,即 ansible 工作目录 95 | base_dir="/etc/ansible" 96 | 97 | #私有仓库 harbor服务器 (域名或者IP) 98 | #HARBOR_IP="192.168.1.8" 99 | #HARBOR_DOMAIN="harbor.yourdomain.com" 100 | -------------------------------------------------------------------------------- /manifests/dashboard/kubernetes-dashboard.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2015 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # Configuration to deploy release version of the Dashboard UI compatible with 16 | # Kubernetes 1.6 (RBAC enabled). 17 | # 18 | # Example usage: kubectl create -f 19 | 20 | apiVersion: v1 21 | kind: ServiceAccount 22 | metadata: 23 | labels: 24 | k8s-app: kubernetes-dashboard 25 | name: kubernetes-dashboard 26 | namespace: kube-system 27 | --- 28 | apiVersion: rbac.authorization.k8s.io/v1 29 | kind: ClusterRoleBinding 30 | metadata: 31 | name: kubernetes-dashboard 32 | labels: 33 | k8s-app: kubernetes-dashboard 34 | roleRef: 35 | apiGroup: rbac.authorization.k8s.io 36 | kind: ClusterRole 37 | name: cluster-admin 38 | subjects: 39 | - kind: ServiceAccount 40 | name: kubernetes-dashboard 41 | namespace: kube-system 42 | --- 43 | kind: Deployment 44 | apiVersion: apps/v1 45 | metadata: 46 | labels: 47 | k8s-app: kubernetes-dashboard 48 | name: kubernetes-dashboard 49 | namespace: kube-system 50 | spec: 51 | replicas: 1 52 | revisionHistoryLimit: 10 53 | selector: 54 | matchLabels: 55 | k8s-app: kubernetes-dashboard 56 | template: 57 | metadata: 58 | labels: 59 | k8s-app: kubernetes-dashboard 60 | spec: 61 | containers: 62 | - name: kubernetes-dashboard 63 | #image: gcr.io/google_containers/kubernetes-dashboard-amd64:v1.6.3 64 | image: mirrorgooglecontainers/kubernetes-dashboard-amd64:v1.6.3 65 | ports: 66 | - containerPort: 9090 67 | protocol: TCP 68 | args: 69 | # Uncomment the following line to manually specify Kubernetes API server Host 70 | # If not specified, Dashboard will attempt to auto discover the API server and connect 71 | # to it. Uncomment only if the default does not work. 
72 | # - --apiserver-host=http://my-address:port 73 | livenessProbe: 74 | httpGet: 75 | path: / 76 | port: 9090 77 | initialDelaySeconds: 30 78 | timeoutSeconds: 30 79 | serviceAccountName: kubernetes-dashboard 80 | # Comment the following tolerations if Dashboard must not be deployed on master 81 | tolerations: 82 | - key: node-role.kubernetes.io/master 83 | effect: NoSchedule 84 | --- 85 | kind: Service 86 | apiVersion: v1 87 | metadata: 88 | labels: 89 | k8s-app: kubernetes-dashboard 90 | kubernetes.io/cluster-service: "true" 91 | addonmanager.kubernetes.io/mode: Reconcile 92 | name: kubernetes-dashboard 93 | namespace: kube-system 94 | spec: 95 | ports: 96 | - port: 80 97 | targetPort: 9090 98 | selector: 99 | k8s-app: kubernetes-dashboard 100 | type: NodePort 101 | -------------------------------------------------------------------------------- /manifests/dashboard/ui-admin-rbac.yaml: -------------------------------------------------------------------------------- 1 | kind: ClusterRole 2 | apiVersion: rbac.authorization.k8s.io/v1 3 | metadata: 4 | name: ui-admin 5 | rules: 6 | - apiGroups: 7 | - "" 8 | resources: 9 | - services 10 | - services/proxy 11 | verbs: 12 | - '*' 13 | 14 | --- 15 | apiVersion: rbac.authorization.k8s.io/v1 16 | kind: RoleBinding 17 | metadata: 18 | name: ui-admin-binding 19 | namespace: kube-system 20 | roleRef: 21 | apiGroup: rbac.authorization.k8s.io 22 | kind: ClusterRole 23 | name: ui-admin 24 | subjects: 25 | - apiGroup: rbac.authorization.k8s.io 26 | kind: User 27 | name: admin 28 | -------------------------------------------------------------------------------- /manifests/dashboard/ui-read-rbac.yaml: -------------------------------------------------------------------------------- 1 | kind: ClusterRole 2 | apiVersion: rbac.authorization.k8s.io/v1 3 | metadata: 4 | name: ui-read 5 | rules: 6 | - apiGroups: 7 | - "" 8 | resources: 9 | - services 10 | - services/proxy 11 | verbs: 12 | - get 13 | - list 14 | - watch 15 | 16 | --- 17 | apiVersion: rbac.authorization.k8s.io/v1 18 | kind: RoleBinding 19 | metadata: 20 | name: ui-read-binding 21 | namespace: kube-system 22 | roleRef: 23 | apiGroup: rbac.authorization.k8s.io 24 | kind: ClusterRole 25 | name: ui-read 26 | subjects: 27 | - apiGroup: rbac.authorization.k8s.io 28 | kind: User 29 | name: readonly 30 | -------------------------------------------------------------------------------- /manifests/heapster/grafana.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: apps/v1 3 | kind: Deployment 4 | metadata: 5 | name: monitoring-grafana 6 | namespace: kube-system 7 | spec: 8 | replicas: 1 9 | selector: 10 | matchLabels: 11 | k8s-app: grafana 12 | template: 13 | metadata: 14 | labels: 15 | task: monitoring 16 | k8s-app: grafana 17 | spec: 18 | containers: 19 | - name: grafana 20 | #image: gcr.io/google_containers/heapster-grafana-amd64:v4.2.0 21 | image: mirrorgooglecontainers/heapster-grafana-amd64:v4.4.3 22 | ports: 23 | - containerPort: 3000 24 | protocol: TCP 25 | volumeMounts: 26 | - mountPath: /var 27 | name: grafana-storage 28 | env: 29 | - name: INFLUXDB_HOST 30 | value: monitoring-influxdb 31 | - name: GF_SERVER_HTTP_PORT 32 | value: "3000" 33 | # The following env variables are required to make Grafana accessible via 34 | # the kubernetes api-server proxy. On production clusters, we recommend 35 | # removing these env variables, setup auth for grafana, and expose the grafana 36 | # service using a LoadBalancer or a public IP. 
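        # Concretely, the three variables below disable basic auth and grant anonymous
        # visitors the Admin org role -- acceptable for this test setup only.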
37 | - name: GF_AUTH_BASIC_ENABLED 38 | value: "false" 39 | - name: GF_AUTH_ANONYMOUS_ENABLED 40 | value: "true" 41 | - name: GF_AUTH_ANONYMOUS_ORG_ROLE 42 | value: Admin 43 | - name: GF_SERVER_ROOT_URL 44 | # If you're only using the API Server proxy, set this value instead: 45 | value: /api/v1/namespaces/kube-system/services/monitoring-grafana/proxy/ 46 | #value: / 47 | volumes: 48 | - name: grafana-storage 49 | emptyDir: {} 50 | --- 51 | apiVersion: v1 52 | kind: Service 53 | metadata: 54 | labels: 55 | # For use as a Cluster add-on (https://github.com/kubernetes/kubernetes/tree/master/cluster/addons) 56 | # If you are NOT using this as an addon, you should comment out this line. 57 | kubernetes.io/cluster-service: 'true' 58 | kubernetes.io/name: monitoring-grafana 59 | name: monitoring-grafana 60 | namespace: kube-system 61 | spec: 62 | # In a production setup, we recommend accessing Grafana through an external Loadbalancer 63 | # or through a public IP. 64 | # type: LoadBalancer 65 | # You could also use NodePort to expose the service at a randomly-generated port 66 | # type: NodePort 67 | ports: 68 | - port: 80 69 | targetPort: 3000 70 | selector: 71 | k8s-app: grafana 72 | -------------------------------------------------------------------------------- /manifests/heapster/heapster.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: ServiceAccount 4 | metadata: 5 | name: heapster 6 | namespace: kube-system 7 | --- 8 | 9 | apiVersion: rbac.authorization.k8s.io/v1 10 | kind: ClusterRoleBinding 11 | metadata: 12 | name: heapster 13 | subjects: 14 | - kind: ServiceAccount 15 | name: heapster 16 | namespace: kube-system 17 | roleRef: 18 | kind: ClusterRole 19 | name: system:heapster 20 | apiGroup: rbac.authorization.k8s.io 21 | --- 22 | 23 | apiVersion: apps/v1 24 | kind: Deployment 25 | metadata: 26 | name: heapster 27 | namespace: kube-system 28 | spec: 29 | replicas: 1 30 | selector: 31 | matchLabels: 32 | k8s-app: heapster 33 | template: 34 | metadata: 35 | labels: 36 | task: monitoring 37 | k8s-app: heapster 38 | spec: 39 | serviceAccountName: heapster 40 | containers: 41 | - name: heapster 42 | #image: gcr.io/google_containers/heapster-amd64:v1.3.0 43 | image: mirrorgooglecontainers/heapster-amd64:v1.3.0 44 | imagePullPolicy: IfNotPresent 45 | command: 46 | - /heapster 47 | - --source=kubernetes:https://kubernetes.default 48 | - --sink=influxdb:http://monitoring-influxdb.kube-system.svc:8086 49 | --- 50 | apiVersion: v1 51 | kind: Service 52 | metadata: 53 | labels: 54 | task: monitoring 55 | # For use as a Cluster add-on (https://github.com/kubernetes/kubernetes/tree/master/cluster/addons) 56 | # If you are NOT using this as an addon, you should comment out this line. 
57 | #kubernetes.io/cluster-service: 'true' 58 | kubernetes.io/name: Heapster 59 | name: heapster 60 | namespace: kube-system 61 | spec: 62 | ports: 63 | - port: 80 64 | targetPort: 8082 65 | selector: 66 | k8s-app: heapster 67 | -------------------------------------------------------------------------------- /manifests/heapster/influxdb.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: apps/v1 3 | kind: Deployment 4 | metadata: 5 | name: monitoring-influxdb 6 | namespace: kube-system 7 | spec: 8 | replicas: 1 9 | selector: 10 | matchLabels: 11 | k8s-app: influxdb 12 | template: 13 | metadata: 14 | labels: 15 | task: monitoring 16 | k8s-app: influxdb 17 | spec: 18 | containers: 19 | - name: influxdb 20 | #image: gcr.io/google_containers/heapster-influxdb-amd64:v1.1.1 21 | image: mirrorgooglecontainers/heapster-influxdb-amd64:v1.1.1 22 | volumeMounts: 23 | - mountPath: /data 24 | name: influxdb-storage 25 | - mountPath: /etc/ 26 | name: influxdb-config 27 | volumes: 28 | - name: influxdb-storage 29 | emptyDir: {} 30 | - name: influxdb-config 31 | configMap: 32 | name: influxdb-config 33 | --- 34 | apiVersion: v1 35 | kind: Service 36 | metadata: 37 | labels: 38 | task: monitoring 39 | # For use as a Cluster add-on (https://github.com/kubernetes/kubernetes/tree/master/cluster/addons) 40 | # If you are NOT using this as an addon, you should comment out this line. 41 | # kubernetes.io/cluster-service: 'true' 42 | kubernetes.io/name: monitoring-influxdb 43 | name: monitoring-influxdb 44 | namespace: kube-system 45 | spec: 46 | type: NodePort 47 | ports: 48 | - port: 8086 49 | targetPort: 8086 50 | name: http 51 | - port: 8083 52 | targetPort: 8083 53 | name: admin 54 | selector: 55 | k8s-app: influxdb 56 | --- 57 | 58 | apiVersion: v1 59 | kind: ConfigMap 60 | metadata: 61 | name: influxdb-config 62 | namespace: kube-system 63 | data: 64 | config.toml: | 65 | reporting-disabled = true 66 | bind-address = ":8088" 67 | 68 | [meta] 69 | dir = "/data/meta" 70 | retention-autocreate = true 71 | logging-enabled = true 72 | 73 | [data] 74 | dir = "/data/data" 75 | wal-dir = "/data/wal" 76 | query-log-enabled = true 77 | cache-max-memory-size = 1073741824 78 | cache-snapshot-memory-size = 26214400 79 | cache-snapshot-write-cold-duration = "10m0s" 80 | compact-full-write-cold-duration = "4h0m0s" 81 | max-series-per-database = 1000000 82 | max-values-per-tag = 100000 83 | trace-logging-enabled = false 84 | 85 | [coordinator] 86 | write-timeout = "10s" 87 | max-concurrent-queries = 0 88 | query-timeout = "0s" 89 | log-queries-after = "0s" 90 | max-select-point = 0 91 | max-select-series = 0 92 | max-select-buckets = 0 93 | 94 | [retention] 95 | enabled = true 96 | check-interval = "30m0s" 97 | 98 | [admin] 99 | enabled = true 100 | bind-address = ":8083" 101 | https-enabled = false 102 | https-certificate = "/etc/ssl/influxdb.pem" 103 | 104 | [shard-precreation] 105 | enabled = true 106 | check-interval = "10m0s" 107 | advance-period = "30m0s" 108 | 109 | [monitor] 110 | store-enabled = true 111 | store-database = "_internal" 112 | store-interval = "10s" 113 | 114 | [subscriber] 115 | enabled = true 116 | http-timeout = "30s" 117 | insecure-skip-verify = false 118 | ca-certs = "" 119 | write-concurrency = 40 120 | write-buffer-size = 1000 121 | 122 | [http] 123 | enabled = true 124 | bind-address = ":8086" 125 | auth-enabled = false 126 | log-enabled = true 127 | write-tracing = false 128 | pprof-enabled = false 129 | https-enabled = false 130 | 
https-certificate = "/etc/ssl/influxdb.pem" 131 | https-private-key = "" 132 | max-row-limit = 10000 133 | max-connection-limit = 0 134 | shared-secret = "" 135 | realm = "InfluxDB" 136 | unix-socket-enabled = false 137 | bind-socket = "/var/run/influxdb.sock" 138 | 139 | [[graphite]] 140 | enabled = false 141 | bind-address = ":2003" 142 | database = "graphite" 143 | retention-policy = "" 144 | protocol = "tcp" 145 | batch-size = 5000 146 | batch-pending = 10 147 | batch-timeout = "1s" 148 | consistency-level = "one" 149 | separator = "." 150 | udp-read-buffer = 0 151 | 152 | [[collectd]] 153 | enabled = false 154 | bind-address = ":25826" 155 | database = "collectd" 156 | retention-policy = "" 157 | batch-size = 5000 158 | batch-pending = 10 159 | batch-timeout = "10s" 160 | read-buffer = 0 161 | typesdb = "/usr/share/collectd/types.db" 162 | 163 | [[opentsdb]] 164 | enabled = false 165 | bind-address = ":4242" 166 | database = "opentsdb" 167 | retention-policy = "" 168 | consistency-level = "one" 169 | tls-enabled = false 170 | certificate = "/etc/ssl/influxdb.pem" 171 | batch-size = 1000 172 | batch-pending = 5 173 | batch-timeout = "1s" 174 | log-point-errors = true 175 | 176 | [[udp]] 177 | enabled = false 178 | bind-address = ":8089" 179 | database = "udp" 180 | retention-policy = "" 181 | batch-size = 5000 182 | batch-pending = 10 183 | read-buffer = 0 184 | batch-timeout = "1s" 185 | precision = "" 186 | 187 | [continuous_queries] 188 | log-enabled = true 189 | enabled = true 190 | run-interval = "1s" 191 | -------------------------------------------------------------------------------- /manifests/ingress/test-hello.ing.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: extensions/v1beta1 2 | kind: Ingress 3 | metadata: 4 | name: test-hello 5 | spec: 6 | rules: 7 | - host: hello.test.com 8 | http: 9 | paths: 10 | - path: / 11 | backend: 12 | serviceName: test-hello 13 | servicePort: 80 14 | -------------------------------------------------------------------------------- /manifests/ingress/traefik-ingress.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | kind: ClusterRole 3 | apiVersion: rbac.authorization.k8s.io/v1 4 | metadata: 5 | name: traefik-ingress-controller 6 | rules: 7 | - apiGroups: 8 | - "" 9 | resources: 10 | - pods 11 | - services 12 | - endpoints 13 | - secrets 14 | verbs: 15 | - get 16 | - list 17 | - watch 18 | - apiGroups: 19 | - extensions 20 | resources: 21 | - ingresses 22 | verbs: 23 | - get 24 | - list 25 | - watch 26 | --- 27 | kind: ClusterRoleBinding 28 | apiVersion: rbac.authorization.k8s.io/v1 29 | metadata: 30 | name: traefik-ingress-controller 31 | roleRef: 32 | apiGroup: rbac.authorization.k8s.io 33 | kind: ClusterRole 34 | name: traefik-ingress-controller 35 | subjects: 36 | - kind: ServiceAccount 37 | name: traefik-ingress-controller 38 | namespace: kube-system 39 | --- 40 | apiVersion: v1 41 | kind: ServiceAccount 42 | metadata: 43 | name: traefik-ingress-controller 44 | namespace: kube-system 45 | --- 46 | kind: Deployment 47 | apiVersion: apps/v1 48 | metadata: 49 | name: traefik-ingress-controller 50 | namespace: kube-system 51 | labels: 52 | k8s-app: traefik-ingress-lb 53 | spec: 54 | replicas: 1 55 | selector: 56 | matchLabels: 57 | k8s-app: traefik-ingress-lb 58 | template: 59 | metadata: 60 | labels: 61 | k8s-app: traefik-ingress-lb 62 | name: traefik-ingress-lb 63 | spec: 64 | serviceAccountName: traefik-ingress-controller 65 | 
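      # The serviceAccountName above ties this pod to the ClusterRole/ClusterRoleBinding
      # defined at the top of this manifest, so traefik can watch Services, Endpoints and Ingresses.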
terminationGracePeriodSeconds: 60 66 | containers: 67 | - image: traefik 68 | name: traefik-ingress-lb 69 | args: 70 | - --web 71 | - --kubernetes 72 | --- 73 | kind: Service 74 | apiVersion: v1 75 | metadata: 76 | name: traefik-ingress-service 77 | namespace: kube-system 78 | spec: 79 | selector: 80 | k8s-app: traefik-ingress-lb 81 | ports: 82 | - protocol: TCP 83 | # 该端口为 traefik ingress-controller的服务端口 84 | port: 80 85 | # 集群hosts文件中设置的 NODE_PORT_RANGE 作为 NodePort的可用范围 86 | # 从默认20000~40000之间选一个可用端口,让ingress-controller暴露给外部的访问 87 | nodePort: 23456 88 | name: web 89 | - protocol: TCP 90 | # 该端口为 traefik 的管理WEB界面 91 | port: 8080 92 | name: admin 93 | type: NodePort 94 | -------------------------------------------------------------------------------- /manifests/ingress/traefik-ui.ing.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: extensions/v1beta1 3 | kind: Ingress 4 | metadata: 5 | name: traefik-web-ui 6 | namespace: kube-system 7 | spec: 8 | rules: 9 | - host: traefik-ui.test.com 10 | http: 11 | paths: 12 | - path: / 13 | backend: 14 | serviceName: traefik-ingress-service 15 | servicePort: 8080 16 | -------------------------------------------------------------------------------- /manifests/kubedns/kubedns.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: ConfigMap 4 | metadata: 5 | name: kube-dns 6 | namespace: kube-system 7 | labels: 8 | addonmanager.kubernetes.io/mode: EnsureExists 9 | 10 | --- 11 | apiVersion: v1 12 | kind: ServiceAccount 13 | metadata: 14 | name: kube-dns 15 | namespace: kube-system 16 | labels: 17 | #kubernetes.io/cluster-service: "true" 18 | addonmanager.kubernetes.io/mode: Reconcile 19 | 20 | --- 21 | apiVersion: v1 22 | kind: Service 23 | metadata: 24 | name: kube-dns 25 | namespace: kube-system 26 | labels: 27 | k8s-app: kube-dns 28 | #kubernetes.io/cluster-service: "true" 29 | addonmanager.kubernetes.io/mode: Reconcile 30 | kubernetes.io/name: "KubeDNS" 31 | spec: 32 | selector: 33 | k8s-app: kube-dns 34 | clusterIP: 10.68.0.2 35 | ports: 36 | - name: dns 37 | port: 53 38 | protocol: UDP 39 | - name: dns-tcp 40 | port: 53 41 | protocol: TCP 42 | 43 | --- 44 | apiVersion: apps/v1 45 | kind: Deployment 46 | metadata: 47 | name: kube-dns 48 | namespace: kube-system 49 | labels: 50 | k8s-app: kube-dns 51 | #kubernetes.io/cluster-service: "true" 52 | addonmanager.kubernetes.io/mode: Reconcile 53 | spec: 54 | # replicas: not specified here: 55 | # 1. In order to make Addon Manager do not reconcile this replicas parameter. 56 | # 2. Default is 1. 57 | # 3. Will be tuned in real time if DNS horizontal auto-scaling is turned on. 58 | strategy: 59 | rollingUpdate: 60 | maxSurge: 10% 61 | maxUnavailable: 0 62 | selector: 63 | matchLabels: 64 | k8s-app: kube-dns 65 | template: 66 | metadata: 67 | labels: 68 | k8s-app: kube-dns 69 | annotations: 70 | scheduler.alpha.kubernetes.io/critical-pod: '' 71 | spec: 72 | tolerations: 73 | - key: "CriticalAddonsOnly" 74 | operator: "Exists" 75 | volumes: 76 | - name: kube-dns-config 77 | configMap: 78 | name: kube-dns 79 | optional: true 80 | containers: 81 | - name: kubedns 82 | #image: gcr.io/google_containers/k8s-dns-kube-dns-amd64:1.14.5 83 | image: mirrorgooglecontainers/k8s-dns-kube-dns-amd64:1.14.5 84 | resources: 85 | # TODO: Set memory limits when we've profiled the container for large 86 | # clusters, then set request = limit to keep this container in 87 | # guaranteed class. 
Currently, this container falls into the 88 | # "burstable" category so the kubelet doesn't backoff from restarting it. 89 | limits: 90 | memory: 170Mi 91 | requests: 92 | cpu: 100m 93 | memory: 70Mi 94 | livenessProbe: 95 | httpGet: 96 | path: /healthcheck/kubedns 97 | port: 10054 98 | scheme: HTTP 99 | initialDelaySeconds: 60 100 | timeoutSeconds: 5 101 | successThreshold: 1 102 | failureThreshold: 5 103 | readinessProbe: 104 | httpGet: 105 | path: /readiness 106 | port: 8081 107 | scheme: HTTP 108 | # we poll on pod startup for the Kubernetes master service and 109 | # only setup the /readiness HTTP server once that's available. 110 | initialDelaySeconds: 3 111 | timeoutSeconds: 5 112 | args: 113 | - --domain=cluster.local. 114 | - --dns-port=10053 115 | - --config-dir=/kube-dns-config 116 | - --v=2 117 | env: 118 | - name: PROMETHEUS_PORT 119 | value: "10055" 120 | ports: 121 | - containerPort: 10053 122 | name: dns-local 123 | protocol: UDP 124 | - containerPort: 10053 125 | name: dns-tcp-local 126 | protocol: TCP 127 | - containerPort: 10055 128 | name: metrics 129 | protocol: TCP 130 | volumeMounts: 131 | - name: kube-dns-config 132 | mountPath: /kube-dns-config 133 | - name: dnsmasq 134 | #image: gcr.io/google_containers/k8s-dns-dnsmasq-nanny-amd64:1.14.5 135 | image: mirrorgooglecontainers/k8s-dns-dnsmasq-nanny-amd64:1.14.5 136 | livenessProbe: 137 | httpGet: 138 | path: /healthcheck/dnsmasq 139 | port: 10054 140 | scheme: HTTP 141 | initialDelaySeconds: 60 142 | timeoutSeconds: 5 143 | successThreshold: 1 144 | failureThreshold: 5 145 | args: 146 | - -v=2 147 | - -logtostderr 148 | - -configDir=/etc/k8s/dns/dnsmasq-nanny 149 | - -restartDnsmasq=true 150 | - -- 151 | - -k 152 | - --cache-size=1000 153 | - --log-facility=- 154 | - --server=/cluster.local./127.0.0.1#10053 155 | - --server=/in-addr.arpa/127.0.0.1#10053 156 | - --server=/ip6.arpa/127.0.0.1#10053 157 | ports: 158 | - containerPort: 53 159 | name: dns 160 | protocol: UDP 161 | - containerPort: 53 162 | name: dns-tcp 163 | protocol: TCP 164 | # see: https://github.com/kubernetes/kubernetes/issues/29055 for details 165 | resources: 166 | requests: 167 | cpu: 150m 168 | memory: 20Mi 169 | volumeMounts: 170 | - name: kube-dns-config 171 | mountPath: /etc/k8s/dns/dnsmasq-nanny 172 | - name: sidecar 173 | #image: gcr.io/google_containers/k8s-dns-sidecar-amd64:1.14.5 174 | image: mirrorgooglecontainers/k8s-dns-sidecar-amd64:1.14.5 175 | livenessProbe: 176 | httpGet: 177 | path: /metrics 178 | port: 10054 179 | scheme: HTTP 180 | initialDelaySeconds: 60 181 | timeoutSeconds: 5 182 | successThreshold: 1 183 | failureThreshold: 5 184 | args: 185 | - --v=2 186 | - --logtostderr 187 | - --probe=kubedns,127.0.0.1:10053,kubernetes.default.svc.cluster.local.,5,A 188 | - --probe=dnsmasq,127.0.0.1:53,kubernetes.default.svc.cluster.local.,5,A 189 | ports: 190 | - containerPort: 10054 191 | name: metrics 192 | protocol: TCP 193 | resources: 194 | requests: 195 | memory: 20Mi 196 | cpu: 10m 197 | dnsPolicy: Default # Don't use cluster DNS. 
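      # dnsPolicy Default makes kube-dns inherit the node's resolv.conf for upstream
      # lookups instead of pointing back at the cluster DNS service itself.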
198 | serviceAccountName: kube-dns 199 | -------------------------------------------------------------------------------- /manifests/kubedns/readme.md: -------------------------------------------------------------------------------- 1 | ### 说明 2 | 3 | + 本目录为k8s集群的插件 kube-dns的配置目录 4 | + 因kubedns.yaml文件中参数(CLUSTER_DNS_SVC_IP, CLUSTER_DNS_DOMAIN)根据hosts文件设置而定,需要使用ansible template模块替换参数后生成 5 | + 运行 `ansible-playbook 01.prepare.yml`后会重新生成该目录下的kubedns.yaml 文件 6 | + kubedns.yaml [模板文件](../../roles/deploy/templates/kubedns.yaml.j2) 7 | -------------------------------------------------------------------------------- /pics/alipay.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mendickxiao/kubeasz/c8987d051027b6d157da57041bb9d4a4c38075dd/pics/alipay.png -------------------------------------------------------------------------------- /pics/ansible.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mendickxiao/kubeasz/c8987d051027b6d157da57041bb9d4a4c38075dd/pics/ansible.jpg -------------------------------------------------------------------------------- /pics/docker.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mendickxiao/kubeasz/c8987d051027b6d157da57041bb9d4a4c38075dd/pics/docker.jpg -------------------------------------------------------------------------------- /pics/grafana.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mendickxiao/kubeasz/c8987d051027b6d157da57041bb9d4a4c38075dd/pics/grafana.png -------------------------------------------------------------------------------- /pics/influxdb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mendickxiao/kubeasz/c8987d051027b6d157da57041bb9d4a4c38075dd/pics/influxdb.png -------------------------------------------------------------------------------- /pics/kube.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mendickxiao/kubeasz/c8987d051027b6d157da57041bb9d4a4c38075dd/pics/kube.jpg -------------------------------------------------------------------------------- /roles/calico/tasks/main.yml: -------------------------------------------------------------------------------- 1 | - name: 创建calico 相关目录 2 | file: name={{ item }} state=directory 3 | with_items: 4 | - /etc/calico/ssl 5 | - /root/local/kube-system/calico 6 | 7 | - name: 复制CA 证书到calico 证书目录 8 | copy: src={{ ca_dir }}/ca.pem dest=/etc/calico/ssl/ca.pem 9 | 10 | - name: 创建calico 证书请求 11 | template: src=calico-csr.json.j2 dest=/etc/calico/ssl/calico-csr.json 12 | 13 | - name: 创建 calico证书和私钥 14 | shell: "cd /etc/calico/ssl && {{ bin_dir }}/cfssl gencert \ 15 | -ca={{ ca_dir }}/ca.pem \ 16 | -ca-key={{ ca_dir }}/ca-key.pem \ 17 | -config={{ ca_dir }}/ca-config.json \ 18 | -profile=kubernetes calico-csr.json | {{ bin_dir }}/cfssljson -bare calico" 19 | 20 | - name: 准备 calico DaemonSet yaml文件 21 | template: src=calico.yaml.j2 dest=/root/local/kube-system/calico/calico.yaml 22 | 23 | - name: 准备 calico rbac文件 24 | template: src=calico-rbac.yaml.j2 dest=/root/local/kube-system/calico/calico-rbac.yaml 25 | 26 | # 只需单节点执行一次,重复执行的报错可以忽略 27 | - name: 运行 calico网络 28 | shell: "{{ bin_dir }}/kubectl create -f /root/local/kube-system/calico/ && sleep 15" 29 | when: NODE_ID is defined and 
NODE_ID == "node1" 30 | ignore_errors: true 31 | 32 | # 删除原有cni配置 33 | - name: 删除默认cni配置 34 | file: path=/etc/cni/net.d/10-default.conf state=absent 35 | 36 | # 删除原有cni插件网卡mynet0 37 | - name: 删除默认cni插件网卡mynet0 38 | shell: "ip link del mynet0" 39 | ignore_errors: true 40 | 41 | # [可选]cni calico plugins 已经在calico.yaml完成自动安装 42 | - name: 下载calicoctl 客户端 43 | copy: src={{ base_dir }}/bin/{{ item }} dest={{ bin_dir }}/{{ item }} mode=0755 44 | with_items: 45 | #- calico 46 | #- calico-ipam 47 | #- loopback 48 | - calicoctl 49 | 50 | - name: 准备 calicoctl配置文件 51 | template: src=calicoctl.cfg.j2 dest=/etc/calico/calicoctl.cfg 52 | -------------------------------------------------------------------------------- /roles/calico/templates/calico-csr.json.j2: -------------------------------------------------------------------------------- 1 | { 2 | "CN": "calico", 3 | "hosts": [], 4 | "key": { 5 | "algo": "rsa", 6 | "size": 2048 7 | }, 8 | "names": [ 9 | { 10 | "C": "CN", 11 | "ST": "HangZhou", 12 | "L": "XS", 13 | "O": "k8s", 14 | "OU": "System" 15 | } 16 | ] 17 | } 18 | -------------------------------------------------------------------------------- /roles/calico/templates/calico-rbac.yaml.j2: -------------------------------------------------------------------------------- 1 | # Calico Version v2.6.5 2 | # https://docs.projectcalico.org/v2.6/releases#v2.6.5 3 | 4 | --- 5 | 6 | kind: ClusterRole 7 | apiVersion: rbac.authorization.k8s.io/v1 8 | metadata: 9 | name: calico-kube-controllers 10 | rules: 11 | - apiGroups: 12 | - "" 13 | - extensions 14 | resources: 15 | - pods 16 | - namespaces 17 | - networkpolicies 18 | - nodes 19 | verbs: 20 | - watch 21 | - list 22 | --- 23 | kind: ClusterRoleBinding 24 | apiVersion: rbac.authorization.k8s.io/v1 25 | metadata: 26 | name: calico-kube-controllers 27 | roleRef: 28 | apiGroup: rbac.authorization.k8s.io 29 | kind: ClusterRole 30 | name: calico-kube-controllers 31 | subjects: 32 | - kind: ServiceAccount 33 | name: calico-kube-controllers 34 | namespace: kube-system 35 | 36 | --- 37 | 38 | kind: ClusterRole 39 | apiVersion: rbac.authorization.k8s.io/v1 40 | metadata: 41 | name: calico-node 42 | rules: 43 | - apiGroups: [""] 44 | resources: 45 | - pods 46 | - nodes 47 | verbs: 48 | - get 49 | 50 | --- 51 | 52 | apiVersion: rbac.authorization.k8s.io/v1 53 | kind: ClusterRoleBinding 54 | metadata: 55 | name: calico-node 56 | roleRef: 57 | apiGroup: rbac.authorization.k8s.io 58 | kind: ClusterRole 59 | name: calico-node 60 | subjects: 61 | - kind: ServiceAccount 62 | name: calico-node 63 | namespace: kube-system 64 | 65 | -------------------------------------------------------------------------------- /roles/calico/templates/calico.yaml.j2: -------------------------------------------------------------------------------- 1 | # Calico Version v2.6.5 2 | # https://docs.projectcalico.org/v2.6/releases#v2.6.5 3 | # This manifest includes the following component versions: 4 | # calico/node:v2.6.5 5 | # calico/cni:v1.11.2 6 | # calico/kube-controllers:v1.0.2 7 | 8 | # This ConfigMap is used to configure a self-hosted Calico installation. 9 | kind: ConfigMap 10 | apiVersion: v1 11 | metadata: 12 | name: calico-config 13 | namespace: kube-system 14 | data: 15 | # Configure this with the location of your etcd cluster. 16 | etcd_endpoints: "{{ ETCD_ENDPOINTS }}" 17 | 18 | # Configure the Calico backend to use. 19 | calico_backend: "bird" 20 | 21 | # The CNI network configuration to install on each node. 
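  # Note: the etcd_*_file paths referenced below match the certificates that
  # roles/calico/tasks/main.yml generates under /etc/calico/ssl on each node.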
22 | cni_network_config: |- 23 | { 24 | "name": "k8s-pod-network", 25 | "cniVersion": "0.1.0", 26 | "type": "calico", 27 | "etcd_endpoints": "{{ ETCD_ENDPOINTS }}", 28 | "etcd_key_file": "/etc/calico/ssl/calico-key.pem", 29 | "etcd_cert_file": "/etc/calico/ssl/calico.pem", 30 | "etcd_ca_cert_file": "/etc/calico/ssl/ca.pem", 31 | "log_level": "info", 32 | "mtu": 1500, 33 | "ipam": { 34 | "type": "calico-ipam" 35 | }, 36 | "policy": { 37 | "type": "k8s" 38 | }, 39 | "kubernetes": { 40 | "kubeconfig": "/root/.kube/config" 41 | } 42 | } 43 | 44 | # If you're using TLS enabled etcd uncomment the following. 45 | # You must also populate the Secret below with these files. 46 | etcd_ca: "/calico-secrets/ca.pem" 47 | etcd_cert: "/calico-secrets/calico.pem" 48 | etcd_key: "/calico-secrets/calico-key.pem" 49 | --- 50 | 51 | # This manifest installs the calico/node container, as well 52 | # as the Calico CNI plugins and network config on 53 | # each master and worker node in a Kubernetes cluster. 54 | kind: DaemonSet 55 | apiVersion: extensions/v1beta1 56 | metadata: 57 | name: calico-node 58 | namespace: kube-system 59 | labels: 60 | k8s-app: calico-node 61 | spec: 62 | selector: 63 | matchLabels: 64 | k8s-app: calico-node 65 | template: 66 | metadata: 67 | labels: 68 | k8s-app: calico-node 69 | annotations: 70 | scheduler.alpha.kubernetes.io/critical-pod: '' 71 | scheduler.alpha.kubernetes.io/tolerations: | 72 | [{"key": "dedicated", "value": "master", "effect": "NoSchedule" }, 73 | {"key":"CriticalAddonsOnly", "operator":"Exists"}] 74 | spec: 75 | hostNetwork: true 76 | serviceAccountName: calico-node 77 | # Minimize downtime during a rolling upgrade or deletion; tell Kubernetes to do a "force 78 | # deletion": https://kubernetes.io/docs/concepts/workloads/pods/pod/#termination-of-pods. 79 | terminationGracePeriodSeconds: 0 80 | containers: 81 | # Runs calico/node container on each Kubernetes node. This 82 | # container programs network policy and routes on each 83 | # host. 84 | - name: calico-node 85 | #image: quay.io/calico/node:v2.6.5 86 | image: calico/node:v2.6.5 87 | env: 88 | # The location of the Calico etcd cluster. 89 | - name: ETCD_ENDPOINTS 90 | valueFrom: 91 | configMapKeyRef: 92 | name: calico-config 93 | key: etcd_endpoints 94 | # Choose the backend to use. 95 | - name: CALICO_NETWORKING_BACKEND 96 | valueFrom: 97 | configMapKeyRef: 98 | name: calico-config 99 | key: calico_backend 100 | # Cluster type to identify the deployment type 101 | - name: CLUSTER_TYPE 102 | value: "k8s,bgp" 103 | # Disable file logging so `kubectl logs` works. 104 | - name: CALICO_DISABLE_FILE_LOGGING 105 | value: "true" 106 | # Set Felix endpoint to host default action to ACCEPT. 107 | - name: FELIX_DEFAULTENDPOINTTOHOSTACTION 108 | value: "ACCEPT" 109 | # Configure the IP Pool from which Pod IPs will be chosen. 110 | - name: CALICO_IPV4POOL_CIDR 111 | value: "{{ CLUSTER_CIDR }}" 112 | - name: CALICO_IPV4POOL_IPIP 113 | value: "{{ CALICO_IPV4POOL_IPIP }}" 114 | # Set noderef for node controller. 115 | - name: CALICO_K8S_NODE_REF 116 | valueFrom: 117 | fieldRef: 118 | fieldPath: spec.nodeName 119 | # Disable IPv6 on Kubernetes. 120 | - name: FELIX_IPV6SUPPORT 121 | value: "false" 122 | # Set Felix logging to "info" 123 | - name: FELIX_LOGSEVERITYSCREEN 124 | value: "info" 125 | # Set MTU for tunnel device used if ipip is enabled 126 | - name: FELIX_IPINIPMTU 127 | value: "1440" 128 | # Location of the CA certificate for etcd. 
129 | - name: ETCD_CA_CERT_FILE 130 | valueFrom: 131 | configMapKeyRef: 132 | name: calico-config 133 | key: etcd_ca 134 | # Location of the client key for etcd. 135 | - name: ETCD_KEY_FILE 136 | valueFrom: 137 | configMapKeyRef: 138 | name: calico-config 139 | key: etcd_key 140 | # Location of the client certificate for etcd. 141 | - name: ETCD_CERT_FILE 142 | valueFrom: 143 | configMapKeyRef: 144 | name: calico-config 145 | key: etcd_cert 146 | # Auto-detect the BGP IP address. 147 | - name: IP 148 | value: "" 149 | - name: IP_AUTODETECTION_METHOD 150 | value: "{{ IP_AUTODETECTION_METHOD }}" 151 | - name: FELIX_HEALTHENABLED 152 | value: "true" 153 | securityContext: 154 | privileged: true 155 | resources: 156 | requests: 157 | cpu: 250m 158 | livenessProbe: 159 | httpGet: 160 | path: /liveness 161 | port: 9099 162 | periodSeconds: 10 163 | initialDelaySeconds: 10 164 | failureThreshold: 6 165 | readinessProbe: 166 | httpGet: 167 | path: /readiness 168 | port: 9099 169 | periodSeconds: 10 170 | volumeMounts: 171 | - mountPath: /lib/modules 172 | name: lib-modules 173 | readOnly: true 174 | - mountPath: /var/run/calico 175 | name: var-run-calico 176 | readOnly: false 177 | - mountPath: /calico-secrets 178 | name: etcd-certs 179 | # This container installs the Calico CNI binaries 180 | # and CNI network config file on each node. 181 | - name: install-cni 182 | #image: quay.io/calico/cni:v1.11.2 183 | image: calico/cni:v1.11.2 184 | command: ["/install-cni.sh"] 185 | env: 186 | # The location of the Calico etcd cluster. 187 | - name: ETCD_ENDPOINTS 188 | valueFrom: 189 | configMapKeyRef: 190 | name: calico-config 191 | key: etcd_endpoints 192 | # The CNI network config to install on each node. 193 | - name: CNI_NETWORK_CONFIG 194 | valueFrom: 195 | configMapKeyRef: 196 | name: calico-config 197 | key: cni_network_config 198 | volumeMounts: 199 | - mountPath: /host/opt/cni/bin 200 | name: cni-bin-dir 201 | - mountPath: /host/etc/cni/net.d 202 | name: cni-net-dir 203 | - mountPath: /calico-secrets 204 | name: etcd-certs 205 | volumes: 206 | # Used by calico/node. 207 | - name: lib-modules 208 | hostPath: 209 | path: /lib/modules 210 | - name: var-run-calico 211 | hostPath: 212 | path: /var/run/calico 213 | # Used to install CNI. 214 | - name: cni-bin-dir 215 | hostPath: 216 | path: {{ bin_dir }} 217 | - name: cni-net-dir 218 | hostPath: 219 | path: /etc/cni/net.d 220 | # Mount in the etcd TLS secrets. 221 | - name: etcd-certs 222 | hostPath: 223 | path: /etc/calico/ssl 224 | 225 | --- 226 | 227 | # This manifest deploys the Calico Kubernetes controllers. 228 | # See https://github.com/projectcalico/kube-controllers 229 | apiVersion: extensions/v1beta1 230 | kind: Deployment 231 | metadata: 232 | name: calico-kube-controllers 233 | namespace: kube-system 234 | labels: 235 | k8s-app: calico-kube-controllers 236 | annotations: 237 | scheduler.alpha.kubernetes.io/critical-pod: '' 238 | scheduler.alpha.kubernetes.io/tolerations: | 239 | [{"key": "dedicated", "value": "master", "effect": "NoSchedule" }, 240 | {"key":"CriticalAddonsOnly", "operator":"Exists"}] 241 | spec: 242 | # The controllers can only have a single active instance. 
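  # Hence replicas stays at 1 and the Recreate strategy below prevents two
  # controller instances from overlapping during an update.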
243 | replicas: 1 244 | strategy: 245 | type: Recreate 246 | selector: 247 | matchLabels: 248 | k8s-app: calico-kube-controllers 249 | template: 250 | metadata: 251 | name: calico-kube-controllers 252 | namespace: kube-system 253 | labels: 254 | k8s-app: calico-kube-controllers 255 | spec: 256 | # The controllers must run in the host network namespace so that 257 | # it isn't governed by policy that would prevent it from working. 258 | hostNetwork: true 259 | serviceAccountName: calico-kube-controllers 260 | containers: 261 | - name: calico-kube-controllers 262 | #image: quay.io/calico/kube-controllers:v1.0.2 263 | image: calico/kube-controllers:v1.0.2 264 | env: 265 | # The location of the Calico etcd cluster. 266 | - name: ETCD_ENDPOINTS 267 | valueFrom: 268 | configMapKeyRef: 269 | name: calico-config 270 | key: etcd_endpoints 271 | # Location of the CA certificate for etcd. 272 | - name: ETCD_CA_CERT_FILE 273 | valueFrom: 274 | configMapKeyRef: 275 | name: calico-config 276 | key: etcd_ca 277 | # Location of the client key for etcd. 278 | - name: ETCD_KEY_FILE 279 | valueFrom: 280 | configMapKeyRef: 281 | name: calico-config 282 | key: etcd_key 283 | # Location of the client certificate for etcd. 284 | - name: ETCD_CERT_FILE 285 | valueFrom: 286 | configMapKeyRef: 287 | name: calico-config 288 | key: etcd_cert 289 | # Choose which controllers to run. 290 | - name: ENABLED_CONTROLLERS 291 | value: policy,profile,workloadendpoint,node 292 | volumeMounts: 293 | # Mount in the etcd TLS secrets. 294 | - mountPath: /calico-secrets 295 | name: etcd-certs 296 | volumes: 297 | # Mount in the etcd TLS secrets. 298 | - name: etcd-certs 299 | hostPath: 300 | path: /etc/calico/ssl 301 | 302 | --- 303 | 304 | apiVersion: v1 305 | kind: ServiceAccount 306 | metadata: 307 | name: calico-kube-controllers 308 | namespace: kube-system 309 | 310 | --- 311 | 312 | apiVersion: v1 313 | kind: ServiceAccount 314 | metadata: 315 | name: calico-node 316 | namespace: kube-system 317 | -------------------------------------------------------------------------------- /roles/calico/templates/calicoctl.cfg.j2: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: calicoApiConfig 3 | metadata: 4 | spec: 5 | datastoreType: "etcdv2" 6 | etcdEndpoints: {{ ETCD_ENDPOINTS }} 7 | etcdKeyFile: /etc/calico/ssl/calico-key.pem 8 | etcdCertFile: /etc/calico/ssl/calico.pem 9 | etcdCACertFile: /etc/calico/ssl/ca.pem 10 | -------------------------------------------------------------------------------- /roles/deploy/tasks/main.yml: -------------------------------------------------------------------------------- 1 | - name: prepare some dirs 2 | file: name={{ item }} state=directory 3 | with_items: 4 | - "{{ bin_dir }}" 5 | - "{{ ca_dir }}" 6 | - "{{ base_dir }}/roles/prepare/files/" 7 | 8 | - name: 下载证书工具 CFSSL 9 | copy: src={{ base_dir }}/bin/{{ item }} dest={{ bin_dir }}/{{ item }} mode=0755 10 | with_items: 11 | - cfssl 12 | - cfssl-certinfo 13 | - cfssljson 14 | 15 | - name: 准备CA配置文件 16 | template: src=ca-config.json.j2 dest={{ ca_dir }}/ca-config.json 17 | 18 | - name: 准备CA签名请求 19 | template: src=ca-csr.json.j2 dest={{ ca_dir }}/ca-csr.json 20 | 21 | - name: 生成 CA 证书和私钥 22 | shell: "cd {{ ca_dir }} && {{ bin_dir }}/cfssl gencert -initca ca-csr.json | {{ bin_dir }}/cfssljson -bare ca" 23 | 24 | # 为了保证整个安装的幂等性,如果已经生成过CA证书,就使用已经存在的CA;删除/roles/prepare/files/ca* 可以使用新CA 证书 25 | - name: 准备分发 CA证书 26 | copy: src={{ ca_dir }}/{{ item }} dest={{ base_dir }}/roles/prepare/files/{{ 
item }} force=no 27 | with_items: 28 | - ca.pem 29 | - ca-key.pem 30 | - ca.csr 31 | - ca-config.json 32 | 33 | # kubedns.yaml文件中部分参数根据hosts文件设置而定,因此需要用template模块替换参数 34 | - name: 准备 kubedns的部署文件 kubedns.yaml 35 | template: src=kubedns.yaml.j2 dest={{ base_dir }}/manifests/kubedns/kubedns.yaml 36 | 37 | -------------------------------------------------------------------------------- /roles/deploy/templates/ca-config.json.j2: -------------------------------------------------------------------------------- 1 | { 2 | "signing": { 3 | "default": { 4 | "expiry": "87600h" 5 | }, 6 | "profiles": { 7 | "kubernetes": { 8 | "usages": [ 9 | "signing", 10 | "key encipherment", 11 | "server auth", 12 | "client auth" 13 | ], 14 | "expiry": "87600h" 15 | } 16 | } 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /roles/deploy/templates/ca-csr.json.j2: -------------------------------------------------------------------------------- 1 | { 2 | "CN": "kubernetes", 3 | "key": { 4 | "algo": "rsa", 5 | "size": 2048 6 | }, 7 | "names": [ 8 | { 9 | "C": "CN", 10 | "ST": "HangZhou", 11 | "L": "XS", 12 | "O": "k8s", 13 | "OU": "System" 14 | } 15 | ] 16 | } 17 | -------------------------------------------------------------------------------- /roles/deploy/templates/kubedns.yaml.j2: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: ConfigMap 4 | metadata: 5 | name: kube-dns 6 | namespace: kube-system 7 | labels: 8 | addonmanager.kubernetes.io/mode: EnsureExists 9 | 10 | --- 11 | apiVersion: v1 12 | kind: ServiceAccount 13 | metadata: 14 | name: kube-dns 15 | namespace: kube-system 16 | labels: 17 | #kubernetes.io/cluster-service: "true" 18 | addonmanager.kubernetes.io/mode: Reconcile 19 | 20 | --- 21 | apiVersion: v1 22 | kind: Service 23 | metadata: 24 | name: kube-dns 25 | namespace: kube-system 26 | labels: 27 | k8s-app: kube-dns 28 | #kubernetes.io/cluster-service: "true" 29 | addonmanager.kubernetes.io/mode: Reconcile 30 | kubernetes.io/name: "KubeDNS" 31 | spec: 32 | selector: 33 | k8s-app: kube-dns 34 | clusterIP: {{ CLUSTER_DNS_SVC_IP }} 35 | ports: 36 | - name: dns 37 | port: 53 38 | protocol: UDP 39 | - name: dns-tcp 40 | port: 53 41 | protocol: TCP 42 | 43 | --- 44 | apiVersion: apps/v1 45 | kind: Deployment 46 | metadata: 47 | name: kube-dns 48 | namespace: kube-system 49 | labels: 50 | k8s-app: kube-dns 51 | #kubernetes.io/cluster-service: "true" 52 | addonmanager.kubernetes.io/mode: Reconcile 53 | spec: 54 | # replicas: not specified here: 55 | # 1. In order to make Addon Manager do not reconcile this replicas parameter. 56 | # 2. Default is 1. 57 | # 3. Will be tuned in real time if DNS horizontal auto-scaling is turned on. 
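  # With maxSurge 10% and maxUnavailable 0 below, a rolling update brings up the
  # extra kube-dns pod first and removes an old one only after it is ready, so
  # cluster DNS keeps answering queries throughout the update.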
58 | strategy: 59 | rollingUpdate: 60 | maxSurge: 10% 61 | maxUnavailable: 0 62 | selector: 63 | matchLabels: 64 | k8s-app: kube-dns 65 | template: 66 | metadata: 67 | labels: 68 | k8s-app: kube-dns 69 | annotations: 70 | scheduler.alpha.kubernetes.io/critical-pod: '' 71 | spec: 72 | tolerations: 73 | - key: "CriticalAddonsOnly" 74 | operator: "Exists" 75 | volumes: 76 | - name: kube-dns-config 77 | configMap: 78 | name: kube-dns 79 | optional: true 80 | containers: 81 | - name: kubedns 82 | #image: gcr.io/google_containers/k8s-dns-kube-dns-amd64:1.14.5 83 | image: mirrorgooglecontainers/k8s-dns-kube-dns-amd64:1.14.5 84 | resources: 85 | # TODO: Set memory limits when we've profiled the container for large 86 | # clusters, then set request = limit to keep this container in 87 | # guaranteed class. Currently, this container falls into the 88 | # "burstable" category so the kubelet doesn't backoff from restarting it. 89 | limits: 90 | memory: 170Mi 91 | requests: 92 | cpu: 100m 93 | memory: 70Mi 94 | livenessProbe: 95 | httpGet: 96 | path: /healthcheck/kubedns 97 | port: 10054 98 | scheme: HTTP 99 | initialDelaySeconds: 60 100 | timeoutSeconds: 5 101 | successThreshold: 1 102 | failureThreshold: 5 103 | readinessProbe: 104 | httpGet: 105 | path: /readiness 106 | port: 8081 107 | scheme: HTTP 108 | # we poll on pod startup for the Kubernetes master service and 109 | # only setup the /readiness HTTP server once that's available. 110 | initialDelaySeconds: 3 111 | timeoutSeconds: 5 112 | args: 113 | - --domain={{ CLUSTER_DNS_DOMAIN }} 114 | - --dns-port=10053 115 | - --config-dir=/kube-dns-config 116 | - --v=2 117 | env: 118 | - name: PROMETHEUS_PORT 119 | value: "10055" 120 | ports: 121 | - containerPort: 10053 122 | name: dns-local 123 | protocol: UDP 124 | - containerPort: 10053 125 | name: dns-tcp-local 126 | protocol: TCP 127 | - containerPort: 10055 128 | name: metrics 129 | protocol: TCP 130 | volumeMounts: 131 | - name: kube-dns-config 132 | mountPath: /kube-dns-config 133 | - name: dnsmasq 134 | #image: gcr.io/google_containers/k8s-dns-dnsmasq-nanny-amd64:1.14.5 135 | image: mirrorgooglecontainers/k8s-dns-dnsmasq-nanny-amd64:1.14.5 136 | livenessProbe: 137 | httpGet: 138 | path: /healthcheck/dnsmasq 139 | port: 10054 140 | scheme: HTTP 141 | initialDelaySeconds: 60 142 | timeoutSeconds: 5 143 | successThreshold: 1 144 | failureThreshold: 5 145 | args: 146 | - -v=2 147 | - -logtostderr 148 | - -configDir=/etc/k8s/dns/dnsmasq-nanny 149 | - -restartDnsmasq=true 150 | - -- 151 | - -k 152 | - --cache-size=1000 153 | - --log-facility=- 154 | - --server=/{{ CLUSTER_DNS_DOMAIN }}/127.0.0.1#10053 155 | - --server=/in-addr.arpa/127.0.0.1#10053 156 | - --server=/ip6.arpa/127.0.0.1#10053 157 | ports: 158 | - containerPort: 53 159 | name: dns 160 | protocol: UDP 161 | - containerPort: 53 162 | name: dns-tcp 163 | protocol: TCP 164 | # see: https://github.com/kubernetes/kubernetes/issues/29055 for details 165 | resources: 166 | requests: 167 | cpu: 150m 168 | memory: 20Mi 169 | volumeMounts: 170 | - name: kube-dns-config 171 | mountPath: /etc/k8s/dns/dnsmasq-nanny 172 | - name: sidecar 173 | #image: gcr.io/google_containers/k8s-dns-sidecar-amd64:1.14.5 174 | image: mirrorgooglecontainers/k8s-dns-sidecar-amd64:1.14.5 175 | livenessProbe: 176 | httpGet: 177 | path: /metrics 178 | port: 10054 179 | scheme: HTTP 180 | initialDelaySeconds: 60 181 | timeoutSeconds: 5 182 | successThreshold: 1 183 | failureThreshold: 5 184 | args: 185 | - --v=2 186 | - --logtostderr 187 | - 
--probe=kubedns,127.0.0.1:10053,kubernetes.default.svc.{{ CLUSTER_DNS_DOMAIN }},5,A 188 | - --probe=dnsmasq,127.0.0.1:53,kubernetes.default.svc.{{ CLUSTER_DNS_DOMAIN }},5,A 189 | ports: 190 | - containerPort: 10054 191 | name: metrics 192 | protocol: TCP 193 | resources: 194 | requests: 195 | memory: 20Mi 196 | cpu: 10m 197 | dnsPolicy: Default # Don't use cluster DNS. 198 | serviceAccountName: kube-dns 199 | -------------------------------------------------------------------------------- /roles/docker/files/daemon.json: -------------------------------------------------------------------------------- 1 | { 2 | "registry-mirrors": ["https://registry.docker-cn.com"], 3 | "max-concurrent-downloads": 6 4 | } 5 | -------------------------------------------------------------------------------- /roles/docker/files/docker-tag: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | curl -s -S "https://registry.hub.docker.com/v2/repositories/$@/tags/" | jq '."results"[]["name"]' |sort 3 | -------------------------------------------------------------------------------- /roles/docker/tasks/main.yml: -------------------------------------------------------------------------------- 1 | ## ---------docker daemon配置部分----------- 2 | - name: 下载 docker 二进制文件 3 | copy: src={{ base_dir }}/bin/{{ item }} dest={{ bin_dir }}/{{ item }} mode=0755 4 | with_items: 5 | - docker-containerd 6 | - docker-containerd-shim 7 | - docker-init 8 | - docker-runc 9 | - docker 10 | - docker-containerd-ctr 11 | - dockerd 12 | - docker-proxy 13 | 14 | - name: docker命令自动补全 15 | copy: src=docker dest=/etc/bash_completion.d/docker mode=0644 16 | 17 | - name: docker国内镜像加速 18 | copy: src=daemon.json dest=/etc/docker/daemon.json 19 | 20 | - name: flush-iptables 21 | shell: "iptables -F && iptables -X \ 22 | && iptables -F -t nat && iptables -X -t nat \ 23 | && iptables -F -t raw && iptables -X -t raw \ 24 | && iptables -F -t mangle && iptables -X -t mangle" 25 | 26 | - name: 创建docker的systemd unit文件 27 | template: src=docker.service.j2 dest=/etc/systemd/system/docker.service 28 | 29 | - name: 开启docker 服务 30 | shell: systemctl daemon-reload && systemctl enable docker && systemctl restart docker 31 | 32 | ## 可选 ------安装docker查询镜像 tag的小工具---- 33 | # 先拉取下节点的ansible setup信息,起到缓存效果,否则后续when 判断可能失败 34 | - name: 缓存ansilbe setup信息 35 | setup: gather_subset=min 36 | tags: docker-tag 37 | 38 | - name: apt安装轻量JSON处理程序 39 | apt: name=jq state=latest 40 | when: ansible_distribution == "Ubuntu" and ansible_distribution_major_version == "16" 41 | tags: docker-tag 42 | 43 | - name: yum安装轻量JSON处理程序 44 | yum: name=jq state=latest 45 | when: ansible_distribution == "CentOS" and ansible_distribution_major_version == "7" 46 | tags: docker-tag 47 | 48 | - name: 下载 docker-tag 49 | copy: src=docker-tag dest={{ bin_dir }}/docker-tag mode=0755 50 | tags: docker-tag 51 | -------------------------------------------------------------------------------- /roles/docker/templates/docker.service.j2: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Docker Application Container Engine 3 | Documentation=http://docs.docker.io 4 | 5 | [Service] 6 | Environment="PATH={{ bin_dir }}:/bin:/sbin:/usr/bin:/usr/sbin" 7 | ExecStart={{ bin_dir }}/dockerd --log-level=error 8 | ExecStartPost=/sbin/iptables -I FORWARD -s 0.0.0.0/0 -j ACCEPT 9 | ExecReload=/bin/kill -s HUP $MAINPID 10 | Restart=on-failure 11 | RestartSec=5 12 | LimitNOFILE=infinity 13 | LimitNPROC=infinity 14 | 
LimitCORE=infinity 15 | Delegate=yes 16 | KillMode=process 17 | 18 | [Install] 19 | WantedBy=multi-user.target 20 | -------------------------------------------------------------------------------- /roles/etcd/tasks/main.yml: -------------------------------------------------------------------------------- 1 | - name: 下载etcd二进制文件 2 | copy: src={{ base_dir }}/bin/{{ item }} dest={{ bin_dir }}/{{ item }} mode=0755 3 | with_items: 4 | - etcd 5 | - etcdctl 6 | 7 | - name: 创建etcd证书目录 8 | file: name=/etc/etcd/ssl state=directory 9 | 10 | - name: 创建etcd证书请求 11 | template: src=etcd-csr.json.j2 dest=/etc/etcd/ssl/etcd-csr.json 12 | 13 | - name: 创建 etcd证书和私钥 14 | shell: "cd /etc/etcd/ssl && {{ bin_dir }}/cfssl gencert \ 15 | -ca={{ ca_dir }}/ca.pem \ 16 | -ca-key={{ ca_dir }}/ca-key.pem \ 17 | -config={{ ca_dir }}/ca-config.json \ 18 | -profile=kubernetes etcd-csr.json | {{ bin_dir }}/cfssljson -bare etcd" 19 | 20 | - name: 创建etcd工作目录 21 | file: name=/var/lib/etcd state=directory 22 | 23 | - name: 创建etcd的systemd unit文件 24 | template: src=etcd.service.j2 dest=/etc/systemd/system/etcd.service 25 | 26 | - name: 开启etcd服务 27 | shell: systemctl daemon-reload && systemctl enable etcd && systemctl restart etcd 28 | -------------------------------------------------------------------------------- /roles/etcd/templates/etcd-csr.json.j2: -------------------------------------------------------------------------------- 1 | { 2 | "CN": "etcd", 3 | "hosts": [ 4 | "127.0.0.1", 5 | "{{ NODE_IP }}" 6 | ], 7 | "key": { 8 | "algo": "rsa", 9 | "size": 2048 10 | }, 11 | "names": [ 12 | { 13 | "C": "CN", 14 | "ST": "HangZhou", 15 | "L": "XS", 16 | "O": "k8s", 17 | "OU": "System" 18 | } 19 | ] 20 | } 21 | -------------------------------------------------------------------------------- /roles/etcd/templates/etcd.service.j2: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Etcd Server 3 | After=network.target 4 | After=network-online.target 5 | Wants=network-online.target 6 | Documentation=https://github.com/coreos 7 | 8 | [Service] 9 | Type=notify 10 | WorkingDirectory=/var/lib/etcd/ 11 | ExecStart={{ bin_dir }}/etcd \ 12 | --name={{ NODE_NAME }} \ 13 | --cert-file=/etc/etcd/ssl/etcd.pem \ 14 | --key-file=/etc/etcd/ssl/etcd-key.pem \ 15 | --peer-cert-file=/etc/etcd/ssl/etcd.pem \ 16 | --peer-key-file=/etc/etcd/ssl/etcd-key.pem \ 17 | --trusted-ca-file={{ ca_dir }}/ca.pem \ 18 | --peer-trusted-ca-file={{ ca_dir }}/ca.pem \ 19 | --initial-advertise-peer-urls=https://{{ NODE_IP }}:2380 \ 20 | --listen-peer-urls=https://{{ NODE_IP }}:2380 \ 21 | --listen-client-urls=https://{{ NODE_IP }}:2379,http://127.0.0.1:2379 \ 22 | --advertise-client-urls=https://{{ NODE_IP }}:2379 \ 23 | --initial-cluster-token=etcd-cluster-0 \ 24 | --initial-cluster={{ ETCD_NODES }} \ 25 | --initial-cluster-state=new \ 26 | --data-dir=/var/lib/etcd 27 | Restart=on-failure 28 | RestartSec=5 29 | LimitNOFILE=65536 30 | 31 | [Install] 32 | WantedBy=multi-user.target 33 | -------------------------------------------------------------------------------- /roles/flannel/tasks/main.yml: -------------------------------------------------------------------------------- 1 | - name: 创建flannel cni 相关目录 2 | file: name={{ item }} state=directory 3 | with_items: 4 | - /etc/cni/net.d 5 | - /root/local/kube-system/flannel 6 | 7 | - name: 下载flannel cni plugins 8 | copy: src={{ base_dir }}/bin/{{ item }} dest={{ bin_dir }}/{{ item }} mode=0755 9 | with_items: 10 | - bridge 11 | - flannel 12 | - host-local 13 | - 
loopback 14 | - portmap 15 | 16 | - name: 准备 flannel DaemonSet yaml文件 17 | template: src=kube-flannel.yaml.j2 dest=/root/local/kube-system/flannel/kube-flannel.yaml 18 | 19 | # 只需单节点执行一次,重复执行的报错可以忽略 20 | - name: 运行 flannel网络 21 | shell: "{{ bin_dir }}/kubectl create -f /root/local/kube-system/flannel/ && sleep 15" 22 | when: NODE_ID is defined and NODE_ID == "node1" 23 | ignore_errors: true 24 | 25 | # 删除原有cni配置 26 | - name: 删除默认cni配置 27 | file: path=/etc/cni/net.d/10-default.conf state=absent 28 | 29 | -------------------------------------------------------------------------------- /roles/flannel/templates/kube-flannel.yaml.j2: -------------------------------------------------------------------------------- 1 | --- 2 | kind: ClusterRole 3 | apiVersion: rbac.authorization.k8s.io/v1 4 | metadata: 5 | name: flannel 6 | rules: 7 | - apiGroups: 8 | - "" 9 | resources: 10 | - pods 11 | verbs: 12 | - get 13 | - apiGroups: 14 | - "" 15 | resources: 16 | - nodes 17 | verbs: 18 | - list 19 | - watch 20 | - apiGroups: 21 | - "" 22 | resources: 23 | - nodes/status 24 | verbs: 25 | - patch 26 | --- 27 | kind: ClusterRoleBinding 28 | apiVersion: rbac.authorization.k8s.io/v1 29 | metadata: 30 | name: flannel 31 | roleRef: 32 | apiGroup: rbac.authorization.k8s.io 33 | kind: ClusterRole 34 | name: flannel 35 | subjects: 36 | - kind: ServiceAccount 37 | name: flannel 38 | namespace: kube-system 39 | --- 40 | apiVersion: v1 41 | kind: ServiceAccount 42 | metadata: 43 | name: flannel 44 | namespace: kube-system 45 | --- 46 | kind: ConfigMap 47 | apiVersion: v1 48 | metadata: 49 | name: kube-flannel-cfg 50 | namespace: kube-system 51 | labels: 52 | tier: node 53 | app: flannel 54 | data: 55 | cni-conf.json: | 56 | { 57 | "name": "cbr0", 58 | "plugins": [ 59 | { 60 | "type": "flannel", 61 | "delegate": { 62 | "hairpinMode": true, 63 | "isDefaultGateway": true 64 | } 65 | }, 66 | { 67 | "type": "portmap", 68 | "capabilities": { 69 | "portMappings": true 70 | } 71 | } 72 | ] 73 | } 74 | net-conf.json: | 75 | { 76 | "Network": "{{ CLUSTER_CIDR }}", 77 | "Backend": { 78 | "Type": "{{ FLANNEL_BACKEND }}" 79 | } 80 | } 81 | --- 82 | apiVersion: extensions/v1beta1 83 | kind: DaemonSet 84 | metadata: 85 | name: kube-flannel-ds 86 | namespace: kube-system 87 | labels: 88 | tier: node 89 | app: flannel 90 | spec: 91 | template: 92 | metadata: 93 | labels: 94 | tier: node 95 | app: flannel 96 | spec: 97 | hostNetwork: true 98 | nodeSelector: 99 | beta.kubernetes.io/arch: amd64 100 | tolerations: 101 | - key: node-role.kubernetes.io/master 102 | operator: Exists 103 | effect: NoSchedule 104 | serviceAccountName: flannel 105 | initContainers: 106 | - name: install-cni 107 | image: jmgao1983/flannel:v0.9.1-amd64 108 | #image: quay.io/coreos/flannel:v0.9.1-amd64 109 | command: 110 | - cp 111 | args: 112 | - -f 113 | - /etc/kube-flannel/cni-conf.json 114 | - /etc/cni/net.d/10-flannel.conflist 115 | volumeMounts: 116 | - name: cni 117 | mountPath: /etc/cni/net.d 118 | - name: flannel-cfg 119 | mountPath: /etc/kube-flannel/ 120 | containers: 121 | - name: kube-flannel 122 | #image: quay.io/coreos/flannel:v0.9.1-amd64 123 | image: jmgao1983/flannel:v0.9.1-amd64 124 | command: 125 | - /opt/bin/flanneld 126 | args: 127 | - --ip-masq 128 | - --kube-subnet-mgr 129 | resources: 130 | requests: 131 | cpu: "100m" 132 | memory: "50Mi" 133 | limits: 134 | cpu: "100m" 135 | memory: "50Mi" 136 | securityContext: 137 | privileged: true 138 | env: 139 | - name: POD_NAME 140 | valueFrom: 141 | fieldRef: 142 | fieldPath: metadata.name 143 | - 
name: POD_NAMESPACE 144 | valueFrom: 145 | fieldRef: 146 | fieldPath: metadata.namespace 147 | volumeMounts: 148 | - name: run 149 | mountPath: /run 150 | - name: flannel-cfg 151 | mountPath: /etc/kube-flannel/ 152 | volumes: 153 | - name: run 154 | hostPath: 155 | path: /run 156 | - name: cni 157 | hostPath: 158 | path: /etc/cni/net.d 159 | - name: flannel-cfg 160 | configMap: 161 | name: kube-flannel-cfg 162 | -------------------------------------------------------------------------------- /roles/harbor/tasks/main.yml: -------------------------------------------------------------------------------- 1 | - name: 下载docker compose 二进制文件 2 | copy: src={{ base_dir }}/bin/docker-compose dest={{ bin_dir }}/docker-compose mode=0755 3 | 4 | # 注册变量result,根据result结果判断是否已经安装过harbor 5 | # result|failed 说明没有安装过harbor,下一步进行安装 6 | # result|succeeded 说明已经安装过harbor,下一步跳过安装 7 | - name: 注册变量result 8 | command: ls /data/registry 9 | register: result 10 | ignore_errors: True 11 | 12 | - name: 解压harbor离线安装包 13 | unarchive: 14 | src: "{{ base_dir }}/down/harbor-offline-installer-v1.2.2.tgz" 15 | dest: /root/local 16 | copy: yes 17 | keep_newer: yes 18 | when: result|failed 19 | 20 | - name: 导入harbor所需 docker images 21 | shell: "{{ bin_dir }}/docker load -i /root/local/harbor/harbor.v1.2.2.tar.gz" 22 | when: result|failed 23 | 24 | - name: 创建harbor证书请求 25 | template: src=harbor-csr.json.j2 dest={{ ca_dir }}/harbor-csr.json 26 | when: result|failed 27 | 28 | - name: 创建harbor证书和私钥 29 | shell: "cd {{ ca_dir }} && {{ bin_dir }}/cfssl gencert \ 30 | -ca={{ ca_dir }}/ca.pem \ 31 | -ca-key={{ ca_dir }}/ca-key.pem \ 32 | -config={{ ca_dir }}/ca-config.json \ 33 | -profile=kubernetes harbor-csr.json | {{ bin_dir }}/cfssljson -bare harbor" 34 | when: result|failed 35 | 36 | - name: 配置 harbor.cfg 文件 37 | template: src=harbor.cfg.j2 dest=/root/local/harbor/harbor.cfg 38 | when: result|failed 39 | 40 | - name: 安装 harbor 41 | shell: "cd /root/local/harbor && \ 42 | export PATH={{ bin_dir }}:$PATH && \ 43 | ./install.sh" 44 | when: result|failed 45 | -------------------------------------------------------------------------------- /roles/harbor/templates/harbor-csr.json.j2: -------------------------------------------------------------------------------- 1 | { 2 | "CN": "harbor", 3 | "hosts": [ 4 | "127.0.0.1", 5 | "{{ NODE_IP }}", 6 | "{{ HARBOR_DOMAIN }}" 7 | ], 8 | "key": { 9 | "algo": "rsa", 10 | "size": 2048 11 | }, 12 | "names": [ 13 | { 14 | "C": "CN", 15 | "ST": "HangZhou", 16 | "L": "XS", 17 | "O": "k8s", 18 | "OU": "System" 19 | } 20 | ] 21 | } 22 | -------------------------------------------------------------------------------- /roles/harbor/templates/harbor.cfg.j2: -------------------------------------------------------------------------------- 1 | ## Configuration file of Harbor 2 | 3 | #The IP address or hostname to access admin UI and registry service. 4 | #DO NOT use localhost or 127.0.0.1, because Harbor needs to be accessed by external clients. 5 | hostname = {{ NODE_IP }} 6 | 7 | #The protocol for accessing the UI and token/notification service, by default it is http. 8 | #It can be set to https if ssl is enabled on nginx. 9 | ui_url_protocol = https 10 | 11 | #The password for the root user of mysql db, change this before any production use. 12 | db_password = Harbor12345 13 | 14 | #Maximum number of job workers in job service 15 | max_job_workers = 3 16 | 17 | #Determine whether or not to generate certificate for the registry's token. 
18 | #If the value is on, the prepare script creates new root cert and private key 19 | #for generating token to access the registry. If the value is off the default key/cert will be used. 20 | #This flag also controls the creation of the notary signer's cert. 21 | customize_crt = on 22 | 23 | #The path of cert and key files for nginx, they are applied only the protocol is set to https 24 | ssl_cert = {{ ca_dir }}/harbor.pem 25 | ssl_cert_key = {{ ca_dir }}/harbor-key.pem 26 | 27 | #The path of secretkey storage 28 | secretkey_path = /data 29 | 30 | #Admiral's url, comment this attribute, or set its value to NA when Harbor is standalone 31 | admiral_url = NA 32 | 33 | #The password of the Clair's postgres database, only effective when Harbor is deployed with Clair. 34 | #Please update it before deployment, subsequent update will cause Clair's API server and Harbor unable to access Clair's database. 35 | clair_db_password = password 36 | 37 | #NOTES: The properties between BEGIN INITIAL PROPERTIES and END INITIAL PROPERTIES 38 | #only take effect in the first boot, the subsequent changes of these properties 39 | #should be performed on web ui 40 | 41 | #************************BEGIN INITIAL PROPERTIES************************ 42 | 43 | #Email account settings for sending out password resetting emails. 44 | 45 | #Email server uses the given username and password to authenticate on TLS connections to host and act as identity. 46 | #Identity left blank to act as username. 47 | email_identity = 48 | 49 | email_server = smtp.mydomain.com 50 | email_server_port = 25 51 | email_username = sample_admin@mydomain.com 52 | email_password = abc 53 | email_from = admin 54 | email_ssl = false 55 | 56 | ##The initial password of Harbor admin, only works for the first time when Harbor starts. 57 | #It has no effect after the first launch of Harbor. 58 | #Change the admin password from UI after launching Harbor. 59 | harbor_admin_password = Harbor12345 60 | 61 | ##By default the auth mode is db_auth, i.e. the credentials are stored in a local database. 62 | #Set it to ldap_auth if you want to verify a user's credentials against an LDAP server. 63 | auth_mode = db_auth 64 | 65 | #The url for an ldap endpoint. 66 | ldap_url = ldaps://ldap.mydomain.com 67 | 68 | #A user's DN who has the permission to search the LDAP/AD server. 69 | #If your LDAP/AD server does not support anonymous search, you should configure this DN and ldap_search_pwd. 70 | #ldap_searchdn = uid=searchuser,ou=people,dc=mydomain,dc=com 71 | 72 | #the password of the ldap_searchdn 73 | #ldap_search_pwd = password 74 | 75 | #The base DN from which to look up a user in LDAP/AD 76 | ldap_basedn = ou=people,dc=mydomain,dc=com 77 | 78 | #Search filter for LDAP/AD, make sure the syntax of the filter is correct. 79 | #ldap_filter = (objectClass=person) 80 | 81 | # The attribute used in a search to match a user, it could be uid, cn, email, sAMAccountName or other attributes depending on your LDAP/AD 82 | ldap_uid = uid 83 | 84 | #the scope to search for users, 1-LDAP_SCOPE_BASE, 2-LDAP_SCOPE_ONELEVEL, 3-LDAP_SCOPE_SUBTREE 85 | ldap_scope = 3 86 | 87 | #Timeout (in seconds) when connecting to an LDAP Server. The default value (and most reasonable) is 5 seconds. 
88 | ldap_timeout = 5 89 | 90 | #Turn on or off the self-registration feature 91 | self_registration = on 92 | 93 | #The expiration time (in minute) of token created by token service, default is 30 minutes 94 | token_expiration = 30 95 | 96 | #The flag to control what users have permission to create projects 97 | #The default value "everyone" allows everyone to creates a project. 98 | #Set to "adminonly" so that only admin user can create project. 99 | project_creation_restriction = everyone 100 | 101 | #Determine whether the job service should verify the ssl cert when it connects to a remote registry. 102 | #Set this flag to off when the remote registry uses a self-signed or untrusted certificate. 103 | verify_remote_cert = on 104 | #************************END INITIAL PROPERTIES************************ 105 | ############# 106 | 107 | -------------------------------------------------------------------------------- /roles/kube-master/tasks/main.yml: -------------------------------------------------------------------------------- 1 | - name: 下载 kube-master 二进制 2 | copy: src={{ base_dir }}/bin/{{ item }} dest={{ bin_dir }}/{{ item }} mode=0755 3 | with_items: 4 | - kube-apiserver 5 | - kube-controller-manager 6 | - kube-scheduler 7 | - kubectl 8 | - kube-proxy 9 | - kubelet 10 | 11 | - name: 创建 kubernetes 证书签名请求 12 | template: src=kubernetes-csr.json.j2 dest={{ ca_dir }}/kubernetes-csr.json 13 | 14 | - name: 创建 kubernetes 证书和私钥 15 | shell: "cd {{ ca_dir }} && {{ bin_dir }}/cfssl gencert \ 16 | -ca={{ ca_dir }}/ca.pem \ 17 | -ca-key={{ ca_dir }}/ca-key.pem \ 18 | -config={{ ca_dir }}/ca-config.json \ 19 | -profile=kubernetes kubernetes-csr.json | {{ bin_dir }}/cfssljson -bare kubernetes" 20 | 21 | - name: 创建 token.csv 22 | template: src=token.csv.j2 dest={{ ca_dir }}/token.csv 23 | 24 | - name: 创建 basic-auth.csv 25 | template: src=basic-auth.csv.j2 dest={{ ca_dir }}/basic-auth.csv 26 | 27 | - name: 创建kube-apiserver的systemd unit文件 28 | template: src=kube-apiserver.service.j2 dest=/etc/systemd/system/kube-apiserver.service 29 | 30 | - name: 创建kube-controller-manager的systemd unit文件 31 | template: src=kube-controller-manager.service.j2 dest=/etc/systemd/system/kube-controller-manager.service 32 | 33 | - name: 创建kube-scheduler的systemd unit文件 34 | template: src=kube-scheduler.service.j2 dest=/etc/systemd/system/kube-scheduler.service 35 | 36 | - name: daemon-reload 37 | shell: systemctl daemon-reload 38 | 39 | - name: enable-kube-apiserver 40 | shell: systemctl enable kube-apiserver 41 | 42 | - name: enable-kube-controller-manager 43 | shell: systemctl enable kube-controller-manager 44 | 45 | - name: enable-kube-scheduler 46 | shell: systemctl enable kube-scheduler 47 | 48 | - name: start-kube-apiserver 49 | shell: systemctl restart kube-apiserver 50 | 51 | - name: start-kube-controller-manager 52 | shell: systemctl restart kube-controller-manager 53 | 54 | - name: start-kube-scheduler 55 | shell: systemctl restart kube-scheduler 56 | -------------------------------------------------------------------------------- /roles/kube-master/templates/basic-auth.csv.j2: -------------------------------------------------------------------------------- 1 | {{ BASIC_AUTH_PASS }},{{ BASIC_AUTH_USER }},1 2 | readonly,readonly,2 3 | -------------------------------------------------------------------------------- /roles/kube-master/templates/kube-apiserver.service.j2: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Kubernetes API Server 3 | 
Documentation=https://github.com/GoogleCloudPlatform/kubernetes 4 | After=network.target 5 | 6 | [Service] 7 | ExecStart={{ bin_dir }}/kube-apiserver \ 8 | --admission-control=NamespaceLifecycle,LimitRanger,ServiceAccount,DefaultStorageClass,ResourceQuota,NodeRestriction \ 9 | --bind-address={{ NODE_IP }} \ 10 | --insecure-bind-address=127.0.0.1 \ 11 | --authorization-mode=Node,RBAC \ 12 | --kubelet-https=true \ 13 | --anonymous-auth=false \ 14 | --basic-auth-file={{ ca_dir }}/basic-auth.csv \ 15 | --enable-bootstrap-token-auth \ 16 | --token-auth-file={{ ca_dir }}/token.csv \ 17 | --service-cluster-ip-range={{ SERVICE_CIDR }} \ 18 | --service-node-port-range={{ NODE_PORT_RANGE }} \ 19 | --tls-cert-file={{ ca_dir }}/kubernetes.pem \ 20 | --tls-private-key-file={{ ca_dir }}/kubernetes-key.pem \ 21 | --client-ca-file={{ ca_dir }}/ca.pem \ 22 | --service-account-key-file={{ ca_dir }}/ca-key.pem \ 23 | --etcd-cafile={{ ca_dir }}/ca.pem \ 24 | --etcd-certfile={{ ca_dir }}/kubernetes.pem \ 25 | --etcd-keyfile={{ ca_dir }}/kubernetes-key.pem \ 26 | --etcd-servers={{ ETCD_ENDPOINTS }} \ 27 | --enable-swagger-ui=true \ 28 | --allow-privileged=true \ 29 | --audit-log-maxage=30 \ 30 | --audit-log-maxbackup=3 \ 31 | --audit-log-maxsize=100 \ 32 | --audit-log-path=/var/lib/audit.log \ 33 | --event-ttl=1h \ 34 | --v=2 35 | Restart=on-failure 36 | RestartSec=5 37 | Type=notify 38 | LimitNOFILE=65536 39 | 40 | [Install] 41 | WantedBy=multi-user.target 42 | -------------------------------------------------------------------------------- /roles/kube-master/templates/kube-controller-manager.service.j2: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Kubernetes Controller Manager 3 | Documentation=https://github.com/GoogleCloudPlatform/kubernetes 4 | 5 | [Service] 6 | ExecStart={{ bin_dir }}/kube-controller-manager \ 7 | --address=127.0.0.1 \ 8 | --master=http://127.0.0.1:8080 \ 9 | --allocate-node-cidrs=true \ 10 | --service-cluster-ip-range={{ SERVICE_CIDR }} \ 11 | --cluster-cidr={{ CLUSTER_CIDR }} \ 12 | --cluster-name=kubernetes \ 13 | --cluster-signing-cert-file={{ ca_dir }}/ca.pem \ 14 | --cluster-signing-key-file={{ ca_dir }}/ca-key.pem \ 15 | --service-account-private-key-file={{ ca_dir }}/ca-key.pem \ 16 | --root-ca-file={{ ca_dir }}/ca.pem \ 17 | --horizontal-pod-autoscaler-use-rest-clients=false \ 18 | --leader-elect=true \ 19 | --v=2 20 | Restart=on-failure 21 | RestartSec=5 22 | 23 | [Install] 24 | WantedBy=multi-user.target 25 | -------------------------------------------------------------------------------- /roles/kube-master/templates/kube-scheduler.service.j2: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Kubernetes Scheduler 3 | Documentation=https://github.com/GoogleCloudPlatform/kubernetes 4 | 5 | [Service] 6 | ExecStart={{ bin_dir }}/kube-scheduler \ 7 | --address=127.0.0.1 \ 8 | --master=http://127.0.0.1:8080 \ 9 | --leader-elect=true \ 10 | --v=2 11 | Restart=on-failure 12 | RestartSec=5 13 | 14 | [Install] 15 | WantedBy=multi-user.target 16 | -------------------------------------------------------------------------------- /roles/kube-master/templates/kubernetes-csr.json.j2: -------------------------------------------------------------------------------- 1 | { 2 | "CN": "kubernetes", 3 | "hosts": [ 4 | "127.0.0.1", 5 | "{{ MASTER_IP }}", 6 | "{{ NODE_IP }}", 7 | "{{ CLUSTER_KUBERNETES_SVC_IP }}", 8 | "kubernetes", 9 | "kubernetes.default", 10 | 
"kubernetes.default.svc", 11 | "kubernetes.default.svc.cluster", 12 | "kubernetes.default.svc.cluster.local" 13 | ], 14 | "key": { 15 | "algo": "rsa", 16 | "size": 2048 17 | }, 18 | "names": [ 19 | { 20 | "C": "CN", 21 | "ST": "HangZhou", 22 | "L": "XS", 23 | "O": "k8s", 24 | "OU": "System" 25 | } 26 | ] 27 | } 28 | -------------------------------------------------------------------------------- /roles/kube-master/templates/token.csv.j2: -------------------------------------------------------------------------------- 1 | {{ BOOTSTRAP_TOKEN }},kubelet-bootstrap,10001,"system:kubelet-bootstrap" 2 | -------------------------------------------------------------------------------- /roles/kube-node/tasks/main.yml: -------------------------------------------------------------------------------- 1 | # 创建kubelet,kube-proxy工作目录和cni配置目录 2 | - name: 创建kube-node 相关目录 3 | file: name={{ item }} state=directory 4 | with_items: 5 | - /var/lib/kubelet 6 | - /var/lib/kube-proxy 7 | - /etc/cni/net.d 8 | 9 | - name: 下载 kubelet,kube-proxy 二进制和基础 cni plugins 10 | copy: src={{ base_dir }}/bin/{{ item }} dest={{ bin_dir }}/{{ item }} mode=0755 11 | with_items: 12 | - kubelet 13 | - kube-proxy 14 | - bridge 15 | - host-local 16 | - loopback 17 | 18 | ##----------kubelet 配置部分-------------- 19 | # kubelet 启动时向 kube-apiserver 发送 TLS bootstrapping 请求,需要绑定该角色 20 | # 只需单节点执行一次,重复执行的报错可以忽略 21 | # 增加15s等待kube-apiserver正常工作 22 | - name: kubelet-bootstrap-setting 23 | shell: "sleep 15 && {{ bin_dir }}/kubectl create clusterrolebinding kubelet-bootstrap \ 24 | --clusterrole=system:node-bootstrapper --user=kubelet-bootstrap" 25 | when: NODE_ID is defined and NODE_ID == "node1" 26 | ignore_errors: true 27 | 28 | #创建bootstrap.kubeconfig配置文件 29 | - name: 设置集群参数 30 | shell: "{{ bin_dir }}/kubectl config set-cluster kubernetes \ 31 | --certificate-authority={{ ca_dir }}/ca.pem \ 32 | --embed-certs=true \ 33 | --server={{ KUBE_APISERVER }} \ 34 | --kubeconfig=bootstrap.kubeconfig" 35 | - name: 设置客户端认证参数 36 | shell: "{{ bin_dir }}/kubectl config set-credentials kubelet-bootstrap \ 37 | --token={{ BOOTSTRAP_TOKEN }} \ 38 | --kubeconfig=bootstrap.kubeconfig" 39 | - name: 设置上下文参数 40 | shell: "{{ bin_dir }}/kubectl config set-context default \ 41 | --cluster=kubernetes \ 42 | --user=kubelet-bootstrap \ 43 | --kubeconfig=bootstrap.kubeconfig" 44 | - name: 选择默认上下文 45 | shell: "{{ bin_dir }}/kubectl config use-context default --kubeconfig=bootstrap.kubeconfig" 46 | 47 | - name: 安装bootstrap.kubeconfig配置文件 48 | shell: "mv $HOME/bootstrap.kubeconfig /etc/kubernetes/bootstrap.kubeconfig" 49 | 50 | - name: 准备 cni配置文件 51 | template: src=cni-default.conf.j2 dest=/etc/cni/net.d/10-default.conf 52 | 53 | - name: 创建kubelet的systemd unit文件 54 | template: src=kubelet.service.j2 dest=/etc/systemd/system/kubelet.service 55 | tags: kubelet 56 | 57 | - name: 开启kubelet 服务 58 | shell: systemctl daemon-reload && systemctl enable kubelet && systemctl restart kubelet 59 | tags: kubelet 60 | 61 | - name: approve-kubelet-csr 62 | shell: "sleep 15 && {{ bin_dir }}/kubectl get csr|grep 'Pending' | awk 'NR>0{print $1}'| xargs {{ bin_dir }}/kubectl certificate approve" 63 | when: NODE_ID is defined and NODE_ID == "node1" 64 | ignore_errors: true 65 | 66 | ##-------kube-proxy部分---------------- 67 | - name: 准备kube-proxy 证书签名请求 68 | template: src=kube-proxy-csr.json.j2 dest={{ ca_dir }}/kube-proxy-csr.json 69 | 70 | - name: 创建 kube-proxy证书与私钥 71 | shell: "cd {{ ca_dir }} && {{ bin_dir }}/cfssl gencert \ 72 | -ca={{ ca_dir }}/ca.pem \ 73 | -ca-key={{ ca_dir }}/ca-key.pem 
\ 74 | -config={{ ca_dir }}/ca-config.json \ 75 | -profile=kubernetes kube-proxy-csr.json | {{ bin_dir }}/cfssljson -bare kube-proxy" 76 | 77 | #创建kube-proxy.kubeconfig配置文件 78 | - name: 设置集群参数 79 | shell: "{{ bin_dir }}/kubectl config set-cluster kubernetes \ 80 | --certificate-authority={{ ca_dir }}/ca.pem \ 81 | --embed-certs=true \ 82 | --server={{ KUBE_APISERVER }} \ 83 | --kubeconfig=kube-proxy.kubeconfig" 84 | - name: 设置客户端认证参数 85 | shell: "{{ bin_dir }}/kubectl config set-credentials kube-proxy \ 86 | --client-certificate={{ ca_dir }}/kube-proxy.pem \ 87 | --client-key={{ ca_dir }}/kube-proxy-key.pem \ 88 | --embed-certs=true \ 89 | --kubeconfig=kube-proxy.kubeconfig" 90 | - name: 设置上下文参数 91 | shell: "{{ bin_dir }}/kubectl config set-context default \ 92 | --cluster=kubernetes \ 93 | --user=kube-proxy \ 94 | --kubeconfig=kube-proxy.kubeconfig" 95 | - name: 选择默认上下文 96 | shell: "{{ bin_dir }}/kubectl config use-context default --kubeconfig=kube-proxy.kubeconfig" 97 | 98 | - name: 安装kube-proxy.kubeconfig配置文件 99 | shell: "mv $HOME/kube-proxy.kubeconfig /etc/kubernetes/kube-proxy.kubeconfig" 100 | 101 | - name: 创建kube-proxy 服务文件 102 | tags: reload-kube-proxy 103 | template: src=kube-proxy.service.j2 dest=/etc/systemd/system/kube-proxy.service 104 | 105 | - name: 开启kube-proxy 服务 106 | tags: reload-kube-proxy 107 | shell: systemctl daemon-reload && systemctl enable kube-proxy && systemctl restart kube-proxy 108 | 109 | -------------------------------------------------------------------------------- /roles/kube-node/templates/cni-default.conf.j2: -------------------------------------------------------------------------------- 1 | { 2 | "name": "mynet", 3 | "type": "bridge", 4 | "bridge": "mynet0", 5 | "isDefaultGateway": true, 6 | "ipMasq": true, 7 | "hairpinMode": true, 8 | "ipam": { 9 | "type": "host-local", 10 | "subnet": "{{ CLUSTER_CIDR }}" 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /roles/kube-node/templates/kube-proxy-csr.json.j2: -------------------------------------------------------------------------------- 1 | { 2 | "CN": "system:kube-proxy", 3 | "hosts": [], 4 | "key": { 5 | "algo": "rsa", 6 | "size": 2048 7 | }, 8 | "names": [ 9 | { 10 | "C": "CN", 11 | "ST": "HangZhou", 12 | "L": "XS", 13 | "O": "k8s", 14 | "OU": "System" 15 | } 16 | ] 17 | } 18 | -------------------------------------------------------------------------------- /roles/kube-node/templates/kube-proxy.service.j2: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Kubernetes Kube-Proxy Server 3 | Documentation=https://github.com/GoogleCloudPlatform/kubernetes 4 | After=network.target 5 | 6 | [Service] 7 | # kube-proxy 根据 --cluster-cidr 判断集群内部和外部流量,指定 --cluster-cidr 或 --masquerade-all 选项后 8 | # kube-proxy 会对访问 Service IP 的请求做 SNAT,这个特性与calico 实现 network policy冲突,因此禁用 9 | WorkingDirectory=/var/lib/kube-proxy 10 | ExecStart={{ bin_dir }}/kube-proxy \ 11 | --bind-address={{ NODE_IP }} \ 12 | --hostname-override={{ NODE_IP }} \ 13 | --kubeconfig=/etc/kubernetes/kube-proxy.kubeconfig \ 14 | --logtostderr=true \ 15 | --v=2 16 | Restart=on-failure 17 | RestartSec=5 18 | LimitNOFILE=65536 19 | 20 | [Install] 21 | WantedBy=multi-user.target 22 | -------------------------------------------------------------------------------- /roles/kube-node/templates/kubelet.service.j2: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Kubernetes Kubelet 3 | 
Documentation=https://github.com/GoogleCloudPlatform/kubernetes 4 | After=docker.service 5 | Requires=docker.service 6 | 7 | [Service] 8 | WorkingDirectory=/var/lib/kubelet 9 | #--pod-infra-container-image=registry.access.redhat.com/rhel7/pod-infrastructure:latest 10 | ExecStart={{ bin_dir }}/kubelet \ 11 | --address={{ NODE_IP }} \ 12 | --hostname-override={{ NODE_IP }} \ 13 | --pod-infra-container-image={{ POD_INFRA_CONTAINER_IMAGE }} \ 14 | --experimental-bootstrap-kubeconfig=/etc/kubernetes/bootstrap.kubeconfig \ 15 | --kubeconfig=/etc/kubernetes/kubelet.kubeconfig \ 16 | --cert-dir={{ ca_dir }} \ 17 | --network-plugin=cni \ 18 | --cni-conf-dir=/etc/cni/net.d \ 19 | --cni-bin-dir={{ bin_dir }} \ 20 | --cluster-dns={{ CLUSTER_DNS_SVC_IP }} \ 21 | --cluster-domain={{ CLUSTER_DNS_DOMAIN }} \ 22 | --hairpin-mode hairpin-veth \ 23 | --allow-privileged=true \ 24 | --fail-swap-on=false \ 25 | --logtostderr=true \ 26 | --v=2 27 | #kubelet cAdvisor 默认在所有接口监听 4194 端口的请求, 以下iptables限制内网访问 28 | ExecStartPost=/sbin/iptables -A INPUT -s 10.0.0.0/8 -p tcp --dport 4194 -j ACCEPT 29 | ExecStartPost=/sbin/iptables -A INPUT -s 172.16.0.0/12 -p tcp --dport 4194 -j ACCEPT 30 | ExecStartPost=/sbin/iptables -A INPUT -s 192.168.0.0/16 -p tcp --dport 4194 -j ACCEPT 31 | ExecStartPost=/sbin/iptables -A INPUT -p tcp --dport 4194 -j DROP 32 | Restart=on-failure 33 | RestartSec=5 34 | 35 | [Install] 36 | WantedBy=multi-user.target 37 | -------------------------------------------------------------------------------- /roles/kubectl/tasks/main.yml: -------------------------------------------------------------------------------- 1 | - name: 下载kubectl二进制 2 | copy: src={{ base_dir }}/bin/kubectl dest={{ bin_dir }}/kubectl mode=0755 3 | 4 | - name: 准备kubectl使用的admin 证书签名请求 5 | template: src=admin-csr.json.j2 dest={{ ca_dir }}/admin-csr.json 6 | 7 | - name: 创建 admin证书与私钥 8 | shell: "cd {{ ca_dir }} && {{ bin_dir }}/cfssl gencert \ 9 | -ca={{ ca_dir }}/ca.pem \ 10 | -ca-key={{ ca_dir }}/ca-key.pem \ 11 | -config={{ ca_dir }}/ca-config.json \ 12 | -profile=kubernetes admin-csr.json | {{ bin_dir }}/cfssljson -bare admin" 13 | 14 | # 创建kubectl kubeconfig 文件 15 | - name: 设置集群参数 16 | shell: "{{ bin_dir }}/kubectl config set-cluster kubernetes \ 17 | --certificate-authority={{ ca_dir }}/ca.pem \ 18 | --embed-certs=true \ 19 | --server={{ KUBE_APISERVER }}" 20 | - name: 设置客户端认证参数 21 | shell: "{{ bin_dir }}/kubectl config set-credentials admin \ 22 | --client-certificate={{ ca_dir }}/admin.pem \ 23 | --embed-certs=true \ 24 | --client-key={{ ca_dir }}/admin-key.pem" 25 | - name: 设置上下文参数 26 | shell: "{{ bin_dir }}/kubectl config set-context kubernetes \ 27 | --cluster=kubernetes --user=admin" 28 | - name: 选择默认上下文 29 | shell: "{{ bin_dir }}/kubectl config use-context kubernetes" 30 | -------------------------------------------------------------------------------- /roles/kubectl/templates/admin-csr.json.j2: -------------------------------------------------------------------------------- 1 | { 2 | "CN": "admin", 3 | "hosts": [], 4 | "key": { 5 | "algo": "rsa", 6 | "size": 2048 7 | }, 8 | "names": [ 9 | { 10 | "C": "CN", 11 | "ST": "HangZhou", 12 | "L": "XS", 13 | "O": "system:masters", 14 | "OU": "System" 15 | } 16 | ] 17 | } 18 | -------------------------------------------------------------------------------- /roles/lb/tasks/main.yml: -------------------------------------------------------------------------------- 1 | # 先拉取下节点的ansible setup信息,起到缓存效果,否则后续when 判断可能失败 2 | - name: 缓存ansilbe setup信息 3 | setup: gather_subset=min 4 | 5 | - 
name: apt更新缓存刷新 6 | apt: update_cache=yes cache_valid_time=72000 7 | when: ansible_distribution == "Ubuntu" and ansible_distribution_major_version == "16" 8 | 9 | - name: apt安装 haproxy 10 | apt: name=haproxy state=latest 11 | when: ansible_distribution == "Ubuntu" and ansible_distribution_major_version == "16" 12 | 13 | - name: yum安装 haproxy 14 | yum: name=haproxy state=latest 15 | when: ansible_distribution == "CentOS" and ansible_distribution_major_version == "7" 16 | 17 | - name: 创建haproxy配置目录 18 | file: name=/etc/haproxy state=directory 19 | 20 | - name: 修改centos的haproxy.service 21 | template: src=haproxy.service.j2 dest=/usr/lib/systemd/system/haproxy.service 22 | when: ansible_distribution == "CentOS" and ansible_distribution_major_version == "7" 23 | 24 | - name: 配置 haproxy 25 | template: src=haproxy.cfg.j2 dest=/etc/haproxy/haproxy.cfg 26 | 27 | - name: apt安装 keepalived 28 | apt: name=keepalived state=latest 29 | when: ansible_distribution == "Ubuntu" and ansible_distribution_major_version == "16" 30 | 31 | - name: yum安装 keepalived 32 | yum: name=keepalived state=latest 33 | when: ansible_distribution == "CentOS" and ansible_distribution_major_version == "7" 34 | 35 | # CentOS 需要安装psmisc 才能使用命令killall,它在keepalive的监测脚本中使用到 36 | - name: yum安装 psmisc 37 | yum: name=psmisc state=latest 38 | when: ansible_distribution == "CentOS" and ansible_distribution_major_version == "7" 39 | 40 | - name: 创建keepalived配置目录 41 | file: name=/etc/keepalived state=directory 42 | 43 | - name: 配置 keepalived 主节点 44 | template: src=keepalived-master.conf.j2 dest=/etc/keepalived/keepalived.conf 45 | when: LB_ROLE == "master" 46 | 47 | - name: 配置 keepalived 备节点 48 | template: src=keepalived-backup.conf.j2 dest=/etc/keepalived/keepalived.conf 49 | when: LB_ROLE == "backup" 50 | 51 | - name: daemon-reload for haproxy.service 52 | shell: systemctl daemon-reload 53 | 54 | - name: 重启haproxy服务 55 | shell: systemctl enable haproxy && systemctl restart haproxy 56 | 57 | - name: 重启keepalived服务 58 | shell: systemctl enable keepalived && systemctl restart keepalived 59 | -------------------------------------------------------------------------------- /roles/lb/templates/haproxy.cfg.j2: -------------------------------------------------------------------------------- 1 | global 2 | log /dev/log local0 3 | log /dev/log local1 notice 4 | chroot /var/lib/haproxy 5 | stats socket /run/haproxy/admin.sock mode 660 level admin 6 | stats timeout 30s 7 | user haproxy 8 | group haproxy 9 | daemon 10 | nbproc 1 11 | 12 | defaults 13 | log global 14 | timeout connect 5000 15 | timeout client 50000 16 | timeout server 50000 17 | 18 | listen kube-master 19 | bind 0.0.0.0:{{ MASTER_PORT }} 20 | mode tcp 21 | option tcplog 22 | balance source 23 | server s1 {{ LB_EP1 }} check inter 10000 fall 2 rise 2 weight 1 24 | server s2 {{ LB_EP2 }} check inter 10000 fall 2 rise 2 weight 1 25 | -------------------------------------------------------------------------------- /roles/lb/templates/haproxy.service.j2: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=HAProxy Load Balancer 3 | After=syslog.target network.target 4 | 5 | [Service] 6 | EnvironmentFile=/etc/sysconfig/haproxy 7 | ExecStartPre=/usr/bin/mkdir -p /run/haproxy 8 | ExecStart=/usr/sbin/haproxy-systemd-wrapper -f /etc/haproxy/haproxy.cfg -p /run/haproxy.pid $OPTIONS 9 | ExecReload=/bin/kill -USR2 $MAINPID 10 | KillMode=mixed 11 | 12 | [Install] 13 | WantedBy=multi-user.target 14 | 
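The lb role above installs haproxy as a TCP proxy in front of the two master endpoints and pairs it with keepalived (templates below) for the floating {{ MASTER_IP }} VIP. A minimal post-run check on an lb node — a sketch, with <MASTER_PORT> standing in for the port configured in the hosts file:

haproxy -c -f /etc/haproxy/haproxy.cfg      # validate the rendered config
systemctl is-active haproxy keepalived      # both should report "active"
ss -tnlp | grep <MASTER_PORT>               # haproxy listening on the apiserver port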
-------------------------------------------------------------------------------- /roles/lb/templates/keepalived-backup.conf.j2: -------------------------------------------------------------------------------- 1 | global_defs { 2 | router_id lb-backup 3 | } 4 | 5 | vrrp_instance VI-kube-master { 6 | state BACKUP 7 | priority 110 8 | dont_track_primary 9 | interface {{ LB_IF }} 10 | virtual_router_id 51 11 | advert_int 3 12 | virtual_ipaddress { 13 | {{ MASTER_IP }} 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /roles/lb/templates/keepalived-master.conf.j2: -------------------------------------------------------------------------------- 1 | global_defs { 2 | router_id lb-master 3 | } 4 | 5 | vrrp_script check-haproxy { 6 | script "killall -0 haproxy" 7 | interval 5 8 | weight -30 9 | } 10 | 11 | vrrp_instance VI-kube-master { 12 | state MASTER 13 | priority 120 14 | dont_track_primary 15 | interface {{ LB_IF }} 16 | virtual_router_id 51 17 | advert_int 3 18 | track_script { 19 | check-haproxy 20 | } 21 | virtual_ipaddress { 22 | {{ MASTER_IP }} 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /roles/prepare/files/95-k8s-sysctl.conf: -------------------------------------------------------------------------------- 1 | net.ipv4.ip_forward = 1 2 | net.bridge.bridge-nf-call-iptables = 1 3 | net.bridge.bridge-nf-call-ip6tables = 1 4 | net.bridge.bridge-nf-call-arptables = 1 5 | -------------------------------------------------------------------------------- /roles/prepare/tasks/main.yml: -------------------------------------------------------------------------------- 1 | - name: prepare some dirs 2 | file: name={{ item }} state=directory 3 | with_items: 4 | - "{{ bin_dir }}" 5 | - "{{ ca_dir }}" 6 | - /root/.kube 7 | - /etc/docker 8 | 9 | #- name: 集群hosts文件更新 10 | # copy: src=hosts.j2 dest=/etc/hosts 11 | 12 | - name: 写入环境变量$PATH 13 | shell: "sed -i '/export PATH=/d' /etc/profile && \ 14 | echo export PATH={{ bin_dir }}:$PATH >> /etc/profile" 15 | 16 | - name: 下载证书工具 CFSSL 17 | copy: src={{ base_dir }}/bin/{{ item }} dest={{ bin_dir }}/{{ item }} mode=0755 18 | with_items: 19 | - cfssl 20 | - cfssl-certinfo 21 | - cfssljson 22 | 23 | - name: 分发CA 证书 24 | copy: src={{ item }} dest={{ ca_dir }}/{{ item }} mode=0644 25 | with_items: 26 | - ca.pem 27 | - ca-key.pem 28 | - ca.csr 29 | - ca-config.json 30 | 31 | # 先拉取下节点的ansible setup信息,起到缓存效果,否则后续when 判断可能失败 32 | - name: 缓存ansilbe setup信息 33 | setup: gather_subset=min 34 | 35 | # 删除默认安装 36 | - name: 删除ubuntu默认安装 37 | when: ansible_distribution == "Ubuntu" 38 | apt: name={{ item }} state=absent 39 | with_items: 40 | - ufw 41 | - lxd 42 | - lxd-client 43 | - lxcfs 44 | - lxc-common 45 | 46 | # 删除默认安装 47 | - name: 删除centos默认安装 48 | when: ansible_distribution == "CentOS" 49 | yum: name={{ item }} state=absent 50 | with_items: 51 | - firewalld 52 | - firewalld-filesystem 53 | - python-firewall 54 | 55 | - name: 关闭 selinux 56 | shell: "setenforce 0 && echo SELINUX=disabled > /etc/selinux/config" 57 | when: ansible_distribution == "CentOS" 58 | ignore_errors: true 59 | 60 | # 设置系统参数for k8s 61 | # 消除docker info 警告WARNING: bridge-nf-call-ip[6]tables is disabled 62 | - name: 设置系统参数 63 | copy: src=95-k8s-sysctl.conf dest=/etc/sysctl.d/95-k8s-sysctl.conf 64 | 65 | - name: 生效系统参数 66 | shell: "sysctl -p /etc/sysctl.d/95-k8s-sysctl.conf" 67 | ignore_errors: true 68 | --------------------------------------------------------------------------------
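The prepare role above applies 95-k8s-sysctl.conf, prepends {{ bin_dir }} to PATH in /etc/profile and distributes the CA files into {{ ca_dir }}. A quick spot check on a prepared node — a sketch, with <bin_dir> and <ca_dir> standing in for the values configured for the cluster:

sysctl net.ipv4.ip_forward net.bridge.bridge-nf-call-iptables   # both should print 1 (the bridge keys need br_netfilter loaded)
ls <ca_dir>/ca.pem <ca_dir>/ca-key.pem <ca_dir>/ca-config.json  # CA material copied in by the role
grep 'export PATH' /etc/profile                                 # should show <bin_dir> at the front of PATH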