├── .gitignore
├── 01.prepare.yml
├── 02.etcd.yml
├── 03.kubectl.yml
├── 04.docker.yml
├── 05.kube-master.yml
├── 06.kube-node.yml
├── 07.calico.yml
├── 07.flannel.yml
├── 11.harbor.yml
├── 20.addnode.yml
├── 90.setup.yml
├── 99.clean.yml
├── LICENSE
├── README.md
├── ansible.cfg
├── bin
│   └── VERSION.md
├── branch.md
├── docs
│   ├── 00-集群规划和基础参数设定.md
│   ├── 01-创建CA证书和环境配置.md
│   ├── 02-安装etcd集群.md
│   ├── 03-配置kubectl命令行工具.md
│   ├── 04-安装docker服务.md
│   ├── 05-安装kube-master节点.md
│   ├── 06-安装kube-node节点.md
│   ├── 07-安装calico网络组件.md
│   ├── 07-安装flannel网络组件.md
│   ├── guide
│   │   ├── dashboard.md
│   │   ├── efk.md
│   │   ├── harbor.md
│   │   ├── heapster.md
│   │   ├── hpa.md
│   │   ├── index.md
│   │   ├── ingress.md
│   │   ├── kubedns.md
│   │   └── networkpolicy.md
│   ├── quickStart.md
│   └── upgrade.md
├── down
│   └── download.sh
├── example
│   ├── hosts.allinone.example
│   ├── hosts.m-masters.example
│   └── hosts.s-master.example
├── manifests
│   ├── dashboard
│   │   ├── kubernetes-dashboard.yaml
│   │   ├── ui-admin-rbac.yaml
│   │   └── ui-read-rbac.yaml
│   ├── heapster
│   │   ├── grafana.yaml
│   │   ├── heapster.yaml
│   │   └── influxdb.yaml
│   ├── ingress
│   │   ├── test-hello.ing.yaml
│   │   ├── traefik-ingress.yaml
│   │   └── traefik-ui.ing.yaml
│   └── kubedns
│       ├── kubedns.yaml
│       └── readme.md
├── pics
│   ├── alipay.png
│   ├── ansible.jpg
│   ├── docker.jpg
│   ├── grafana.png
│   ├── influxdb.png
│   └── kube.jpg
└── roles
    ├── calico
    │   ├── tasks
    │   │   └── main.yml
    │   └── templates
    │       ├── calico-csr.json.j2
    │       ├── calico-rbac.yaml.j2
    │       ├── calico.yaml.j2
    │       └── calicoctl.cfg.j2
    ├── deploy
    │   ├── tasks
    │   │   └── main.yml
    │   └── templates
    │       ├── ca-config.json.j2
    │       ├── ca-csr.json.j2
    │       └── kubedns.yaml.j2
    ├── docker
    │   ├── files
    │   │   ├── daemon.json
    │   │   ├── docker
    │   │   └── docker-tag
    │   ├── tasks
    │   │   └── main.yml
    │   └── templates
    │       └── docker.service.j2
    ├── etcd
    │   ├── tasks
    │   │   └── main.yml
    │   └── templates
    │       ├── etcd-csr.json.j2
    │       └── etcd.service.j2
    ├── flannel
    │   ├── tasks
    │   │   └── main.yml
    │   └── templates
    │       └── kube-flannel.yaml.j2
    ├── harbor
    │   ├── tasks
    │   │   └── main.yml
    │   └── templates
    │       ├── harbor-csr.json.j2
    │       └── harbor.cfg.j2
    ├── kube-master
    │   ├── tasks
    │   │   └── main.yml
    │   └── templates
    │       ├── basic-auth.csv.j2
    │       ├── kube-apiserver.service.j2
    │       ├── kube-controller-manager.service.j2
    │       ├── kube-scheduler.service.j2
    │       ├── kubernetes-csr.json.j2
    │       └── token.csv.j2
    ├── kube-node
    │   ├── tasks
    │   │   └── main.yml
    │   └── templates
    │       ├── cni-default.conf.j2
    │       ├── kube-proxy-csr.json.j2
    │       ├── kube-proxy.service.j2
    │       └── kubelet.service.j2
    ├── kubectl
    │   ├── tasks
    │   │   └── main.yml
    │   └── templates
    │       └── admin-csr.json.j2
    ├── lb
    │   ├── tasks
    │   │   └── main.yml
    │   └── templates
    │       ├── haproxy.cfg.j2
    │       ├── haproxy.service.j2
    │       ├── keepalived-backup.conf.j2
    │       └── keepalived-master.conf.j2
    └── prepare
        ├── files
        │   └── 95-k8s-sysctl.conf
        └── tasks
            └── main.yml

/.gitignore:
--------------------------------------------------------------------------------
1 | down/*
2 | !down/download.sh
3 | bin/*
4 | !bin/VERSION.md
5 | hosts
6 | *.crt
7 | *.pem
8 | roles/prepare/files/ca*
9 | 
--------------------------------------------------------------------------------
/01.prepare.yml:
--------------------------------------------------------------------------------
1 | # 在deploy节点生成CA相关证书,以及kubedns.yaml配置文件
2 | - hosts: deploy
3 |   roles:
4 |   - deploy
5 | 
6 | # 集群节点的公共配置任务
7 | - hosts:
8 |   - kube-cluster
9 |   - deploy
10 |   - etcd
11 |   - lb
12 |   roles:
13 |   - prepare
14 | 
15 | # [可选]多master部署时的负载均衡配置
16 | - hosts: lb
17 |   roles:
18 |   - lb
19 | 
--------------------------------------------------------------------------------
/02.etcd.yml:
--------------------------------------------------------------------------------
1 | - hosts: etcd
2 |   roles:
3 |   - etcd
4 | 
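说明:以上按 01~07 编号的 playbook 对应分步安装的各个步骤,90.setup.yml 是一步安装的入口,99.clean.yml 用于清理集群(详见下文 docs/00-集群规划和基础参数设定.md)。下面给出一个分步执行的示意,仅为示例:假设已按 docs/00 的说明把 example 下的 hosts 模板复制为 /etc/ansible/hosts 并完成修改。

``` bash
# 分步安装示例(顺序与各步骤说明详见 docs/00-集群规划和基础参数设定.md)
cd /etc/ansible
ansible all -m ping                  # 先验证部署节点到各节点的连通性
ansible-playbook 01.prepare.yml
ansible-playbook 02.etcd.yml
ansible-playbook 03.kubectl.yml
ansible-playbook 04.docker.yml
ansible-playbook 05.kube-master.yml
ansible-playbook 06.kube-node.yml
ansible-playbook 07.calico.yml       # 或 ansible-playbook 07.flannel.yml,网络插件二选一
# 也可以一步安装:
# ansible-playbook 90.setup.yml
```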
--------------------------------------------------------------------------------
/03.kubectl.yml:
--------------------------------------------------------------------------------
1 | - hosts:
2 |   - kube-cluster
3 |   - deploy
4 |   roles:
5 |   - kubectl
6 | 
--------------------------------------------------------------------------------
/04.docker.yml:
--------------------------------------------------------------------------------
1 | - hosts: kube-cluster
2 |   gather_facts: True
3 |   roles:
4 |   - docker
5 | 
--------------------------------------------------------------------------------
/05.kube-master.yml:
--------------------------------------------------------------------------------
1 | - hosts: kube-master
2 |   roles:
3 |   - kube-master
4 | 
--------------------------------------------------------------------------------
/06.kube-node.yml:
--------------------------------------------------------------------------------
1 | - hosts: kube-node
2 |   roles:
3 |   - kube-node
4 | 
--------------------------------------------------------------------------------
/07.calico.yml:
--------------------------------------------------------------------------------
1 | - hosts: kube-cluster
2 |   roles:
3 |   - calico
4 | 
--------------------------------------------------------------------------------
/07.flannel.yml:
--------------------------------------------------------------------------------
1 | - hosts: kube-cluster
2 |   roles:
3 |   - flannel
4 | 
--------------------------------------------------------------------------------
/11.harbor.yml:
--------------------------------------------------------------------------------
1 | - hosts: harbor
2 |   roles:
3 |   - prepare
4 |   - docker
5 |   - harbor
6 | 
7 | - hosts: kube-node
8 |   tasks:
9 |   - name: harbor证书目录创建
10 |     file: name=/etc/docker/certs.d/{{ HARBOR_DOMAIN }} state=directory
11 | 
12 |   - name: harbor服务器证书安装
13 |     copy: src={{ base_dir }}/roles/prepare/files/ca.pem dest=/etc/docker/certs.d/{{ HARBOR_DOMAIN }}/ca.crt
14 | 
15 |   # 如果你的环境中有dns服务器,可以跳过hosts文件设置
16 |   - name: 增加harbor的hosts解析
17 |     shell: "sed -i '/{{ HARBOR_DOMAIN }}/d' /etc/hosts && \
18 |            echo {{ HARBOR_IP }} {{ HARBOR_DOMAIN }} >> /etc/hosts"
19 | 
--------------------------------------------------------------------------------
/20.addnode.yml:
--------------------------------------------------------------------------------
1 | - hosts: new-node
2 |   roles:
3 |   - prepare
4 |   - kubectl
5 |   - docker
6 |   - calico
7 |   - kube-node
8 | 
9 | - hosts: deploy
10 |   tasks:
11 |   - name: 批准新增node节点
12 |     shell: "sleep 15 && {{ bin_dir }}/kubectl get csr|grep 'Pending' | awk 'NR>0{print $1}'| xargs {{ bin_dir }}/kubectl certificate approve"
13 |     ignore_errors: true
14 | 
--------------------------------------------------------------------------------
/90.setup.yml:
--------------------------------------------------------------------------------
1 | # 在deploy节点生成CA相关证书,以供整个集群使用
2 | # 以及初始化kubedns.yaml配置文件
3 | - hosts: deploy
4 |   roles:
5 |   - deploy
6 | 
7 | # 集群节点的公共配置任务
8 | - hosts:
9 |   - kube-cluster
10 |   - deploy
11 |   - etcd
12 |   - lb
13 |   roles:
14 |   - prepare
15 | 
16 | # [可选]多master部署时的负载均衡配置
17 | - hosts: lb
18 |   roles:
19 |   - lb
20 | 
21 | # 创建etcd集群
22 | - hosts: etcd
23 |   roles:
24 |   - etcd
25 | 
26 | # kubectl 客户端配置
27 | - hosts:
28 |   - kube-cluster
29 |   - deploy
30 |   roles:
31 |   - kubectl
32 | 
33 | # docker服务安装
34 | - hosts: kube-cluster
35 |   roles:
36 |   - docker
37 | 
38 | # master 节点部署
39 | - hosts: kube-master
40 |   roles:
41 |   - kube-master
42 | 
43 | # node 节点部署
44 | - hosts: kube-node
45 |   roles:
46 |   - kube-node
47 | 
48 | # 集群网络插件部署,只能选择一种安装
49 | - hosts: kube-cluster
50 |   roles:
51 |   - { role: calico, when: "CLUSTER_NETWORK == 'calico'" }
52 |   - { role: flannel, when: "CLUSTER_NETWORK == 'flannel'" }
53 | 
--------------------------------------------------------------------------------
/99.clean.yml:
--------------------------------------------------------------------------------
1 | # 警告:此脚本将清理整个K8S集群,包括所有POD、ETCD数据等
2 | # 请三思后运行此脚本 ansible-playbook 99.clean.yml
3 | 
4 | - hosts:
5 |   - kube-node
6 |   - new-node
7 |   tasks:
8 |   - name: stop kube-node service
9 |     shell: "systemctl stop kubelet kube-proxy"
10 |     ignore_errors: true
11 | 
12 |   - name: umount kubelet 挂载的目录
13 |     shell: "mount | grep '/var/lib/kubelet'| awk '{print $3}'|xargs umount"
14 |     ignore_errors: true
15 | 
16 |   - name: 清理目录和文件
17 |     file: name={{ item }} state=absent
18 |     with_items:
19 |     - "/var/lib/kubelet/"
20 |     - "/var/lib/kube-proxy/"
21 |     - "/etc/kubernetes/"
22 |     - "/etc/systemd/system/kubelet.service"
23 |     - "/etc/systemd/system/kube-proxy.service"
24 |     # - "/root/local/bin/"
25 | 
26 | - hosts: kube-master
27 |   tasks:
28 |   - name: stop kube-master service
29 |     shell: "systemctl stop kube-apiserver kube-controller-manager kube-scheduler"
30 |     ignore_errors: true
31 | 
32 |   - name: 清理目录和文件
33 |     file: name={{ item }} state=absent
34 |     with_items:
35 |     - "/var/run/kubernetes"
36 |     - "/etc/systemd/system/kube-apiserver.service"
37 |     - "/etc/systemd/system/kube-controller-manager.service"
38 |     - "/etc/systemd/system/kube-scheduler.service"
39 |     - "/etc/kubernetes/"
40 | 
41 | - hosts:
42 |   - kube-cluster
43 |   - new-node
44 |   - deploy
45 |   tasks:
46 |   - name: stop docker service
47 |     shell: "systemctl stop docker"
48 |     ignore_errors: true
49 | 
50 |   # 因为calico-kube-controller使用了host网络,相当于使用了docker -net=host,需要
51 |   # 卸载 /var/run/docker/netns/default
52 |   - name: 卸载docker 相关fs1
53 |     mount: path=/var/run/docker/netns/default state=unmounted
54 | 
55 |   - name: 卸载docker 相关fs2
56 |     mount: path=/var/lib/docker/overlay state=unmounted
57 | 
58 |   - name: 清理目录和文件
59 |     file: name={{ item }} state=absent
60 |     with_items:
61 |     - "/etc/cni/"
62 |     - "/root/.kube/"
63 |     - "/run/flannel/"
64 |     - "/etc/calico/"
65 |     - "/var/run/calico/"
66 |     - "/var/log/calico/"
67 |     - "/var/lib/docker/"
68 |     - "/var/run/docker/"
69 |     - "/etc/systemd/system/calico-node.service"
70 |     - "/etc/systemd/system/docker.service"
71 |     - "/etc/systemd/system/docker.service.requires/"
72 | 
73 |   - name: 清理 iptables
74 |     shell: "iptables -F && iptables -X \
75 |            && iptables -F -t nat && iptables -X -t nat \
76 |            && iptables -F -t raw && iptables -X -t raw \
77 |            && iptables -F -t mangle && iptables -X -t mangle"
78 | 
79 |   - name: 清理网络
80 |     shell: "ip link del docker0; \
81 |            ip link del tunl0; \
82 |            ip link del flannel.1; \
83 |            ip link del cni0; \
84 |            ip link del mynet0; \
85 |            systemctl restart networking; \
86 |            systemctl restart network"
87 |     ignore_errors: true
88 | 
89 | - hosts: etcd
90 |   tasks:
91 |   - name: stop etcd service
92 |     shell: systemctl stop etcd
93 |     ignore_errors: true
94 | 
95 |   - name: 清理目录和文件
96 |     file: name={{ item }} state=absent
97 |     with_items:
98 |     - "/var/lib/etcd"
99 |     - "/etc/etcd/"
100 |     - "/etc/systemd/system/etcd.service"
101 | 
102 | - hosts: lb
103 |   tasks:
104 |   - name: stop keepalived service
105 |     shell: systemctl disable keepalived && systemctl stop keepalived
106 |     ignore_errors: true
107 | 
108 |   - name: stop haproxy service
109 |     shell: systemctl disable haproxy && systemctl stop haproxy
110 |     ignore_errors: true
111 | 
112 |   - name:
清理LB 配置文件目录 113 | file: name={{ item }} state=absent 114 | with_items: 115 | - "/etc/haproxy" 116 | - "/etc/keepalived" 117 | ignore_errors: true 118 | 119 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, and 10 | distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by the copyright 13 | owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all other entities 16 | that control, are controlled by, or are under common control with that entity. 17 | For the purposes of this definition, "control" means (i) the power, direct or 18 | indirect, to cause the direction or management of such entity, whether by 19 | contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the 20 | outstanding shares, or (iii) beneficial ownership of such entity. 21 | 22 | "You" (or "Your") shall mean an individual or Legal Entity exercising 23 | permissions granted by this License. 24 | 25 | "Source" form shall mean the preferred form for making modifications, including 26 | but not limited to software source code, documentation source, and configuration 27 | files. 28 | 29 | "Object" form shall mean any form resulting from mechanical transformation or 30 | translation of a Source form, including but not limited to compiled object code, 31 | generated documentation, and conversions to other media types. 32 | 33 | "Work" shall mean the work of authorship, whether in Source or Object form, made 34 | available under the License, as indicated by a copyright notice that is included 35 | in or attached to the work (an example is provided in the Appendix below). 36 | 37 | "Derivative Works" shall mean any work, whether in Source or Object form, that 38 | is based on (or derived from) the Work and for which the editorial revisions, 39 | annotations, elaborations, or other modifications represent, as a whole, an 40 | original work of authorship. For the purposes of this License, Derivative Works 41 | shall not include works that remain separable from, or merely link (or bind by 42 | name) to the interfaces of, the Work and Derivative Works thereof. 43 | 44 | "Contribution" shall mean any work of authorship, including the original version 45 | of the Work and any modifications or additions to that Work or Derivative Works 46 | thereof, that is intentionally submitted to Licensor for inclusion in the Work 47 | by the copyright owner or by an individual or Legal Entity authorized to submit 48 | on behalf of the copyright owner. For the purposes of this definition, 49 | "submitted" means any form of electronic, verbal, or written communication sent 50 | to the Licensor or its representatives, including but not limited to 51 | communication on electronic mailing lists, source code control systems, and 52 | issue tracking systems that are managed by, or on behalf of, the Licensor for 53 | the purpose of discussing and improving the Work, but excluding communication 54 | that is conspicuously marked or otherwise designated in writing by the copyright 55 | owner as "Not a Contribution." 
56 | 57 | "Contributor" shall mean Licensor and any individual or Legal Entity on behalf 58 | of whom a Contribution has been received by Licensor and subsequently 59 | incorporated within the Work. 60 | 61 | 2. Grant of Copyright License. 62 | 63 | Subject to the terms and conditions of this License, each Contributor hereby 64 | grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, 65 | irrevocable copyright license to reproduce, prepare Derivative Works of, 66 | publicly display, publicly perform, sublicense, and distribute the Work and such 67 | Derivative Works in Source or Object form. 68 | 69 | 3. Grant of Patent License. 70 | 71 | Subject to the terms and conditions of this License, each Contributor hereby 72 | grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, 73 | irrevocable (except as stated in this section) patent license to make, have 74 | made, use, offer to sell, sell, import, and otherwise transfer the Work, where 75 | such license applies only to those patent claims licensable by such Contributor 76 | that are necessarily infringed by their Contribution(s) alone or by combination 77 | of their Contribution(s) with the Work to which such Contribution(s) was 78 | submitted. If You institute patent litigation against any entity (including a 79 | cross-claim or counterclaim in a lawsuit) alleging that the Work or a 80 | Contribution incorporated within the Work constitutes direct or contributory 81 | patent infringement, then any patent licenses granted to You under this License 82 | for that Work shall terminate as of the date such litigation is filed. 83 | 84 | 4. Redistribution. 85 | 86 | You may reproduce and distribute copies of the Work or Derivative Works thereof 87 | in any medium, with or without modifications, and in Source or Object form, 88 | provided that You meet the following conditions: 89 | 90 | You must give any other recipients of the Work or Derivative Works a copy of 91 | this License; and 92 | You must cause any modified files to carry prominent notices stating that You 93 | changed the files; and 94 | You must retain, in the Source form of any Derivative Works that You distribute, 95 | all copyright, patent, trademark, and attribution notices from the Source form 96 | of the Work, excluding those notices that do not pertain to any part of the 97 | Derivative Works; and 98 | If the Work includes a "NOTICE" text file as part of its distribution, then any 99 | Derivative Works that You distribute must include a readable copy of the 100 | attribution notices contained within such NOTICE file, excluding those notices 101 | that do not pertain to any part of the Derivative Works, in at least one of the 102 | following places: within a NOTICE text file distributed as part of the 103 | Derivative Works; within the Source form or documentation, if provided along 104 | with the Derivative Works; or, within a display generated by the Derivative 105 | Works, if and wherever such third-party notices normally appear. The contents of 106 | the NOTICE file are for informational purposes only and do not modify the 107 | License. You may add Your own attribution notices within Derivative Works that 108 | You distribute, alongside or as an addendum to the NOTICE text from the Work, 109 | provided that such additional attribution notices cannot be construed as 110 | modifying the License. 
111 | You may add Your own copyright statement to Your modifications and may provide 112 | additional or different license terms and conditions for use, reproduction, or 113 | distribution of Your modifications, or for any such Derivative Works as a whole, 114 | provided Your use, reproduction, and distribution of the Work otherwise complies 115 | with the conditions stated in this License. 116 | 117 | 5. Submission of Contributions. 118 | 119 | Unless You explicitly state otherwise, any Contribution intentionally submitted 120 | for inclusion in the Work by You to the Licensor shall be under the terms and 121 | conditions of this License, without any additional terms or conditions. 122 | Notwithstanding the above, nothing herein shall supersede or modify the terms of 123 | any separate license agreement you may have executed with Licensor regarding 124 | such Contributions. 125 | 126 | 6. Trademarks. 127 | 128 | This License does not grant permission to use the trade names, trademarks, 129 | service marks, or product names of the Licensor, except as required for 130 | reasonable and customary use in describing the origin of the Work and 131 | reproducing the content of the NOTICE file. 132 | 133 | 7. Disclaimer of Warranty. 134 | 135 | Unless required by applicable law or agreed to in writing, Licensor provides the 136 | Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, 137 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, 138 | including, without limitation, any warranties or conditions of TITLE, 139 | NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are 140 | solely responsible for determining the appropriateness of using or 141 | redistributing the Work and assume any risks associated with Your exercise of 142 | permissions under this License. 143 | 144 | 8. Limitation of Liability. 145 | 146 | In no event and under no legal theory, whether in tort (including negligence), 147 | contract, or otherwise, unless required by applicable law (such as deliberate 148 | and grossly negligent acts) or agreed to in writing, shall any Contributor be 149 | liable to You for damages, including any direct, indirect, special, incidental, 150 | or consequential damages of any character arising as a result of this License or 151 | out of the use or inability to use the Work (including but not limited to 152 | damages for loss of goodwill, work stoppage, computer failure or malfunction, or 153 | any and all other commercial damages or losses), even if such Contributor has 154 | been advised of the possibility of such damages. 155 | 156 | 9. Accepting Warranty or Additional Liability. 157 | 158 | While redistributing the Work or Derivative Works thereof, You may choose to 159 | offer, and charge a fee for, acceptance of support, warranty, indemnity, or 160 | other liability obligations and/or rights consistent with this License. However, 161 | in accepting such obligations, You may act only on Your own behalf and on Your 162 | sole responsibility, not on behalf of any other Contributor, and only if You 163 | agree to indemnify, defend, and hold each Contributor harmless for any liability 164 | incurred by, or claims asserted against, such Contributor by reason of your 165 | accepting any such warranty or additional liability. 
166 | 167 | END OF TERMS AND CONDITIONS 168 | 169 | APPENDIX: How to apply the Apache License to your work 170 | 171 | To apply the Apache License to your work, attach the following boilerplate 172 | notice, with the fields enclosed by brackets "{}" replaced with your own 173 | identifying information. (Don't include the brackets!) The text should be 174 | enclosed in the appropriate comment syntax for the file format. We also 175 | recommend that a file or class name and description of purpose be included on 176 | the same "printed page" as the copyright notice for easier identification within 177 | third-party archives. 178 | 179 | Copyright 2017 jmgao 180 | 181 | Licensed under the Apache License, Version 2.0 (the "License"); 182 | you may not use this file except in compliance with the License. 183 | You may obtain a copy of the License at 184 | 185 | http://www.apache.org/licenses/LICENSE-2.0 186 | 187 | Unless required by applicable law or agreed to in writing, software 188 | distributed under the License is distributed on an "AS IS" BASIS, 189 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 190 | See the License for the specific language governing permissions and 191 | limitations under the License. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 利用Ansible部署kubernetes集群 2 | 3 | ![docker](./pics/docker.jpg) ![kube](./pics/kube.jpg) ![ansible](./pics/ansible.jpg) 4 | 5 | 本系列文档致力于提供快速部署高可用`k8s`集群的工具,并且也努力成为`k8s`实践、使用的参考书;基于二进制方式部署和利用`ansible-playbook`实现自动化:既提供一键安装脚本,也可以分步执行安装各个组件,同时讲解每一步主要参数配置和注意事项;二进制方式部署有助于理解系统各组件的交互原理和熟悉组件启动参数,有助于快速排查解决实际问题。 6 | 7 | **集群特性:`TLS` 双向认证、`RBAC` 授权、多`Master`高可用、支持`Network Policy`** 8 | 9 | **注意:** 为提高集群网络插件安装的灵活性,使用`DaemonSet Pod`方式运行网络插件,目前支持`Calico` `flannel`可选 10 | 11 | 文档基于`Ubuntu 16.04/CentOS 7`,其他系统需要读者自行替换部分命令;由于使用经验有限和简化脚本考虑,已经尽量避免`ansible-playbook`的高级特性和复杂逻辑。 12 | 13 | 你可能需要掌握基本`kubernetes` `docker` `linux shell` 知识,关于`ansible`建议阅读 [ansible超快入门](http://weiweidefeng.blog.51cto.com/1957995/1895261) 基本够用。 14 | 15 | 请阅读[项目分支说明](branch.md),欢迎提`Issues`和`PRs`参与维护项目。 16 | 17 | ## 组件版本 18 | 19 | 1. kubernetes v1.9.1 20 | 1. etcd v3.2.13 21 | 1. docker 17.12.0-ce 22 | 1. calico/node v2.6.5 23 | 1. flannel v0.9.1 24 | 25 | + 附:集群用到的所有二进制文件已打包好供下载 [https://pan.baidu.com/s/1c4RFaA](https://pan.baidu.com/s/1c4RFaA) 26 | + 注:`Kubernetes v1.8.x` 版本请切换到项目分支 `v1.8`, 若你需要从v1.8 升级至 v1.9,请参考 [升级注意](docs/upgrade.md) 27 | 28 | ## 快速指南 29 | 30 | 单机快速体验k8s集群的测试、开发环境--[AllinOne部署](docs/quickStart.md);在国内的网络环境下要比官方的minikube方便、简单很多。 31 | 32 | ## 安装步骤 33 | 34 | 1. [集群规划和基础参数设定](docs/00-集群规划和基础参数设定.md) 35 | 1. [创建CA证书和环境配置](docs/01-创建CA证书和环境配置.md) 36 | 1. [安装etcd集群](docs/02-安装etcd集群.md) 37 | 1. [配置kubectl命令行工具](docs/03-配置kubectl命令行工具.md) 38 | 1. [安装docker服务](docs/04-安装docker服务.md) 39 | 1. [安装kube-master节点](docs/05-安装kube-master节点.md) 40 | 1. [安装kube-node节点](docs/06-安装kube-node节点.md) 41 | 1. [安装calico网络组件](docs/07-安装calico网络组件.md) 42 | 1. [安装flannel网络组件](docs/07-安装flannel网络组件.md) 43 | 44 | ## 使用指南 45 | 46 | - 常用插件部署 [kubedns](docs/guide/kubedns.md) [dashboard](docs/guide/dashboard.md) [heapster](docs/guide/heapster.md) [ingress](docs/guide/ingress.md) [efk](docs/guide/efk.md) [harbor](docs/guide/harbor.md) 47 | - K8S 特性实验 [HPA](docs/guide/hpa.md) [NetworkPolicy](docs/guide/networkpolicy.md) 48 | - 集群运维指南 49 | - 应用部署实践 50 | 51 | 请根据这份 [目录](docs/guide/index.md) 阅读你所感兴趣的内容,尚在更新中... 52 | 53 | ## 参考阅读 54 | 55 | 1. 
建议阅读 [rootsongjc-Kubernetes指南](https://github.com/rootsongjc/kubernetes-handbook) 原理和实践指南。 56 | 1. 建议阅读 [feisky-Kubernetes指南](https://github.com/feiskyer/kubernetes-handbook/blob/master/SUMMARY.md) 原理和部署章节。 57 | 1. 建议阅读 [opsnull-安装教程](https://github.com/opsnull/follow-me-install-kubernetes-cluster) 二进制手工部署。 58 | 59 | ## 版权 60 | 61 | Copyright 2017 gjmzj (jmgaozz@163.com) 62 | 63 | Apache License 2.0,详情见 [LICENSE](LICENSE) 文件。 64 | 65 | 如果觉得这份文档对你有帮助,请支付宝扫描下方的二维码进行捐赠,谢谢! 66 | 67 | ![donate](./pics/alipay.png) 68 | -------------------------------------------------------------------------------- /ansible.cfg: -------------------------------------------------------------------------------- 1 | # config file for ansible -- http://ansible.com/ 2 | # ============================================== 3 | 4 | # nearly all parameters can be overridden in ansible-playbook 5 | # or with command line flags. ansible will read ANSIBLE_CONFIG, 6 | # ansible.cfg in the current working directory, .ansible.cfg in 7 | # the home directory or /etc/ansible/ansible.cfg, whichever it 8 | # finds first 9 | 10 | [defaults] 11 | 12 | # some basic default values... 13 | 14 | #inventory = /etc/ansible/hosts 15 | #library = /usr/share/my_modules/ 16 | #remote_tmp = $HOME/.ansible/tmp 17 | #forks = 5 18 | #poll_interval = 15 19 | #sudo_user = root 20 | #ask_sudo_pass = True 21 | #ask_pass = True 22 | #transport = smart 23 | #remote_port = 22 24 | #module_lang = C 25 | 26 | # plays will gather facts by default, which contain information about 27 | # the remote system. 28 | # 29 | # smart - gather by default, but don't regather if already gathered 30 | # implicit - gather by default, turn off with gather_facts: False 31 | # explicit - do not gather by default, must say gather_facts: True 32 | gathering = explicit 33 | 34 | # additional paths to search for roles in, colon separated 35 | roles_path = /etc/ansible/roles 36 | 37 | # uncomment this to disable SSH key host checking 38 | #host_key_checking = False 39 | 40 | # change the default callback 41 | #stdout_callback = skippy 42 | # enable additional callbacks 43 | #callback_whitelist = timer, mail 44 | 45 | # change this for alternative sudo implementations 46 | #sudo_exe = sudo 47 | 48 | # What flags to pass to sudo 49 | # WARNING: leaving out the defaults might create unexpected behaviours 50 | #sudo_flags = -H -S -n 51 | 52 | # SSH timeout 53 | #timeout = 10 54 | 55 | # default user to use for playbooks if user is not specified 56 | # (/usr/bin/ansible will use current user as default) 57 | #remote_user = root 58 | 59 | # logging is off by default unless this path is defined 60 | # if so defined, consider logrotate 61 | #log_path = /var/log/ansible.log 62 | 63 | # default module name for /usr/bin/ansible 64 | #module_name = command 65 | 66 | # use this shell for commands executed under sudo 67 | # you may need to change this to bin/bash in rare instances 68 | # if sudo is constrained 69 | #executable = /bin/sh 70 | 71 | # if inventory variables overlap, does the higher precedence one win 72 | # or are hash values merged together? The default is 'replace' but 73 | # this can also be set to 'merge'. 74 | #hash_behaviour = replace 75 | 76 | # by default, variables from roles will be visible in the global variable 77 | # scope. 
To prevent this, the following option can be enabled, and only 78 | # tasks and handlers within the role will see the variables there 79 | #private_role_vars = yes 80 | 81 | # list any Jinja2 extensions to enable here: 82 | #jinja2_extensions = jinja2.ext.do,jinja2.ext.i18n 83 | 84 | # if set, always use this private key file for authentication, same as 85 | # if passing --private-key to ansible or ansible-playbook 86 | #private_key_file = /path/to/file 87 | 88 | # format of string {{ ansible_managed }} available within Jinja2 89 | # templates indicates to users editing templates files will be replaced. 90 | # replacing {file}, {host} and {uid} and strftime codes with proper values. 91 | #ansible_managed = Ansible managed: {file} modified on %Y-%m-%d %H:%M:%S by {uid} on {host} 92 | # This short version is better used in templates as it won't flag the file as changed every run. 93 | #ansible_managed = Ansible managed: {file} on {host} 94 | 95 | # by default, ansible-playbook will display "Skipping [host]" if it determines a task 96 | # should not be run on a host. Set this to "False" if you don't want to see these "Skipping" 97 | # messages. NOTE: the task header will still be shown regardless of whether or not the 98 | # task is skipped. 99 | #display_skipped_hosts = True 100 | 101 | # by default (as of 1.3), Ansible will raise errors when attempting to dereference 102 | # Jinja2 variables that are not set in templates or action lines. Uncomment this line 103 | # to revert the behavior to pre-1.3. 104 | #error_on_undefined_vars = False 105 | 106 | # by default (as of 1.6), Ansible may display warnings based on the configuration of the 107 | # system running ansible itself. This may include warnings about 3rd party packages or 108 | # other conditions that should be resolved if possible. 109 | # to disable these warnings, set the following value to False: 110 | #system_warnings = True 111 | 112 | # by default (as of 1.4), Ansible may display deprecation warnings for language 113 | # features that should no longer be used and will be removed in future versions. 114 | # to disable these warnings, set the following value to False: 115 | #deprecation_warnings = True 116 | 117 | # (as of 1.8), Ansible can optionally warn when usage of the shell and 118 | # command module appear to be simplified by using a default Ansible module 119 | # instead. These warnings can be silenced by adjusting the following 120 | # setting or adding warn=yes or warn=no to the end of the command line 121 | # parameter string. This will for example suggest using the git module 122 | # instead of shelling out to the git command. 123 | # command_warnings = False 124 | 125 | 126 | # set plugin path directories here, separate with colons 127 | #action_plugins = /usr/share/ansible/plugins/action 128 | #callback_plugins = /usr/share/ansible/plugins/callback 129 | #connection_plugins = /usr/share/ansible/plugins/connection 130 | #lookup_plugins = /usr/share/ansible/plugins/lookup 131 | #vars_plugins = /usr/share/ansible/plugins/vars 132 | #filter_plugins = /usr/share/ansible/plugins/filter 133 | #test_plugins = /usr/share/ansible/plugins/test 134 | 135 | # by default callbacks are not loaded for /bin/ansible, enable this if you 136 | # want, for example, a notification or logging callback to also apply to 137 | # /bin/ansible runs 138 | #bin_ansible_callbacks = False 139 | 140 | 141 | # don't like cows? that's unfortunate. 
142 | # set to 1 if you don't want cowsay support or export ANSIBLE_NOCOWS=1 143 | #nocows = 1 144 | 145 | # set which cowsay stencil you'd like to use by default. When set to 'random', 146 | # a random stencil will be selected for each task. The selection will be filtered 147 | # against the `cow_whitelist` option below. 148 | #cow_selection = default 149 | #cow_selection = random 150 | 151 | # when using the 'random' option for cowsay, stencils will be restricted to this list. 152 | # it should be formatted as a comma-separated list with no spaces between names. 153 | # NOTE: line continuations here are for formatting purposes only, as the INI parser 154 | # in python does not support them. 155 | #cow_whitelist=bud-frogs,bunny,cheese,daemon,default,dragon,elephant-in-snake,elephant,eyes,\ 156 | # hellokitty,kitty,luke-koala,meow,milk,moofasa,moose,ren,sheep,small,stegosaurus,\ 157 | # stimpy,supermilker,three-eyes,turkey,turtle,tux,udder,vader-koala,vader,www 158 | 159 | # don't like colors either? 160 | # set to 1 if you don't want colors, or export ANSIBLE_NOCOLOR=1 161 | #nocolor = 1 162 | 163 | # if set to a persistent type (not 'memory', for example 'redis') fact values 164 | # from previous runs in Ansible will be stored. This may be useful when 165 | # wanting to use, for example, IP information from one group of servers 166 | # without having to talk to them in the same playbook run to get their 167 | # current IP information. 168 | #fact_caching = memory 169 | 170 | 171 | # retry files 172 | # When a playbook fails by default a .retry file will be created in ~/ 173 | # You can disable this feature by setting retry_files_enabled to False 174 | # and you can change the location of the files by setting retry_files_save_path 175 | 176 | #retry_files_enabled = False 177 | #retry_files_save_path = ~/.ansible-retry 178 | 179 | 180 | # prevents logging of task data, off by default 181 | #no_log = False 182 | 183 | # prevents logging of tasks, but only on the targets, data is still logged on the master/controller 184 | #no_target_syslog = False 185 | 186 | # controls the compression level of variables sent to 187 | # worker processes. At the default of 0, no compression 188 | # is used. This value must be an integer from 0 to 9. 189 | #var_compression_level = 9 190 | 191 | [privilege_escalation] 192 | #become=True 193 | #become_method=sudo 194 | #become_user=root 195 | #become_ask_pass=False 196 | 197 | [paramiko_connection] 198 | 199 | # uncomment this line to cause the paramiko connection plugin to not record new host 200 | # keys encountered. Increases performance on new host additions. Setting works independently of the 201 | # host key checking setting above. 202 | #record_host_keys=False 203 | 204 | # by default, Ansible requests a pseudo-terminal for commands executed under sudo. Uncomment this 205 | # line to disable this behaviour. 206 | #pty=False 207 | 208 | [ssh_connection] 209 | 210 | # ssh arguments to use 211 | # Leaving off ControlPersist will result in poor performance, so use 212 | # paramiko on older platforms rather than removing it 213 | #ssh_args = -o ControlMaster=auto -o ControlPersist=60s 214 | 215 | # The path to use for the ControlPath sockets. This defaults to 216 | # "%(directory)s/ansible-ssh-%%h-%%p-%%r", however on some systems with 217 | # very long hostnames or very long path names (caused by long user names or 218 | # deeply nested home directories) this can exceed the character limit on 219 | # file socket names (108 characters for most platforms). 
In that case, you 220 | # may wish to shorten the string below. 221 | # 222 | # Example: 223 | # control_path = %(directory)s/%%h-%%r 224 | #control_path = %(directory)s/ansible-ssh-%%h-%%p-%%r 225 | 226 | # Enabling pipelining reduces the number of SSH operations required to 227 | # execute a module on the remote server. This can result in a significant 228 | # performance improvement when enabled, however when using "sudo:" you must 229 | # first disable 'requiretty' in /etc/sudoers 230 | # 231 | # By default, this option is disabled to preserve compatibility with 232 | # sudoers configurations that have requiretty (the default on many distros). 233 | # 234 | #pipelining = False 235 | 236 | # if True, make ansible use scp if the connection type is ssh 237 | # (default is sftp) 238 | #scp_if_ssh = True 239 | 240 | # if False, sftp will not use batch mode to transfer files. This may cause some 241 | # types of file transfer failures impossible to catch however, and should 242 | # only be disabled if your sftp version has problems with batch mode 243 | #sftp_batch_mode = False 244 | 245 | [accelerate] 246 | #accelerate_port = 5099 247 | #accelerate_timeout = 30 248 | #accelerate_connect_timeout = 5.0 249 | 250 | # The daemon timeout is measured in minutes. This time is measured 251 | # from the last activity to the accelerate daemon. 252 | #accelerate_daemon_timeout = 30 253 | 254 | # If set to yes, accelerate_multi_key will allow multiple 255 | # private keys to be uploaded to it, though each user must 256 | # have access to the system via SSH to add a new key. The default 257 | # is "no". 258 | #accelerate_multi_key = yes 259 | 260 | [selinux] 261 | # file systems that require special treatment when dealing with security context 262 | # the default behaviour that copies the existing context or uses the user default 263 | # needs to be changed to use the file system dependent context. 
264 | #special_context_filesystems=nfs,vboxsf,fuse,ramfs 265 | -------------------------------------------------------------------------------- /bin/VERSION.md: -------------------------------------------------------------------------------- 1 | # 主要组件版本 2 | 3 | + kubernetes v1.9.1 4 | + etcd v3.2.13 5 | + docker 17.12.0-ce 6 | -------------------------------------------------------------------------------- /branch.md: -------------------------------------------------------------------------------- 1 | ## 项目分支说明 2 | 3 | 目前项目分支为 `master` `v1.9` `v1.8`,说明如下: 4 | 5 | - `master` 分支将尽量使用最新版k8s和相关组件,网络使用`DaemonSet Pod`方式安装,目前提供`calico` `flannel` 可选 6 | - `v1.9` 分支将尽量使用k8s v1.9的最新小版本和相关组件,使用`systemd service`方式安装 `calico`网络 7 | - `v1.8` 分支将尽量使用k8s v1.8的最新小版本和相关组件,使用`systemd service`方式安装 `calico`网络 8 | -------------------------------------------------------------------------------- /docs/00-集群规划和基础参数设定.md: -------------------------------------------------------------------------------- 1 | ## 00-集群规划和基础参数设定.md 2 | 3 | 多节点高可用集群部署步骤与[AllinOne部署](quickStart.md)基本一致,增加LB 负载均衡部署步骤。 4 | 5 | ## 高可用集群所需节点配置如下: 6 | + 部署节点 x1 : 运行这份 ansible 脚本的节点 7 | + etcd节点 x3 : 注意etcd集群必须是1,3,5,7...奇数个节点 8 | + master节点 x2 : 根据实际集群规模可以增加节点数,需要额外规划一个master VIP(虚地址) 9 | + lb节点 x2 : 负载均衡节点两个,安装 haproxy+keepalived 10 | + node节点 x3 : 真正应用负载的节点,根据需要增加机器配置和节点数 11 | 12 | 生产环境使用建议一个节点只是一个角色,避免性能瓶颈问题,这里演示环境将节点绑定多个角色。项目预定义了3个例子,请修改后完成适合你的集群规划。 13 | 14 | + [单节点](../example/hosts.allinone.example) 15 | + [单主多节点](../example/hosts.s-master.example) 16 | + [多主多节点](../example/hosts.m-masters.example) 17 | 18 | ## 集群所用到的参数举例如下: 19 | ``` bash 20 | # ---------集群主要参数--------------- 21 | #集群 MASTER IP, 需要负载均衡,一般为VIP地址 22 | MASTER_IP="192.168.1.10" 23 | KUBE_APISERVER="https://192.168.1.10:8443" 24 | 25 | #pause镜像地址 26 | POD_INFRA_CONTAINER_IMAGE=mirrorgooglecontainers/pause-amd64:3.0 27 | 28 | #TLS Bootstrapping 使用的 Token,使用 head -c 16 /dev/urandom | od -An -t x | tr -d ' ' 生成 29 | BOOTSTRAP_TOKEN="c30302226d4b810e08731702d3890f50" 30 | 31 | # 集群网络插件,目前支持calico和flannel 32 | CLUSTER_NETWORK="calico" 33 | 34 | # 部分calico相关配置,更全配置可以去roles/calico/templates/calico.yaml.j2自定义 35 | # 设置 CALICO_IPV4POOL_IPIP=“off”,可以提高网络性能,条件限制详见 05.安装calico网络组件.md 36 | CALICO_IPV4POOL_IPIP="always" 37 | # 设置 calico-node使用的host IP,bgp邻居通过该地址建立,可手动指定端口"interface=eth0"或使用>如下自动发现 38 | IP_AUTODETECTION_METHOD="can-reach=223.5.5.5" 39 | 40 | # 部分flannel配置,详见roles/flannel/templates/kube-flannel.yaml.j2 41 | FLANNEL_BACKEND="vxlan" 42 | 43 | # 服务网段 (Service CIDR),部署前路由不可达,部署后集群内使用 IP:Port 可达 44 | SERVICE_CIDR="10.68.0.0/16" 45 | 46 | # POD 网段 (Cluster CIDR),部署前路由不可达,**部署后**路由可达 47 | CLUSTER_CIDR="172.20.0.0/16" 48 | 49 | # 服务端口范围 (NodePort Range) 50 | NODE_PORT_RANGE="20000-40000" 51 | 52 | # kubernetes 服务 IP (预分配,一般是 SERVICE_CIDR 中第一个IP) 53 | CLUSTER_KUBERNETES_SVC_IP="10.68.0.1" 54 | 55 | # 集群 DNS 服务 IP (从 SERVICE_CIDR 中预分配) 56 | CLUSTER_DNS_SVC_IP="10.68.0.2" 57 | 58 | # 集群 DNS 域名 59 | CLUSTER_DNS_DOMAIN="cluster.local." 
60 | 61 | # etcd 集群间通信的IP和端口, **根据实际 etcd 集群成员设置** 62 | ETCD_NODES="etcd1=https://192.168.1.1:2380,etcd2=https://192.168.1.2:2380,etcd3=https://192.168.1.3:2380" 63 | 64 | # etcd 集群服务地址列表, **根据实际 etcd 集群成员设置** 65 | ETCD_ENDPOINTS="https://192.168.1.1:2379,https://192.168.1.2:2379,https://192.168.1.3:2379" 66 | 67 | # 集群basic auth 使用的用户名和密码 68 | BASIC_AUTH_USER="admin" 69 | BASIC_AUTH_PASS="test1234" 70 | 71 | # ---------附加参数-------------------- 72 | #默认二进制文件目录 73 | bin_dir="/root/local/bin" 74 | 75 | #证书目录 76 | ca_dir="/etc/kubernetes/ssl" 77 | 78 | #部署目录,即 ansible 工作目录,建议不要修改 79 | base_dir="/etc/ansible" 80 | 81 | #私有仓库 harbor服务器 (域名或者IP) 82 | #HARBOR_IP="192.168.1.8" 83 | #HARBOR_DOMAIN="harbor.yourdomain.com" 84 | ``` 85 | 86 | + 请事先规划好使用何种网络插件(calico flannel),并配置对应网络插件的参数 87 | 88 | ## 部署步骤 89 | 90 | 按照[多主多节点](../example/hosts.m-masters.example)示例的节点配置,至少准备4台虚机,测试搭建一个多主高可用集群。 91 | 92 | ### 1.基础系统配置 93 | 94 | + 推荐内存2G/硬盘20G以上 95 | + 最小化安装`Ubuntu 16.04 server`或者`CentOS 7 Minimal` 96 | + 配置基础网络、更新源、SSH登陆等 97 | 98 | ### 2.在每个节点安装依赖工具 99 | 100 | Ubuntu 16.04 请执行以下脚本: 101 | 102 | ``` bash 103 | # 文档中脚本默认均以root用户执行 104 | apt-get update && apt-get upgrade -y && apt-get dist-upgrade -y 105 | # 删除不要的默认安装 106 | apt-get purge ufw lxd lxd-client lxcfs lxc-common 107 | # 安装python2 108 | apt-get install python2.7 109 | # Ubuntu16.04可能需要配置以下软连接 110 | ln -s /usr/bin/python2.7 /usr/bin/python 111 | ``` 112 | CentOS 7 请执行以下脚本: 113 | 114 | ``` bash 115 | # 文档中脚本默认均以root用户执行 116 | # 安装 epel 源并更新 117 | yum install epel-release -y 118 | yum update 119 | # 删除不要的默认安装 120 | yum erase firewalld firewalld-filesystem python-firewall -y 121 | # 安装python 122 | yum install python -y 123 | ``` 124 | ### 3.在deploy节点安装及准备ansible 125 | 126 | ``` bash 127 | # Ubuntu 16.04 128 | apt-get install git python-pip -y 129 | # CentOS 7 130 | yum install git python-pip -y 131 | # pip安装ansible(国内如果安装太慢可以直接用pip阿里云加速) 132 | #pip install pip --upgrade 133 | #pip install ansible 134 | pip install pip --upgrade -i http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com 135 | pip install --no-cache-dir ansible -i http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com 136 | ``` 137 | ### 4.在deploy节点配置免密码登陆 138 | 139 | ``` bash 140 | ssh-keygen -t rsa -b 2048 回车 回车 回车 141 | ssh-copy-id $IPs #$IPs为所有节点地址包括自身,按照提示输入yes 和root密码 142 | ``` 143 | ### 5.在deploy节点编排k8s安装 144 | 145 | ``` bash 146 | # 下载项目文件 147 | git clone https://github.com/gjmzj/kubeasz.git 148 | mv kubeasz /etc/ansible 149 | # 下载已打包好的binaries,并且解压缩到/etc/ansible/bin目录 150 | # 国内请从我分享的百度云链接下载 https://pan.baidu.com/s/1c4RFaA 151 | # 如果你有合适网络环境也可以按照/down/download.sh自行从官网下载各种tar包到 ./down目录,并执行download.sh 152 | tar zxvf k8s.191.tar.gz 153 | mv bin/* /etc/ansible/bin 154 | cd /etc/ansible 155 | cp example/hosts.m-masters.example hosts 156 | # 根据上文实际规划修改此hosts文件 157 | vi hosts 158 | ``` 159 | + 验证ansible安装 160 | 161 | 在deploy 节点使用如下命令 162 | 163 | ``` bash 164 | ansible all -m ping 165 | ``` 166 | 如果配置正确可以看到类似输出: 167 | 168 | ``` text 169 | 192.168.1.42 | SUCCESS => { 170 | "changed": false, 171 | "failed": false, 172 | "ping": "pong" 173 | } 174 | 192.168.1.43 | SUCCESS => { 175 | "changed": false, 176 | "failed": false, 177 | "ping": "pong" 178 | } 179 | 192.168.1.44 | SUCCESS => { 180 | "changed": false, 181 | "failed": false, 182 | "ping": "pong" 183 | } 184 | ``` 185 | + 开始安装集群,请阅读每步安装讲解后执行分步安装 186 | 187 | ``` bash 188 | #ansible-playbook 01.prepare.yml 189 | ###注意,如果后台进程有yum在运行,该命令会无限等待,不会超时退出并打印出错误,这个是Bug,需要手工将yum进程杀死。因为该任务里面需要将firewalld, 
firewalld-system,python-firewall三个组件使用yum命令删除掉,yum命令的缺陷。 190 | 191 | #ansible-playbook 02.etcd.yml 192 | #ansible-playbook 03.kubectl.yml 193 | #ansible-playbook 04.docker.yml 194 | #ansible-playbook 05.kube-master.yml 195 | #ansible-playbook 06.kube-node.yml 196 | #ansible-playbook 07.calico.yml 或者 ansible-playbook 07.flannel.yml 只能选择一种网络插件 197 | #ansible-playbook 90.setup.yml # 一步安装 198 | ``` 199 | 200 | [前一篇](quickStart.md) -- [后一篇](01-创建CA证书和环境配置.md) 201 | -------------------------------------------------------------------------------- /docs/01-创建CA证书和环境配置.md: -------------------------------------------------------------------------------- 1 | # 01-创建CA证书和环境配置.md 2 | 3 | 本步骤[01.prepare.yml](../01.prepare.yml)主要完成CA证书创建、分发、环境变量、负载均衡配置等。 4 | 5 | ### 创建 CA 证书和秘钥 6 | ``` bash 7 | roles/deploy 8 | ├── tasks 9 | │   └── main.yml 10 | └── templates 11 | ├── ca-config.json.j2 12 | └── ca-csr.json.j2 13 | ``` 14 | kubernetes 系统各组件需要使用 TLS 证书对通信进行加密,使用 CloudFlare 的 PKI 工具集生成自签名的CA证书,用来签名后续创建的其它 TLS 证书。[参考阅读](https://coreos.com/os/docs/latest/generate-self-signed-certificates.html) 15 | 16 | 根据认证对象可以将证书分成三类:服务器证书,客户端证书,对等证书 `peer cert`(表示既是`server cert`又是`client cert`),在kubernetes 集群中需要的证书种类如下: 17 | 18 | + `etcd` 节点需要标识自己监听服务的server cert,也需要client cert与`etcd`集群其他节点交互,当然可以分别指定2个证书,这里为简化使用一个peer 证书 19 | + `kube-apiserver` 需要标识apiserver服务的server cert,也需要client cert 从而操作`etcd`集群,这里为简化使用一个peer 证书 20 | + `kubectl` `calico` `kube-proxy` 只需要 client cert,因此证书请求中 hosts 字段可以为空 21 | + `kubelet` 证书比较特殊,不是手动生成,它由node节点`TLS BootStrap` 向`apiserver`请求,由master节点的`controller-manager` 自动签发,包含一个client cert 和一个server cert 22 | 23 | 请在另外窗口打开[roles/deploy/tasks/main.yml](../roles/deploy/tasks/main.yml) 文件,对照看以下讲解内容。 24 | 25 | #### 创建 CA 配置文件 [ca-config.json.j2](../roles/deploy/templates/ca-config.json.j2) 26 | ``` bash 27 | { 28 | "signing": { 29 | "default": { 30 | "expiry": "87600h" 31 | }, 32 | "profiles": { 33 | "kubernetes": { 34 | "usages": [ 35 | "signing", 36 | "key encipherment", 37 | "server auth", 38 | "client auth" 39 | ], 40 | "expiry": "87600h" 41 | } 42 | } 43 | } 44 | } 45 | ``` 46 | + `ca-config.json`:可以定义多个 profiles,分别指定不同的过期时间、使用场景等参数;这里为了方便使用 `kubernetes` 这个profile 签发三种不同类型证书 47 | + `signing`:表示该证书可用于签名其它证书;生成的 ca.pem 证书中 `CA=TRUE`; 48 | + `server auth`:表示 client 可以用该 CA 对 server 提供的证书进行验证; 49 | + `client auth`:表示 server 可以用该 CA 对 client 提供的证书进行验证; 50 | 51 | #### 创建 CA 证书签名请求 [ca-csr.json.j2](../roles/deploy/templates/ca-csr.json.j2) 52 | ``` bash 53 | { 54 | "CN": "kubernetes", 55 | "key": { 56 | "algo": "rsa", 57 | "size": 2048 58 | }, 59 | "names": [ 60 | { 61 | "C": "CN", 62 | "ST": "HangZhou", 63 | "L": "XS", 64 | "O": "k8s", 65 | "OU": "System" 66 | } 67 | ] 68 | } 69 | ``` 70 | 71 | #### 生成CA 证书和私钥 72 | ``` bash 73 | cfssl gencert -initca ca-csr.json | cfssljson -bare ca 74 | ``` 75 | + 注意整个集群只能有一个CA证书和配置文件,所以下一步要分发给每一个节点,包括calico/node也需要使用,`ansible` 角色(role) `prepare` 会完成CA 证书分发,所以把ca 证书相关先复制到 `roles/prepare/files/` 76 | 77 | #### 准备分发 CA证书 78 | 79 | ``` bash 80 | - name: 准备分发 CA证书 81 | copy: src={{ ca_dir }}/{{ item }} dest={{ base_dir }}/roles/prepare/files/{{ item }} force=no 82 | with_items: 83 | - ca.pem 84 | - ca-key.pem 85 | - ca.csr 86 | - ca-config.json 87 | ``` 88 | + force=no 保证整个安装的幂等性,如果已经生成过CA证书,就使用已经存在的CA,可以多次运行 `ansible-playbook 90.setup.yml` 89 | + 如果确实需要更新CA 证书,删除/roles/prepare/files/ca* 可以使用新CA 证书 90 | 91 | ### kubedns.yaml 配置生成 92 | 93 | + kubedns.yaml文件中部分参数(CLUSTER_DNS_SVC_IP, CLUSTER_DNS_DOMAIN)根据hosts文件设置而定,因此需要用template模块替换参数 94 | + 运行本步骤后,在 manifests/kubedns目录下生成 
kubedns.yaml 文件,以供后续部署时使用 95 | 96 | ``` bash 97 | roles/prepare/ 98 | ├── files 99 | │   ├── 95-k8s-sysctl.conf 100 | │   ├── ca-config.json 101 | │   ├── ca.csr 102 | │   ├── ca-csr.json 103 | │   ├── ca-key.pem 104 | │   └── ca.pem 105 | └── tasks 106 | └── main.yml 107 | ``` 108 | 请在另外窗口打开[roles/prepare/tasks/main.yml](../roles/prepare/tasks/main.yml) 文件,比较简单直观 109 | 110 | 1. 首先创建一些基础文件目录 111 | 1. 修改环境变量,把{{ bin_dir }} 添加到$PATH,需要重新登陆 shell生效 112 | 1. 把证书工具 CFSSL下发到指定节点 113 | 1. 把CA 证书相关下发到指定节点的 {{ ca_dir }} 目录 114 | 1. 最后设置基础操作系统软件和系统参数,请阅读脚本中的注释内容 115 | 116 | ### LB 负载均衡部署 117 | ``` bash 118 | roles/lb 119 | ├── tasks 120 | │   └── main.yml 121 | └── templates 122 | ├── haproxy.cfg.j2 123 | ├── keepalived-backup.conf.j2 124 | └── keepalived-master.conf.j2 125 | ``` 126 | 127 | Haproxy支持四层和七层负载,稳定性好,根据官方文档,HAProxy可以跑满10Gbps-New benchmark of HAProxy at 10 Gbps using Myricom's 10GbE NICs (Myri-10G PCI-Express),这个作为软件级负载均衡,也是比较惊人的;另外,openstack高可用也有用haproxy的。 128 | 129 | keepalived观其名可知,保持存活,它是基于VRRP协议保证所谓的高可用或热备的,这里用来预防haproxy的单点故障。 130 | 131 | keepalived与haproxy配合,实现master的高可用过程如下: 132 | 133 | + 1.keepalived利用vrrp协议生成一个虚拟地址(VIP),正常情况下VIP存活在keepalive的主节点,当主节点故障时,VIP能够漂移到keepalived的备节点,保障VIP地址可用性。 134 | + 2.在keepalived的主备节点都配置相同haproxy负载配置,并且监听客户端请求在VIP的地址上,保障随时都有一个haproxy负载均衡在正常工作。并且keepalived启用对haproxy进程的存活检测,一旦主节点haproxy进程故障,VIP也能切换到备节点,从而让备节点的haproxy进行负载工作。 135 | + 3.在haproxy的配置中配置多个后端真实kube-apiserver的endpoints,并启用存活监测后端kube-apiserver,如果一个kube-apiserver故障,haproxy会将其剔除负载池。 136 | 137 | 请在另外窗口打开[roles/lb/tasks/main.yml](../roles/lb/tasks/main.yml) 文件,对照看以下讲解内容。 138 | 139 | #### 安装haproxy 140 | 141 | + 使用apt源安装 142 | 143 | #### 配置haproxy [haproxy.cfg.j2](../roles/lb/templates/haproxy.cfg.j2) 144 | ``` bash 145 | global 146 | log /dev/log local0 147 | log /dev/log local1 notice 148 | chroot /var/lib/haproxy 149 | stats socket /run/haproxy/admin.sock mode 660 level admin 150 | stats timeout 30s 151 | user haproxy 152 | group haproxy 153 | daemon 154 | nbproc 1 155 | 156 | defaults 157 | log global 158 | timeout connect 5000 159 | timeout client 50000 160 | timeout server 50000 161 | 162 | listen kube-master 163 | bind 0.0.0.0:{{ MASTER_PORT }} 164 | mode tcp 165 | option tcplog 166 | balance source 167 | server s1 {{ LB_EP1 }} check inter 10000 fall 2 rise 2 weight 1 168 | server s2 {{ LB_EP2 }} check inter 10000 fall 2 rise 2 weight 1 169 | ``` 170 | 如果用apt安装的话,可以在/usr/share/doc/haproxy目录下找到配置指南configuration.txt.gz,全局和默认配置这里不展开,关注`listen` 代理设置模块,各项配置说明: 171 | + 名称 kube-master 172 | + bind 监听客户端请求的地址/端口,保证监听master的VIP地址和端口,{{ MASTER_PORT }}与hosts里面设置对应 173 | + mode 选择四层负载模式 (当然你也可以选择七层负载,请查阅指南,适当调整) 174 | + balance 选择负载算法 (负载算法也有很多供选择) 175 | + server 配置master节点真实的endpoits,必须与 [hosts文件](../example/hosts.m-masters.example)对应设置 176 | 177 | #### 安装keepalived 178 | 179 | + 使用apt源安装 180 | 181 | #### 配置keepalived主节点 [keepalived-master.conf.j2](../roles/lb/templates/keepalived-master.conf.j2) 182 | ``` bash 183 | global_defs { 184 | router_id lb-master 185 | } 186 | 187 | vrrp_script check-haproxy { 188 | script "killall -0 haproxy" 189 | interval 5 190 | weight -30 191 | } 192 | 193 | vrrp_instance VI-kube-master { 194 | state MASTER 195 | priority 120 196 | dont_track_primary 197 | interface {{ LB_IF }} 198 | virtual_router_id 51 199 | advert_int 3 200 | track_script { 201 | check-haproxy 202 | } 203 | virtual_ipaddress { 204 | {{ MASTER_IP }} 205 | } 206 | } 207 | ``` 208 | + vrrp_script 定义了监测haproxy进程的脚本,利用shell 脚本`killall -0 haproxy` 进行检测进程是否存活,如果进程不存在,根据`weight -30`设置将主节点优先级降低30,这样原先备节点将变成主节点。 
209 | + vrrp_instance 定义了vrrp组,包括优先级、使用端口、router_id、心跳频率、检测脚本、虚拟地址VIP等 210 | + 特别注意 `virtual_router_id` 标识了一个 VRRP组,在同网段下必须唯一,否则出现 `Keepalived_vrrp: bogus VRRP packet received on eth0 !!!`类似报错 211 | 212 | #### 配置keepalived备节点 [keepalived-backup.conf.j2](../roles/lb/templates/keepalived-backup.conf.j2) 213 | ``` bash 214 | global_defs { 215 | router_id lb-backup 216 | } 217 | 218 | vrrp_instance VI-kube-master { 219 | state BACKUP 220 | priority 110 221 | dont_track_primary 222 | interface {{ LB_IF }} 223 | virtual_router_id 51 224 | advert_int 3 225 | virtual_ipaddress { 226 | {{ MASTER_IP }} 227 | } 228 | } 229 | ``` 230 | + 备节点的配置类似主节点,除了优先级和检测脚本,其他如 `virtual_router_id` `advert_int` `virtual_ipaddress`必须与主节点一致 231 | 232 | ### 启动 keepalived 和 haproxy 后验证 233 | 234 | + lb 节点验证,假定 MASTER_PORT=8443 235 | 236 | ``` bash 237 | systemctl status haproxy # 检查进程状态 238 | journalctl -u haproxy # 检查进程日志是否有报错信息 239 | systemctl status keepalived # 检查进程状态 240 | journalctl -u keepalived # 检查进程日志是否有报错信息 241 | netstat -antlp|grep 8443 # 检查tcp端口是否监听 242 | ``` 243 | + 在 keepalived 主节点 244 | 245 | ``` bash 246 | ip a # 检查 master的 VIP地址是否存在 247 | ``` 248 | ### keepalived 主备切换演练 249 | 250 | 1. 尝试关闭 keepalived主节点上的 haproxy进程,然后在keepalived 备节点上查看 master的 VIP地址是否能够漂移过来,并依次检查上一步中的验证项。 251 | 1. 尝试直接关闭 keepalived 主节点系统,检查各验证项。 252 | 253 | 254 | [前一篇](00-集群规划和基础参数设定.md) -- [后一篇](02-安装etcd集群.md) 255 | -------------------------------------------------------------------------------- /docs/02-安装etcd集群.md: -------------------------------------------------------------------------------- 1 | ## 02-安装etcd集群.md 2 | 3 | ``` bash 4 | roles/etcd 5 | ├── tasks 6 | │   └── main.yml 7 | └── templates 8 | ├── etcd-csr.json.j2 9 | └── etcd.service.j2 10 | ``` 11 | kuberntes 系统使用 etcd 存储所有数据,是最重要的组件之一,注意 etcd集群只能有奇数个节点(1,3,5...),本文档使用3个节点做集群。 12 | 13 | 请在另外窗口打开[roles/etcd/tasks/main.yml](../roles/etcd/tasks/main.yml) 文件,对照看以下讲解内容。 14 | 15 | ### 下载etcd/etcdctl 二进制文件、创建证书目录 16 | 17 | ### 创建etcd证书请求 [etcd-csr.json.j2](../roles/etcd/templates/etcd-csr.json.j2) 18 | 19 | ``` bash 20 | { 21 | "CN": "etcd", 22 | "hosts": [ 23 | "127.0.0.1", 24 | "{{ NODE_IP }}" 25 | ], 26 | "key": { 27 | "algo": "rsa", 28 | "size": 2048 29 | }, 30 | "names": [ 31 | { 32 | "C": "CN", 33 | "ST": "HangZhou", 34 | "L": "XS", 35 | "O": "k8s", 36 | "OU": "System" 37 | } 38 | ] 39 | } 40 | ``` 41 | + hosts 字段指定授权使用该证书的 etcd 节点 IP 42 | 43 | ### 创建证书和私钥 44 | 45 | ``` bash 46 | cd /etc/etcd/ssl && {{ bin_dir }}/cfssl gencert \ 47 | -ca={{ ca_dir }}/ca.pem \ 48 | -ca-key={{ ca_dir }}/ca-key.pem \ 49 | -config={{ ca_dir }}/ca-config.json \ 50 | -profile=kubernetes etcd-csr.json | {{ bin_dir }}/cfssljson -bare etcd 51 | ``` 52 | 53 | ### 创建etcd 服务文件 [etcd.service.j2](../roles/etcd/templates/etcd.service.j2) 54 | 55 | 先创建工作目录 /var/lib/etcd/ 56 | 57 | ``` bash 58 | [Unit] 59 | Description=Etcd Server 60 | After=network.target 61 | After=network-online.target 62 | Wants=network-online.target 63 | Documentation=https://github.com/coreos 64 | 65 | [Service] 66 | Type=notify 67 | WorkingDirectory=/var/lib/etcd/ 68 | ExecStart={{ bin_dir }}/etcd \ 69 | --name={{ NODE_NAME }} \ 70 | --cert-file=/etc/etcd/ssl/etcd.pem \ 71 | --key-file=/etc/etcd/ssl/etcd-key.pem \ 72 | --peer-cert-file=/etc/etcd/ssl/etcd.pem \ 73 | --peer-key-file=/etc/etcd/ssl/etcd-key.pem \ 74 | --trusted-ca-file={{ ca_dir }}/ca.pem \ 75 | --peer-trusted-ca-file={{ ca_dir }}/ca.pem \ 76 | --initial-advertise-peer-urls=https://{{ NODE_IP }}:2380 \ 77 | --listen-peer-urls=https://{{ NODE_IP }}:2380 \ 78 | 
--listen-client-urls=https://{{ NODE_IP }}:2379,http://127.0.0.1:2379 \ 79 | --advertise-client-urls=https://{{ NODE_IP }}:2379 \ 80 | --initial-cluster-token=etcd-cluster-0 \ 81 | --initial-cluster={{ ETCD_NODES }} \ 82 | --initial-cluster-state=new \ 83 | --data-dir=/var/lib/etcd 84 | Restart=on-failure 85 | RestartSec=5 86 | LimitNOFILE=65536 87 | 88 | [Install] 89 | WantedBy=multi-user.target 90 | ``` 91 | + 完整参数列表请使用 `etcd --help` 查询 92 | + 注意etcd 即需要服务器证书也需要客户端证书,这里为方便使用一个peer 证书代替两个证书,更多证书相关请阅读 [01-创建CA证书和环境配置.md](01-创建CA证书和环境配置.md) 93 | + 注意{{ }} 中的参数与ansible hosts文件中设置对应 94 | + `--initial-cluster-state` 值为 `new` 时,`--name` 的参数值必须位于 `--initial-cluster` 列表中; 95 | 96 | ### 启动etcd服务 97 | 98 | ``` bash 99 | systemctl daemon-reload && systemctl enable etcd && systemctl start etcd 100 | ``` 101 | 102 | ### 验证etcd集群状态 103 | 104 | + systemctl status etcd 查看服务状态 105 | + journalctl -u etcd 查看运行日志 106 | + 在任一 etcd 集群节点上执行如下命令 107 | 108 | ``` bash 109 | # 根据hosts中配置设置shell变量 $NODE_IPS 110 | export NODE_IPS="192.168.1.1 192.168.1.2 192.168.1.3" 111 | $ for ip in ${NODE_IPS}; do 112 | ETCDCTL_API=3 /root/local/bin/etcdctl \ 113 | --endpoints=https://${ip}:2379 \ 114 | --cacert=/etc/kubernetes/ssl/ca.pem \ 115 | --cert=/etc/etcd/ssl/etcd.pem \ 116 | --key=/etc/etcd/ssl/etcd-key.pem \ 117 | endpoint health; done 118 | ``` 119 | 预期结果: 120 | 121 | ``` text 122 | https://192.168.1.1:2379 is healthy: successfully committed proposal: took = 2.210885ms 123 | https://192.168.1.2:2379 is healthy: successfully committed proposal: took = 2.784043ms 124 | https://192.168.1.3:2379 is healthy: successfully committed proposal: took = 3.275709ms 125 | ``` 126 | 三台 etcd 的输出均为 healthy 时表示集群服务正常。 127 | 128 | 129 | [前一篇](01-创建CA证书和环境配置.md) -- [后一篇](03-配置kubectl命令行工具.md) 130 | -------------------------------------------------------------------------------- /docs/03-配置kubectl命令行工具.md: -------------------------------------------------------------------------------- 1 | ## 03-配置kubectl命令行工具.md 2 | 3 | kubectl使用~/.kube/config 配置文件与kube-apiserver进行交互,且拥有完全权限[可配置],因此尽量避免安装在不必要的节点上,这里为了演示方便,将它安装在master/node/deploy节点。 4 | `cat ~/.kube/config`可以看到配置文件包含 kube-apiserver 地址、证书、用户名等信息。 5 | 6 | ``` bash 7 | roles/kubectl 8 | ├── tasks 9 | │   └── main.yml 10 | └── templates 11 | └── admin-csr.json.j2 12 | ``` 13 | 请在另外窗口打开[roles/kubectl/tasks/main.yml](../roles/kubectl/tasks/main.yml) 文件,对照看以下讲解内容。 14 | 15 | ### 准备kubectl使用的admin 证书签名请求 [admin-csr.json.j2](../roles/kubectl/templates/admin-csr.json.j2) 16 | 17 | ``` bash 18 | { 19 | "CN": "admin", 20 | "hosts": [], 21 | "key": { 22 | "algo": "rsa", 23 | "size": 2048 24 | }, 25 | "names": [ 26 | { 27 | "C": "CN", 28 | "ST": "HangZhou", 29 | "L": "XS", 30 | "O": "system:masters", 31 | "OU": "System" 32 | } 33 | ] 34 | } 35 | 36 | ``` 37 | + 后续我们在安装`master`节点时候会启用 `RBAC`特性,它在v1.8.x中已是稳定版本,推荐[RBAC官方文档](https://kubernetes.io/docs/admin/authorization/rbac/) 38 | + 证书请求中 `O` 指定该证书的 Group 为 `system:masters`,而 `RBAC` 预定义的 `ClusterRoleBinding` 将 Group `system:masters` 与 ClusterRole `cluster-admin` 绑定,这就赋予了kubectl**所有集群权限** 39 | 40 | kubectl get clusterrolebinding cluster-admin -o yaml 41 | 42 | ``` bash 43 | apiVersion: rbac.authorization.k8s.io/v1 44 | kind: ClusterRoleBinding 45 | metadata: 46 | annotations: 47 | rbac.authorization.kubernetes.io/autoupdate: "true" 48 | creationTimestamp: 2017-11-30T01:33:10Z 49 | labels: 50 | kubernetes.io/bootstrapping: rbac-defaults 51 | name: cluster-admin 52 | resourceVersion: "76" 53 | selfLink: 
/apis/rbac.authorization.k8s.io/v1/clusterrolebindings/cluster-admin 54 | uid: 6c9dd451-d56e-11e7-8ed6-525400103a5d 55 | roleRef: 56 | apiGroup: rbac.authorization.k8s.io 57 | kind: ClusterRole 58 | name: cluster-admin 59 | subjects: 60 | - apiGroup: rbac.authorization.k8s.io 61 | kind: Group 62 | name: system:masters 63 | ``` 64 | ### 创建admin 证书和私钥 65 | 66 | ``` bash 67 | cd {{ ca_dir }} && {{ bin_dir }}/cfssl gencert \ 68 | -ca={{ ca_dir }}/ca.pem \ 69 | -ca-key={{ ca_dir }}/ca-key.pem \ 70 | -config={{ ca_dir }}/ca-config.json \ 71 | -profile=kubernetes admin-csr.json | {{ bin_dir }}/cfssljson -bare admin 72 | ``` 73 | ### 创建 kubectl kubeconfig 文件 74 | 75 | #### 设置集群参数,指定CA证书和apiserver地址 76 | 77 | ``` bash 78 | {{ bin_dir }}/kubectl config set-cluster kubernetes \ 79 | --certificate-authority={{ ca_dir }}/ca.pem \ 80 | --embed-certs=true \ 81 | --server={{ KUBE_APISERVER }} 82 | ``` 83 | 84 | #### 设置客户端认证参数,指定使用admin证书和私钥 85 | 86 | ``` bash 87 | {{ bin_dir }}/kubectl config set-credentials admin \ 88 | --client-certificate={{ ca_dir }}/admin.pem \ 89 | --embed-certs=true \ 90 | --client-key={{ ca_dir }}/admin-key.pem 91 | ``` 92 | 93 | #### 设置上下文参数,说明使用cluster集群和用户admin 94 | 95 | ``` bash 96 | {{ bin_dir }}/kubectl config set-context kubernetes \ 97 | --cluster=kubernetes --user=admin 98 | ``` 99 | 100 | #### 选择默认上下文 101 | 102 | ``` bash 103 | {{ bin_dir }}/kubectl config use-context kubernetes 104 | ``` 105 | + 注意{{ }}中参数与ansible hosts文件中设置对应 106 | + 以上生成的 kubeconfig 自动保存到 ~/.kube/config 文件 107 | 108 | 109 | [前一篇](02-安装etcd集群.md) -- [后一篇](04-安装docker服务.md) 110 | -------------------------------------------------------------------------------- /docs/04-安装docker服务.md: -------------------------------------------------------------------------------- 1 | ## 04-安装docker服务.md 2 | 3 | ``` bash 4 | roles/docker/ 5 | ├── files 6 | │   ├── daemon.json 7 | │   ├── docker 8 | │   └── docker-tag 9 | ├── tasks 10 | │   └── main.yml 11 | └── templates 12 | └── docker.service.j2 13 | ``` 14 | 15 | 请在另外窗口打开[roles/docker/tasks/main.yml](../roles/docker/tasks/main.yml) 文件,对照看以下讲解内容。 16 | 17 | ### 创建docker的systemd unit文件 18 | 19 | ``` bash 20 | [Unit] 21 | Description=Docker Application Container Engine 22 | Documentation=http://docs.docker.io 23 | 24 | [Service] 25 | Environment="PATH={{ bin_dir }}:/bin:/sbin:/usr/bin:/usr/sbin" 26 | ExecStart={{ bin_dir }}/dockerd --log-level=error 27 | ExecStartPost=/sbin/iptables -I FORWARD -s 0.0.0.0/0 -j ACCEPT 28 | ExecReload=/bin/kill -s HUP $MAINPID 29 | Restart=on-failure 30 | RestartSec=5 31 | LimitNOFILE=infinity 32 | LimitNPROC=infinity 33 | LimitCORE=infinity 34 | Delegate=yes 35 | KillMode=process 36 | 37 | [Install] 38 | WantedBy=multi-user.target 39 | ``` 40 | + dockerd 运行时会调用其它 docker 命令,如 docker-proxy,所以需要将 docker 命令所在的目录加到 PATH 环境变量中; 41 | + docker 从 1.13 版本开始,将`iptables` 的`filter` 表的`FORWARD` 链的默认策略设置为`DROP`,从而导致 ping 其它 Node 上的 Pod IP 失败,因此必须在 `filter` 表的`FORWARD` 链增加一条默认允许规则 `iptables -I FORWARD -s 0.0.0.0/0 -j ACCEPT` 42 | + 运行`dockerd --help` 查看所有可以可配置参数,确保默认开启 `--iptables` 和 `--ip-masq` 选项 43 | 44 | ### 配置国内镜像加速 45 | 46 | 众所周知从国内下载docker官方仓库镜像非常缓慢,所以对于k8s集群来说配置镜像加速非常重要,配置 `/etc/docker/daemon.json` 47 | 48 | ``` bash 49 | { 50 | "registry-mirrors": ["https://registry.docker-cn.com"], 51 | "max-concurrent-downloads": 6 52 | } 53 | ``` 54 | 55 | 这将在后续部署calico下载 calico/node:v2.6.2镜像和kubedns/heapster/dashboard镜像时起到重要加速效果。 56 | 57 | 由于K8S的官方镜像存放在`gcr.io`仓库,因此这个镜像加速对K8S的官方镜像没有效果;好在`Docker Hub`上有很多K8S镜像的转存,而`Docker Hub`上的镜像是国内可以加速的。 58 | 
这里推荐[mirrorgooglecontainers](https://hub.docker.com/u/mirrorgooglecontainers/)几乎能找到所有K8S相关的google镜像,而且更新及时,感谢维护者的辛勤付出!后文将看到部署附加组件时基本都是用他们的镜像。 59 | 60 | 当然对于企业内部应用的docker镜像,想要在K8S平台运行的话,特别是结合开发`CI/CD` 流程,肯定是需要部署私有镜像仓库的,后续会简单提到 `Harbor`的部署。 61 | 62 | ### 清理 iptables 63 | 64 | 因为后续`calico`网络、`kube-proxy`等将大量使用 iptables规则,安装前清空所有`iptables`策略规则;常见发行版`Ubuntu`的 `ufw` 和 `CentOS`的 `firewalld`等基于`iptables`的防火墙最好直接卸载,避免不必要的冲突。 65 | 66 | ``` bash 67 | iptables -F && iptables -X \ 68 | && iptables -F -t nat && iptables -X -t nat \ 69 | && iptables -F -t raw && iptables -X -t raw \ 70 | && iptables -F -t mangle && iptables -X -t mangle 71 | ``` 72 | + calico 网络支持 `network-policy`,使用的`calico-kube-controllers` 会使用到`iptables` 所有的四个表 `filter` `nat` `raw` `mangle`,所以一并清理 73 | 74 | ### 启动 docker 略 75 | 76 | ### 可选-安装docker查询镜像 tag的小工具 77 | 78 | docker官方目前没有提供在命令行直接查询某个镜像的tag信息的方式,网上找来一个脚本工具,使用很方便。 79 | 80 | ``` bash 81 | > docker-tag library/ubuntu 82 | "14.04" 83 | "16.04" 84 | "17.04" 85 | "latest" 86 | "trusty" 87 | "trusty-20171117" 88 | "xenial" 89 | "xenial-20171114" 90 | "zesty" 91 | "zesty-20171114" 92 | >docker-tag mirrorgooglecontainers/kubernetes-dashboard-amd64 93 | "v0.1.0" 94 | "v1.0.0" 95 | "v1.0.0-beta1" 96 | "v1.0.1" 97 | "v1.1.0-beta1" 98 | "v1.1.0-beta2" 99 | "v1.1.0-beta3" 100 | "v1.7.0" 101 | "v1.7.1" 102 | "v1.8.0" 103 | ``` 104 | + 需要先apt安装轻量JSON处理程序 `jq` 105 | + 然后下载脚本即可使用 106 | + 脚本很简单,就一行命令如下 107 | 108 | ``` bash 109 | #!/bin/bash 110 | curl -s -S "https://registry.hub.docker.com/v2/repositories/$@/tags/" | jq '."results"[]["name"]' |sort 111 | ``` 112 | + 对于 CentOS7 安装 `jq` 稍微费力一点,需要启用 `EPEL` 源 113 | 114 | ``` bash 115 | wget http://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm 116 | rpm -ivh epel-release-latest-7.noarch.rpm 117 | yum install jq 118 | ``` 119 | 120 | ### 验证 121 | 122 | 运行`ansible-playbook 04.docker.yml` 成功后可以验证 123 | 124 | ``` bash 125 | systemctl status docker # 服务状态 126 | journalctl -u docker # 运行日志 127 | docker version 128 | docker info 129 | ``` 130 | `iptables-save|grep FORWARD` 查看 iptables filter表 FORWARD链,最后要有一个 `-A FORWARD -j ACCEPT` 保底允许规则 131 | 132 | ``` bash 133 | iptables-save|grep FORWARD 134 | :FORWARD ACCEPT [0:0] 135 | :FORWARD DROP [0:0] 136 | -A FORWARD -j DOCKER-USER 137 | -A FORWARD -j DOCKER-ISOLATION 138 | -A FORWARD -o docker0 -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT 139 | -A FORWARD -o docker0 -j DOCKER 140 | -A FORWARD -i docker0 ! 
-o docker0 -j ACCEPT 141 | -A FORWARD -i docker0 -o docker0 -j ACCEPT 142 | -A FORWARD -j ACCEPT 143 | ``` 144 | 145 | [前一篇](03-配置kubectl命令行工具.md) -- [后一篇](05-安装kube-master节点.md) 146 | -------------------------------------------------------------------------------- /docs/05-安装kube-master节点.md: -------------------------------------------------------------------------------- 1 | ## 05-安装kube-master节点.md 2 | 3 | 部署master节点包含三个组件`apiserver` `scheduler` `controller-manager`,其中: 4 | 5 | - apiserver提供集群管理的REST API接口,包括认证授权、数据校验以及集群状态变更等 6 | - 只有API Server才直接操作etcd 7 | - 其他模块通过API Server查询或修改数据 8 | - 提供其他模块之间的数据交互和通信的枢纽 9 | - scheduler负责分配调度Pod到集群内的node节点 10 | - 监听kube-apiserver,查询还未分配Node的Pod 11 | - 根据调度策略为这些Pod分配节点 12 | - controller-manager由一系列的控制器组成,它通过apiserver监控整个集群的状态,并确保集群处于预期的工作状态 13 | 14 | master节点的高可用主要就是实现apiserver组件的高可用,在之前部署lb节点时候已经配置haproxy对它进行负载均衡。 15 | 16 | ``` text 17 | roles/kube-master/ 18 | ├── tasks 19 | │   └── main.yml 20 | └── templates 21 | ├── basic-auth.csv.j2 22 | ├── kube-apiserver.service.j2 23 | ├── kube-controller-manager.service.j2 24 | ├── kubernetes-csr.json.j2 25 | ├── kube-scheduler.service.j2 26 | └── token.csv.j2 27 | ``` 28 | 29 | 请在另外窗口打开[roles/kube-master/tasks/main.yml](../roles/kube-master/tasks/main.yml) 文件,对照看以下讲解内容。 30 | 31 | ### 创建 kubernetes 证书签名请求 32 | 33 | ``` bash 34 | { 35 | "CN": "kubernetes", 36 | "hosts": [ 37 | "127.0.0.1", 38 | "{{ MASTER_IP }}", 39 | "{{ NODE_IP }}", 40 | "{{ CLUSTER_KUBERNETES_SVC_IP }}", 41 | "kubernetes", 42 | "kubernetes.default", 43 | "kubernetes.default.svc", 44 | "kubernetes.default.svc.cluster", 45 | "kubernetes.default.svc.cluster.local" 46 | ], 47 | "key": { 48 | "algo": "rsa", 49 | "size": 2048 50 | }, 51 | "names": [ 52 | { 53 | "C": "CN", 54 | "ST": "HangZhou", 55 | "L": "XS", 56 | "O": "k8s", 57 | "OU": "System" 58 | } 59 | ] 60 | } 61 | ``` 62 | - kubernetes 证书既是服务器证书,同时apiserver又作为客户端证书去访问etcd 集群;作为服务器证书需要设置hosts 指定使用该证书的IP 或域名列表,需要注意的是: 63 | - 多主高可用集群需要把master VIP地址 {{ MASTER_IP }} 也添加进去 64 | - `kubectl get svc` 将看到集群中由api-server 创建的默认服务 `kubernetes`,因此也要把 `kubernetes` 服务名和各个服务域名也添加进去 65 | - 注意所有{{ }}变量与ansible hosts中设置的对应关系 66 | 67 | ### 创建 token 认证配置 68 | 69 | 因为手动为每个node节点配置TLS认证比较麻烦,后续apiserver会开启 experimental-bootstrap-token-auth 特性,利用 kubelet启动时的 token信息与此处token认证匹配认证,然后自动为 node颁发证书 70 | 71 | ``` bash 72 | {{ BOOTSTRAP_TOKEN }},kubelet-bootstrap,10001,"system:kubelet-bootstrap" 73 | ``` 74 | 75 | ### 创建基础用户名/密码认证配置 76 | 77 | 可选,为后续使用基础认证的场景做准备,如实现dashboard 用不同用户名登陆绑定不同的权限,后续更新dashboard的实践文档。 78 | 79 | ### 创建apiserver的服务配置文件 80 | 81 | ``` bash 82 | [Unit] 83 | Description=Kubernetes API Server 84 | Documentation=https://github.com/GoogleCloudPlatform/kubernetes 85 | After=network.target 86 | 87 | [Service] 88 | ExecStart={{ bin_dir }}/kube-apiserver \ 89 | --admission-control=NamespaceLifecycle,LimitRanger,ServiceAccount,DefaultStorageClass,ResourceQuota,NodeRestriction \ 90 | --bind-address={{ NODE_IP }} \ 91 | --insecure-bind-address=127.0.0.1 \ 92 | --authorization-mode=Node,RBAC \ 93 | --runtime-config=rbac.authorization.k8s.io/v1 \ 94 | --kubelet-https=true \ 95 | --anonymous-auth=false \ 96 | --basic-auth-file={{ ca_dir }}/basic-auth.csv \ 97 | --enable-bootstrap-token-auth \ 98 | --token-auth-file={{ ca_dir }}/token.csv \ 99 | --service-cluster-ip-range={{ SERVICE_CIDR }} \ 100 | --service-node-port-range={{ NODE_PORT_RANGE }} \ 101 | --tls-cert-file={{ ca_dir }}/kubernetes.pem \ 102 | --tls-private-key-file={{ ca_dir }}/kubernetes-key.pem \ 103 | --client-ca-file={{ ca_dir }}/ca.pem \ 104 | 
--service-account-key-file={{ ca_dir }}/ca-key.pem \ 105 | --etcd-cafile={{ ca_dir }}/ca.pem \ 106 | --etcd-certfile={{ ca_dir }}/kubernetes.pem \ 107 | --etcd-keyfile={{ ca_dir }}/kubernetes-key.pem \ 108 | --etcd-servers={{ ETCD_ENDPOINTS }} \ 109 | --enable-swagger-ui=true \ 110 | --allow-privileged=true \ 111 | --audit-log-maxage=30 \ 112 | --audit-log-maxbackup=3 \ 113 | --audit-log-maxsize=100 \ 114 | --audit-log-path=/var/lib/audit.log \ 115 | --event-ttl=1h \ 116 | --v=2 117 | Restart=on-failure 118 | RestartSec=5 119 | Type=notify 120 | LimitNOFILE=65536 121 | 122 | [Install] 123 | WantedBy=multi-user.target 124 | ``` 125 | + Kubernetes 对 API 访问需要依次经过认证、授权和准入控制(admission controll),认证解决用户是谁的问题,授权解决用户能做什么的问题,Admission Control则是资源管理方面的作用。 126 | + 支持同时提供https(默认监听在6443端口)和http API(默认监听在127.0.0.1的8080端口),其中http API是非安全接口,不做任何认证授权机制,kube-scheduler、kube-controller-manager 一般和 kube-apiserver 部署在同一台机器上,它们使用非安全端口和 kube-apiserver通信; 其他集群外部就使用HTTPS访问 apiserver 127 | + 关于authorization-mode=Node,RBAC v1.7+支持Node授权,配合NodeRestriction准入控制来限制kubelet仅可访问node、endpoint、pod、service以及secret、configmap、PV和PVC等相关的资源;需要注意的是v1.7中Node 授权是默认开启的,v1.8中需要显式配置开启,否则 Node无法正常工作 128 | + 缺省情况下 kubernetes 对象保存在 etcd /registry 路径下,可以通过 --etcd-prefix 参数进行调整 129 | + 详细参数配置请参考`kube-apiserver --help`,关于认证、授权和准入控制请[阅读](https://github.com/feiskyer/kubernetes-handbook/blob/master/components/apiserver.md) 130 | 131 | ### 创建controller-manager 的服务文件 132 | 133 | ``` bash 134 | [Unit] 135 | Description=Kubernetes Controller Manager 136 | Documentation=https://github.com/GoogleCloudPlatform/kubernetes 137 | 138 | [Service] 139 | ExecStart={{ bin_dir }}/kube-controller-manager \ 140 | --address=127.0.0.1 \ 141 | --master=http://127.0.0.1:8080 \ 142 | --allocate-node-cidrs=true \ 143 | --service-cluster-ip-range={{ SERVICE_CIDR }} \ 144 | --cluster-cidr={{ CLUSTER_CIDR }} \ 145 | --cluster-name=kubernetes \ 146 | --cluster-signing-cert-file={{ ca_dir }}/ca.pem \ 147 | --cluster-signing-key-file={{ ca_dir }}/ca-key.pem \ 148 | --service-account-private-key-file={{ ca_dir }}/ca-key.pem \ 149 | --root-ca-file={{ ca_dir }}/ca.pem \ 150 | --leader-elect=true \ 151 | --v=2 152 | Restart=on-failure 153 | RestartSec=5 154 | 155 | [Install] 156 | WantedBy=multi-user.target 157 | ``` 158 | + --address 值必须为 127.0.0.1,因为当前 kube-apiserver 期望 scheduler 和 controller-manager 在同一台机器 159 | + --master=http://127.0.0.1:8080 使用非安全 8080 端口与 kube-apiserver 通信 160 | + --cluster-cidr 指定 Cluster 中 Pod 的 CIDR 范围,该网段在各 Node 间必须路由可达(calico 实现) 161 | + --service-cluster-ip-range 参数指定 Cluster 中 Service 的CIDR范围,必须和 kube-apiserver 中的参数一致 162 | + --cluster-signing-* 指定的证书和私钥文件用来签名为 TLS BootStrap 创建的证书和私钥 163 | + --root-ca-file 用来对 kube-apiserver 证书进行校验,指定该参数后,才会在Pod 容器的 ServiceAccount 中放置该 CA 证书文件 164 | + --leader-elect=true 使用多节点选主的方式选择主节点。只有主节点才会启动所有控制器,而其他从节点则仅执行选主算法 165 | 166 | ### 创建scheduler 的服务文件 167 | 168 | ``` bash 169 | [Unit] 170 | Description=Kubernetes Scheduler 171 | Documentation=https://github.com/GoogleCloudPlatform/kubernetes 172 | 173 | [Service] 174 | ExecStart={{ bin_dir }}/kube-scheduler \ 175 | --address=127.0.0.1 \ 176 | --master=http://127.0.0.1:8080 \ 177 | --leader-elect=true \ 178 | --v=2 179 | Restart=on-failure 180 | RestartSec=5 181 | 182 | [Install] 183 | WantedBy=multi-user.target 184 | ``` 185 | 186 | + --address 同样值必须为 127.0.0.1 187 | + --master=http://127.0.0.1:8080 使用非安全 8080 端口与 kube-apiserver 通信 188 | + --leader-elect=true 部署多台机器组成的 master 集群时选举产生一个处于工作状态的 kube-controller-manager 进程 189 | 190 | ### master 集群的验证 191 | 192 | 运行 
`ansible-playbook 06.kube-master.yml` 成功后,验证 master节点的主要组件: 193 | 194 | ``` bash 195 | # 查看进程状态 196 | systemctl status kube-apiserver 197 | systemctl status kube-controller-manager 198 | systemctl status kube-scheduler 199 | # 查看进程运行日志 200 | journalctl -u kube-apiserver 201 | journalctl -u kube-controller-manager 202 | journalctl -u kube-scheduler 203 | ``` 204 | 执行 `kubectl get componentstatus` 可以看到 205 | 206 | ``` bash 207 | NAME STATUS MESSAGE ERROR 208 | scheduler Healthy ok 209 | controller-manager Healthy ok 210 | etcd-0 Healthy {"health": "true"} 211 | etcd-2 Healthy {"health": "true"} 212 | etcd-1 Healthy {"health": "true"} 213 | ``` 214 | 215 | [前一篇](04-安装docker服务.md) -- [后一篇](06-安装kube-node节点.md) 216 | -------------------------------------------------------------------------------- /docs/06-安装kube-node节点.md: -------------------------------------------------------------------------------- 1 | ## 06-安装kube-node节点.md 2 | 3 | `kube-node` 是集群中承载应用的节点,前置条件需要先部署好`kube-master`节点(因为需要操作`用户角色绑定`、`批准kubelet TLS 证书请求`等),它需要部署如下组件: 4 | 5 | + docker:运行容器 6 | + calico: 配置容器网络 (或者 flannel) 7 | + kubelet: kube-node上最主要的组件 8 | + kube-proxy: 发布应用服务与负载均衡 9 | 10 | ``` bash 11 | roles/kube-node 12 | ├── tasks 13 | │   └── main.yml 14 | └── templates 15 | ├── cni-default.conf.j2 16 | ├── kubelet.service.j2 17 | ├── kube-proxy-csr.json.j2 18 | └── kube-proxy.service.j2 19 | ``` 20 | 21 | 请在另外窗口打开[roles/kube-node/tasks/main.yml](../roles/kube-node/tasks/main.yml) 文件,对照看以下讲解内容。 22 | 23 | ### 创建角色绑定 24 | 25 | kubelet 启动时向 kube-apiserver 发送 TLS bootstrapping 请求,需要先将 bootstrap token 文件中的 kubelet-bootstrap 用户赋予 system:node-bootstrapper 角色,然后 kubelet 才有权限创建认证请求 26 | 27 | ``` bash 28 | # 增加15秒延时是为了等待上一步kube-master 启动完全 29 | "sleep 15 && {{ bin_dir }}/kubectl create clusterrolebinding kubelet-bootstrap \ 30 | --clusterrole=system:node-bootstrapper --user=kubelet-bootstrap" 31 | ``` 32 | 33 | ### 创建 bootstrapping kubeconfig 文件 34 | 35 | ``` bash 36 | #设置集群参数 37 | shell: "{{ bin_dir }}/kubectl config set-cluster kubernetes \ 38 | --certificate-authority={{ ca_dir }}/ca.pem \ 39 | --embed-certs=true \ 40 | --server={{ KUBE_APISERVER }} \ 41 | --kubeconfig=bootstrap.kubeconfig" 42 | #设置客户端认证参数 43 | shell: "{{ bin_dir }}/kubectl config set-credentials kubelet-bootstrap \ 44 | --token={{ BOOTSTRAP_TOKEN }} \ 45 | --kubeconfig=bootstrap.kubeconfig" 46 | #设置上下文参数 47 | shell: "{{ bin_dir }}/kubectl config set-context default \ 48 | --cluster=kubernetes \ 49 | --user=kubelet-bootstrap \ 50 | --kubeconfig=bootstrap.kubeconfig" 51 | #选择默认上下文 52 | shell: "{{ bin_dir }}/kubectl config use-context default --kubeconfig=bootstrap.kubeconfig" 53 | ``` 54 | + 注意 kubelet bootstrapping认证时是靠 token的,后续由 `master`为其生成证书和私钥 55 | + 以上生成的bootstrap.kubeconfig配置文件需要移动到/etc/kubernetes/目录下,后续在kubelet启动参数中指定该目录下的 bootstrap.kubeconfig 56 | 57 | ### 创建cni 基础网络插件配置文件 58 | 59 | 因为后续需要用 `DaemonSet Pod`方式运行k8s网络插件,所以kubelet.server服务必须开启cni相关参数,并且提供cni网络配置文件 60 | 61 | ### 创建 kubelet 的服务文件 62 | 63 | + 必须先创建工作目录 `/var/lib/kubelet` 64 | 65 | ``` bash 66 | [Unit] 67 | Description=Kubernetes Kubelet 68 | Documentation=https://github.com/GoogleCloudPlatform/kubernetes 69 | After=docker.service 70 | Requires=docker.service 71 | 72 | [Service] 73 | WorkingDirectory=/var/lib/kubelet 74 | #--pod-infra-container-image=registry.access.redhat.com/rhel7/pod-infrastructure:latest 75 | ExecStart={{ bin_dir }}/kubelet \ 76 | --address={{ NODE_IP }} \ 77 | --hostname-override={{ NODE_IP }} \ 78 | --pod-infra-container-image={{ POD_INFRA_CONTAINER_IMAGE }} \ 79 | 
--experimental-bootstrap-kubeconfig=/etc/kubernetes/bootstrap.kubeconfig \ 80 | --kubeconfig=/etc/kubernetes/kubelet.kubeconfig \ 81 | --cert-dir={{ ca_dir }} \ 82 | --network-plugin=cni \ 83 | --cni-conf-dir=/etc/cni/net.d \ 84 | --cni-bin-dir={{ bin_dir }} \ 85 | --cluster-dns={{ CLUSTER_DNS_SVC_IP }} \ 86 | --cluster-domain={{ CLUSTER_DNS_DOMAIN }} \ 87 | --hairpin-mode hairpin-veth \ 88 | --allow-privileged=true \ 89 | --fail-swap-on=false \ 90 | --logtostderr=true \ 91 | --v=2 92 | #kubelet cAdvisor 默认在所有接口监听 4194 端口的请求, 以下iptables限制内网访问 93 | ExecStartPost=/sbin/iptables -A INPUT -s 10.0.0.0/8 -p tcp --dport 4194 -j ACCEPT 94 | ExecStartPost=/sbin/iptables -A INPUT -s 172.16.0.0/12 -p tcp --dport 4194 -j ACCEPT 95 | ExecStartPost=/sbin/iptables -A INPUT -s 192.168.0.0/16 -p tcp --dport 4194 -j ACCEPT 96 | ExecStartPost=/sbin/iptables -A INPUT -p tcp --dport 4194 -j DROP 97 | Restart=on-failure 98 | RestartSec=5 99 | 100 | [Install] 101 | WantedBy=multi-user.target 102 | ``` 103 | + --pod-infra-container-image 指定`基础容器`的镜像,负责创建Pod 内部共享的网络、文件系统等,这个基础容器非常重要:**K8S每一个运行的 POD里面必然包含这个基础容器**,如果它没有运行起来那么你的POD 肯定创建不了,kubelet日志里面会看到类似 ` FailedCreatePodSandBox` 错误,本项目集群常见 `SandBox` 容器起不来有两个原因:a. pause镜像没有下载到 b. calico/node 容器还没有正常运行,可用`docker ps -a` 验证 104 | + --experimental-bootstrap-kubeconfig 指向 bootstrap kubeconfig 文件,kubelet 使用该文件中的用户名和 token 向 kube-apiserver 发送 TLS Bootstrapping 请求 105 | + --cluster-dns 指定 kubedns 的 Service IP(可以先分配,后续创建 kubedns 服务时指定该 IP),--cluster-domain 指定域名后缀,这两个参数同时指定后才会生效; 106 | + --network-plugin=cni --cni-conf-dir=/etc/cni/net.d --cni-bin-dir={{ bin_dir }} 为使用cni 网络,并调用calico管理网络所需的配置 107 | + --fail-swap-on=false K8S 1.8需显示禁用这个,否则服务不能启动 108 | 109 | ### 批准kubelet 的 TLS 证书请求 110 | 111 | ``` bash 112 | sleep 15 && {{ bin_dir }}/kubectl get csr|grep 'Pending' | awk 'NR>0{print $1}'| xargs {{ bin_dir }}/kubectl certificate approve 113 | ``` 114 | + 增加15秒延时等待kubelet启动 115 | + `kubectl get csr |grep 'Pending'` 找出待批准的 TLS请求 116 | + `kubectl certificate approve` 批准请求 117 | 118 | ### 创建 kube-proxy 证书请求 119 | 120 | ``` bash 121 | { 122 | "CN": "system:kube-proxy", 123 | "hosts": [], 124 | "key": { 125 | "algo": "rsa", 126 | "size": 2048 127 | }, 128 | "names": [ 129 | { 130 | "C": "CN", 131 | "ST": "HangZhou", 132 | "L": "XS", 133 | "O": "k8s", 134 | "OU": "System" 135 | } 136 | ] 137 | } 138 | ``` 139 | + CN 指定该证书的 User 为 system:kube-proxy,预定义的 ClusterRoleBinding system:node-proxier 将User system:kube-proxy 与 Role system:node-proxier 绑定,授予了调用 kube-apiserver Proxy 相关 API 的权限; 140 | + kube-proxy 使用客户端证书可以不指定hosts 字段 141 | 142 | ### 创建 kube-proxy kubeconfig 文件 143 | 144 | ``` bash 145 | #设置集群参数 146 | shell: "{{ bin_dir }}/kubectl config set-cluster kubernetes \ 147 | --certificate-authority={{ ca_dir }}/ca.pem \ 148 | --embed-certs=true \ 149 | --server={{ KUBE_APISERVER }} \ 150 | --kubeconfig=kube-proxy.kubeconfig" 151 | #设置客户端认证参数 152 | shell: "{{ bin_dir }}/kubectl config set-credentials kube-proxy \ 153 | --client-certificate={{ ca_dir }}/kube-proxy.pem \ 154 | --client-key={{ ca_dir }}/kube-proxy-key.pem \ 155 | --embed-certs=true \ 156 | --kubeconfig=kube-proxy.kubeconfig" 157 | #设置上下文参数 158 | shell: "{{ bin_dir }}/kubectl config set-context default \ 159 | --cluster=kubernetes \ 160 | --user=kube-proxy \ 161 | --kubeconfig=kube-proxy.kubeconfig" 162 | #选择默认上下文 163 | shell: "{{ bin_dir }}/kubectl config use-context default --kubeconfig=kube-proxy.kubeconfig" 164 | ``` 165 | + 生成的kube-proxy.kubeconfig 配置文件需要移动到/etc/kubernetes/目录,后续kube-proxy服务启动参数里面需要指定 166 | 167 | ### 创建 
kube-proxy服务文件 168 | 169 | ``` bash 170 | [Unit] 171 | Description=Kubernetes Kube-Proxy Server 172 | Documentation=https://github.com/GoogleCloudPlatform/kubernetes 173 | After=network.target 174 | 175 | [Service] 176 | WorkingDirectory=/var/lib/kube-proxy 177 | ExecStart={{ bin_dir }}/kube-proxy \ 178 | --bind-address={{ NODE_IP }} \ 179 | --hostname-override={{ NODE_IP }} \ 180 | --kubeconfig=/etc/kubernetes/kube-proxy.kubeconfig \ 181 | --logtostderr=true \ 182 | --v=2 183 | Restart=on-failure 184 | RestartSec=5 185 | LimitNOFILE=65536 186 | 187 | [Install] 188 | WantedBy=multi-user.target 189 | ``` 190 | 191 | + --hostname-override 参数值必须与 kubelet 的值一致,否则 kube-proxy 启动后会找不到该 Node,从而不会创建任何 iptables 规则 192 | + 特别注意:kube-proxy 根据 --cluster-cidr 判断集群内部和外部流量,指定 --cluster-cidr 或 --masquerade-all 选项后 kube-proxy 才会对访问 Service IP 的请求做 SNAT;但是这个特性与calico 实现 network policy冲突,所以如果要用 network policy,这两个选项都不要指定。 193 | 194 | ### 验证 node 状态 195 | 196 | ``` bash 197 | systemctl status kubelet # 查看状态 198 | systemctl status kube-proxy 199 | journalctl -u kubelet # 查看日志 200 | journalctl -u kube-proxy 201 | ``` 202 | 运行 `kubectl get node` 可以看到类似 203 | 204 | ``` bash 205 | NAME STATUS ROLES AGE VERSION 206 | 192.168.1.42 Ready 2d v1.9.0 207 | 192.168.1.43 Ready 2d v1.9.0 208 | 192.168.1.44 Ready 2d v1.9.0 209 | ``` 210 | 211 | 212 | [前一篇](05-安装kube-master节点.md) -- [后一篇](07-安装calico网络组件.md) 213 | -------------------------------------------------------------------------------- /docs/07-安装calico网络组件.md: -------------------------------------------------------------------------------- 1 | ## 07-安装calico网络组件.md 2 | 3 | 推荐阅读[feiskyer-kubernetes指南](https://github.com/feiskyer/kubernetes-handbook) 网络相关内容 4 | 5 | 首先回顾下K8S网络设计原则,在配置集群网络插件或者实践K8S 应用/服务部署请时刻想到这些原则: 6 | 7 | - 1.每个Pod都拥有一个独立IP地址,Pod内所有容器共享一个网络命名空间 8 | - 2.集群内所有Pod都在一个直接连通的扁平网络中,可通过IP直接访问 9 | - 所有容器之间无需NAT就可以直接互相访问 10 | - 所有Node和所有容器之间无需NAT就可以直接互相访问 11 | - 容器自己看到的IP跟其他容器看到的一样 12 | - 3.Service cluster IP尽可在集群内部访问,外部请求需要通过NodePort、LoadBalance或者Ingress来访问 13 | 14 | `Container Network Interface (CNI)`是目前CNCF主推的网络模型,它由两部分组成: 15 | 16 | - CNI Plugin负责给容器配置网络,它包括两个基本的接口 17 | - 配置网络: AddNetwork(net *NetworkConfig, rt *RuntimeConf) (types.Result, error) 18 | - 清理网络: DelNetwork(net *NetworkConfig, rt *RuntimeConf) error 19 | - IPAM Plugin负责给容器分配IP地址 20 | 21 | Kubernetes Pod的网络是这样创建的: 22 | - 0.每个Pod除了创建时指定的容器外,都有一个kubelet启动时指定的`基础容器`,比如:`mirrorgooglecontainers/pause-amd64` `registry.access.redhat.com/rhel7/pod-infrastructure` 23 | - 1.首先 kubelet创建`基础容器`生成network namespace 24 | - 2.然后 kubelet调用网络CNI driver,由它根据配置调用具体的CNI 插件 25 | - 3.然后 CNI 插件给`基础容器`配置网络 26 | - 4.最后 Pod 中其他的容器共享使用`基础容器`的网络 27 | 28 | 本文档基于CNI driver 调用calico 插件来配置kubernetes的网络,常用CNI插件有 `flannel` `calico` `weave`等等,这些插件各有优势,也在互相借鉴学习优点,比如:在所有node节点都在一个二层网络时候,flannel提供hostgw实现,避免vxlan实现的udp封装开销,估计是目前最高效的;calico也针对L3 Fabric,推出了IPinIP的选项,利用了GRE隧道封装;因此这些插件都能适合很多实际应用场景,这里选择calico,主要考虑它支持 `kubernetes network policy`。 29 | 30 | 推荐阅读[calico kubernetes guide](https://docs.projectcalico.org/v2.6/getting-started/kubernetes/) 31 | 32 | calico-node需要在所有master节点和node节点安装 33 | 34 | ``` bash 35 | roles/calico/ 36 | ├── tasks 37 | │   └── main.yml 38 | └── templates 39 | ├── calico-csr.json.j2 40 | ├── calicoctl.cfg.j2 41 | ├── calico-rbac.yaml.j2 42 | └── calico.yaml.j2 43 | ``` 44 | 请在另外窗口打开[roles/calico/tasks/main.yml](../roles/calico/tasks/main.yml) 文件,对照看以下讲解内容。 45 | 46 | ### 创建calico 证书申请 47 | 48 | ``` bash 49 | { 50 | "CN": "calico", 51 | "hosts": [], 52 | "key": { 53 | "algo": "rsa", 54 | "size": 2048 55 | }, 56 | 
"names": [ 57 | { 58 | "C": "CN", 59 | "ST": "HangZhou", 60 | "L": "XS", 61 | "O": "k8s", 62 | "OU": "System" 63 | } 64 | ] 65 | } 66 | ``` 67 | - calico 使用客户端证书,所以hosts字段可以为空;后续可以看到calico证书用在四个地方: 68 | - calico/node 这个docker 容器运行时访问 etcd 使用证书 69 | - cni 配置文件中,cni 插件需要访问 etcd 使用证书 70 | - calicoctl 操作集群网络时访问 etcd 使用证书 71 | - calico/kube-controllers 同步集群网络策略时访问 etcd 使用证书 72 | 73 | ### 创建 calico DaemonSet yaml文件和rbac 文件 74 | 75 | 请对照 roles/calico/templates/calico.yaml.j2文件注释和以下注意内容 76 | 77 | + 详细配置参数请参考[calico官方文档](https://docs.projectcalico.org/v2.6/reference/node/configuration) 78 | + calico-node是以docker容器运行在host上的,因此需要把之前的证书目录 /etc/calico/ssl挂载到容器中 79 | + 配置ETCD_ENDPOINTS 、CA、证书等,所有{{ }}变量与ansible hosts文件中设置对应 80 | + 配置集群POD网络 CALICO_IPV4POOL_CIDR={{ CLUSTER_CIDR }} 81 | + **重要**本K8S集群运行在同网段kvm虚机上,虚机间没有网络ACL限制,因此可以设置`CALICO_IPV4POOL_IPIP=off`,如果你的主机位于不同网段,或者运行在公有云上需要打开这个选项 `CALICO_IPV4POOL_IPIP=always` 82 | + 配置FELIX_DEFAULTENDPOINTTOHOSTACTION=ACCEPT 默认允许Pod到Node的网络流量,更多[felix配置选项](https://docs.projectcalico.org/v2.6/reference/felix/configuration) 83 | 84 | ### 安装calico 网络 85 | 86 | + 安装之前必须确保`kube-master`和`kube-node`节点已经成功部署 87 | + 只需要在任意装有kubectl客户端的节点运行 `kubectl create `安装即可,脚本中选取`NODE_ID=node1`节点安装 88 | + 等待15s后(视网络拉取calico相关镜像速度),calico 网络插件安装完成,删除之前kube-node安装时默认cni网络配置 89 | 90 | ### [可选]配置calicoctl工具 [calicoctl.cfg.j2](roles/calico/templates/calicoctl.cfg.j2) 91 | 92 | ``` bash 93 | apiVersion: v1 94 | kind: calicoApiConfig 95 | metadata: 96 | spec: 97 | datastoreType: "etcdv2" 98 | etcdEndpoints: {{ ETCD_ENDPOINTS }} 99 | etcdKeyFile: /etc/calico/ssl/calico-key.pem 100 | etcdCertFile: /etc/calico/ssl/calico.pem 101 | etcdCACertFile: /etc/calico/ssl/ca.pem 102 | ``` 103 | 104 | ### 验证calico网络 105 | 106 | 执行calico安装成功后可以验证如下:(需要等待镜像下载完成,有时候即便上一步已经配置了docker国内加速,还是可能比较慢,请确认以下容器运行起来以后,再执行后续验证步骤) 107 | 108 | ``` bash 109 | kubectl get pod --all-namespaces 110 | NAMESPACE NAME READY STATUS RESTARTS AGE 111 | kube-system calico-kube-controllers-5c6b98d9df-xj2n4 1/1 Running 0 1m 112 | kube-system calico-node-4hr52 2/2 Running 0 1m 113 | kube-system calico-node-8ctc2 2/2 Running 0 1m 114 | kube-system calico-node-9t8md 2/2 Running 0 1m 115 | ``` 116 | 117 | **查看网卡和路由信息** 118 | 119 | 先在集群创建几个测试pod: `kubectl run test --image=busybox --replicas=3 sleep 30000` 120 | 121 | ``` bash 122 | # 查看网卡信息 123 | ip a 124 | ``` 125 | 126 | + 可以看到包含类似cali1cxxx的网卡,是calico为测试pod生成的 127 | + tunl0网卡现在不用管,是默认生成的,当开启IPIP 特性时使用的隧道 128 | 129 | ``` bash 130 | # 查看路由 131 | route -n 132 | Kernel IP routing table 133 | Destination Gateway Genmask Flags Metric Ref Use Iface 134 | 0.0.0.0 192.168.1.1 0.0.0.0 UG 0 0 0 ens3 135 | 192.168.1.0 0.0.0.0 255.255.255.0 U 0 0 0 ens3 136 | 172.17.0.0 0.0.0.0 255.255.0.0 U 0 0 0 docker0 137 | 172.20.3.64 192.168.1.34 255.255.255.192 UG 0 0 0 ens3 138 | 172.20.33.128 0.0.0.0 255.255.255.192 U 0 0 0 * 139 | 172.20.33.129 0.0.0.0 255.255.255.255 UH 0 0 0 caliccc295a6d4f 140 | 172.20.104.0 192.168.1.35 255.255.255.192 UG 0 0 0 ens3 141 | 172.20.166.128 192.168.1.63 255.255.255.192 UG 0 0 0 ens3 142 | ``` 143 | 144 | **查看所有calico节点状态** 145 | 146 | ``` bash 147 | calicoctl node status 148 | Calico process is running. 
149 | 150 | IPv4 BGP status 151 | +--------------+-------------------+-------+----------+-------------+ 152 | | PEER ADDRESS | PEER TYPE | STATE | SINCE | INFO | 153 | +--------------+-------------------+-------+----------+-------------+ 154 | | 192.168.1.34 | node-to-node mesh | up | 12:34:00 | Established | 155 | | 192.168.1.35 | node-to-node mesh | up | 12:34:00 | Established | 156 | | 192.168.1.63 | node-to-node mesh | up | 12:34:01 | Established | 157 | +--------------+-------------------+-------+----------+-------------+ 158 | ``` 159 | 160 | **BGP 协议是通过TCP 连接来建立邻居的,因此可以用netstat 命令验证 BGP Peer** 161 | 162 | ``` bash 163 | netstat -antlp|grep ESTABLISHED|grep 179 164 | tcp 0 0 192.168.1.66:179 192.168.1.35:41316 ESTABLISHED 28479/bird 165 | tcp 0 0 192.168.1.66:179 192.168.1.34:40243 ESTABLISHED 28479/bird 166 | tcp 0 0 192.168.1.66:179 192.168.1.63:48979 ESTABLISHED 28479/bird 167 | ``` 168 | 169 | **查看集群ipPool情况** 170 | 171 | ``` bash 172 | calicoctl get ipPool -o yaml 173 | - apiVersion: v1 174 | kind: ipPool 175 | metadata: 176 | cidr: 172.20.0.0/16 177 | spec: 178 | nat-outgoing: true 179 | ``` 180 | 181 | [前一篇](06-安装kube-node节点.md) -- [后一篇]() 182 | -------------------------------------------------------------------------------- /docs/07-安装flannel网络组件.md: -------------------------------------------------------------------------------- 1 | ## 07-安装flannel网络组件.md 2 | 3 | ** 注意: ** 只需选择安装`calico` `flannel`其中之一,如果你已经安装了`calico`,请跳过此步骤。 4 | 5 | 关于k8s网络设计和CNI Plugin的介绍请阅读[安装calico](07-安装calico网络组件.md)中相关内容。 6 | 7 | `Flannel`是最早应用到k8s集群的网络插件之一,简单高效,且提供多个后端`backend`模式供选择;本文介绍以`DaemonSet Pod`方式集成到k8s集群,需要在所有master节点和node节点安装。 8 | 9 | ``` text 10 | roles/flannel/ 11 | ├── tasks 12 | │   └── main.yml 13 | └── templates 14 | └── kube-flannel.yaml.j2 15 | ``` 16 | 17 | 请在另外窗口打开[roles/flannel/tasks/main.yml](../roles/flannel/tasks/main.yml) 文件,对照看以下讲解内容。 18 | 19 | ### 下载基础cni 插件 20 | 21 | 请到CNI 插件最新[release](https://github.com/containernetworking/plugins/releases)页面下载[cni-v0.6.0.tgz](https://github.com/containernetworking/plugins/releases/download/v0.6.0/cni-v0.6.0.tgz),解压后里面有很多插件,选择如下几个复制到项目 `bin`目录下 22 | 23 | - flannel用到的插件 24 | - bridge 25 | - flannel 26 | - host-local 27 | - loopback 28 | - portmap 29 | 30 | Flannel CNI 插件的配置文件可以包含多个`plugin` 或由其调用其他`plugin`;`Flannel DaemonSet Pod`运行以后会生成`/run/flannel/subnet.env `文件,例如: 31 | 32 | ``` bash 33 | FLANNEL_NETWORK=10.1.0.0/16 34 | FLANNEL_SUBNET=10.1.17.1/24 35 | FLANNEL_MTU=1472 36 | FLANNEL_IPMASQ=true 37 | ``` 38 | 然后它利用这个文件信息去配置和调用`bridge`插件来生成容器网络,调用`host-local`来管理`IP`地址,例如: 39 | 40 | ``` bash 41 | { 42 | "name": "mynet", 43 | "type": "bridge", 44 | "mtu": 1472, 45 | "ipMasq": false, 46 | "isGateway": true, 47 | "ipam": { 48 | "type": "host-local", 49 | "subnet": "10.1.17.0/24" 50 | } 51 | } 52 | ``` 53 | - 更多相关介绍请阅读: 54 | - [flannel kubernetes 集成](https://github.com/coreos/flannel/blob/master/Documentation/kubernetes.md) 55 | - [flannel cni 插件](https://github.com/containernetworking/plugins/tree/master/plugins/meta/flannel) 56 | - [更多 cni 插件](https://github.com/containernetworking/plugins) 57 | 58 | ### 准备`Flannel DaemonSet` yaml配置文件 59 | 60 | 请阅读 `roles/flannel/templates/kube-flannel.yaml.j2` 内容,注意: 61 | 62 | + 本安装方式,flannel使用apiserver 存储数据,而不是 etcd 63 | + 配置相关RBAC 权限和 `service account` 64 | + 配置`ConfigMap`包含 CNI配置和 flannel配置(指定backend等),和`hosts`文件中相关设置对应 65 | + `DaemonSet Pod`包含两个容器,一个容器运行flannel本身,另一个init容器部署cni 配置文件 66 | + 为方便国内加速使用镜像 `jmgao1983/flannel:v0.9.1-amd64` (官方镜像在docker-hub上的转存) 67 | 68 | ### 安装 flannel网络 69 | 70 | + 
安装之前必须确保kube-master和kube-node节点已经成功部署 71 | + 只需要在任意装有kubectl客户端的节点运行 kubectl create安装即可,脚本中选取NODE_ID=node1节点安装 72 | + 等待15s后(视网络拉取相关镜像速度),flannel 网络插件安装完成,删除之前kube-node安装时默认cni网络配置 73 | 74 | ### 验证flannel网络 75 | 76 | 执行flannel安装成功后可以验证如下:(需要等待镜像下载完成,有时候即便上一步已经配置了docker国内加速,还是可能比较慢,请确认以下容器运行起来以后,再执行后续验证步骤) 77 | 78 | ``` bash 79 | # kubectl get pod --all-namespaces 80 | NAMESPACE NAME READY STATUS RESTARTS AGE 81 | kube-system kube-flannel-ds-m8mzm 1/1 Running 0 3m 82 | kube-system kube-flannel-ds-mnj6j 1/1 Running 0 3m 83 | kube-system kube-flannel-ds-mxn6k 1/1 Running 0 3m 84 | ``` 85 | 在集群创建几个测试pod: `kubectl run test --image=busybox --replicas=3 sleep 30000` 86 | 87 | ``` bash 88 | # kubectl get pod --all-namespaces -o wide|head -n 4 89 | NAMESPACE NAME READY STATUS RESTARTS AGE IP NODE 90 | default busy-5956b54c8b-ld4gb 1/1 Running 0 9m 172.20.2.7 192.168.1.1 91 | default busy-5956b54c8b-lj9l9 1/1 Running 0 9m 172.20.1.5 192.168.1.2 92 | default busy-5956b54c8b-wwpkz 1/1 Running 0 9m 172.20.0.6 192.168.1.3 93 | 94 | # 查看路由 95 | # ip route 96 | default via 192.168.1.254 dev ens3 onlink 97 | 192.168.1.0/24 dev ens3 proto kernel scope link src 192.168.1.1 98 | 172.17.0.0/16 dev docker0 proto kernel scope link src 172.17.0.1 linkdown 99 | 172.20.0.0/24 via 192.168.1.3 dev ens3 100 | 172.20.1.0/24 via 192.168.1.2 dev ens3 101 | 172.20.2.0/24 dev cni0 proto kernel scope link src 172.20.2.1 102 | ``` 103 | 现在各节点上分配 ping 这三个POD网段地址,确保能通: 104 | 105 | ``` bash 106 | ping 172.20.2.7 107 | ping 172.20.1.5 108 | ping 172.20.0.6 109 | ``` 110 | 111 | [前一篇](06-安装kube-node节点.md) -- [后一篇]() 112 | -------------------------------------------------------------------------------- /docs/guide/dashboard.md: -------------------------------------------------------------------------------- 1 | ## dashboard 2 | 3 | 本文档基于 dashboard 1.6.3版本,目前 dashboard 已出 1.8.0版本,在安全控制方面已有很大改进,后续更新新版本说明。 4 | 5 | ### 部署 6 | 7 | 配置文件参考 `https://github.com/kubernetes/kubernetes` 项目目录 `kubernetes/cluster/addons/dashboard` 8 | 9 | 安装很简单 `kubectl create -f manifests/dashboard/`,主要讲解一些注意事项 10 | 11 | 请在另外窗口打开 [kubernetes-dashboard.yaml](../../manifests/dashboard/kubernetes-dashboard.yaml) 12 | 13 | + 由于 kube-apiserver 启用了 RBAC授权,dashboard使用的 ServiceAccount `kubernetes-dashboard` 必须有相应的权限去访问apiserver(在新版本1.8.0中,该访问权限已按最小化方式授权),在1.6.3 版本,先粗放一点,把`kubernetes-dashboard` 与 集群角色 `cluster-admin` 绑定,这样dashboard就拥有了所有访问apiserver的权限。 14 | + 开发测试环境为了方便配置dashboard-service时候,指定了 `NodePort`方式暴露服务,这样集群外部可以使用 `http://NodeIP:NodePort` 方式直接访问 dashboard,生产环境建议关闭该访问途径。 15 | 16 | ### 验证 17 | 18 | ``` bash 19 | # 查看pod 运行状态 20 | kubectl get pod -n kube-system | grep dashboard 21 | kubernetes-dashboard-86bd8778bf-w4974 1/1 Running 0 12h 22 | # 查看dashboard service 23 | kubectl get svc -n kube-system|grep dashboard 24 | kubernetes-dashboard NodePort 10.68.7.67 80:5452/TCP 12h 25 | # 查看集群服务 26 | kubectl cluster-info|grep dashboard 27 | kubernetes-dashboard is running at https://192.168.1.10:6443/api/v1/namespaces/kube-system/services/kubernetes-dashboard/proxy 28 | # 查看pod 运行日志,关注有没有错误 29 | kubectl logs kubernetes-dashboard-86bd8778bf-w4974 -n kube-system 30 | ``` 31 | 32 | ### 访问 33 | 34 | 因为dashboard 作为k8s 原生UI,能够展示各种资源信息,甚至可以有修改、增加、删除权限,所以有必要对访问进行认证和控制,本项目预置部署的集群有以下安全设置:详见 [apiserver配置模板](../../roles/kube-master/templates/kube-apiserver.service.j2) 35 | 36 | + 启用 `TLS认证` `RBAC授权`等安全特性 37 | + 关闭 apiserver非安全端口8080的外部访问`--insecure-bind-address=127.0.0.1` 38 | + 关闭匿名认证`--anonymous-auth=false` 39 | + 补充启用基本密码认证 
`--basic-auth-file=/etc/kubernetes/ssl/basic-auth.csv`,[密码文件模板](../../roles/kube-master/templates/basic-auth.csv.j2)中按照每行(密码,用户名,序号)的格式,可以定义多个用户 40 | 41 | #### 1. 临时访问:使用 `http://NodeIP:NodePort` 方式直接访问 dashboard,生产环境建议关闭该途径 42 | 43 | #### 2. 用户+密码访问:安全性比证书方式差点,务必保管好密码文件`basic-auth.csv` 44 | 45 | - 这里演示两种权限,使用admin 登陆dashboard拥有所有权限,使用readonly 登陆后仅查看权限,首先在 master节点文件 `/etc/kubernetes/ssl/basic-auth.csv` 确认用户名和密码,如果要增加或者修改用户,修改保存该文件后记得逐个重启你的master 节点 46 | - 为了演示用户密码访问,如果你已经完成证书访问方式,你可以在浏览器删除证书,或者访问时候浏览器询问你证书时不选证书 47 | - 2.1 设置用户admin 的RBAC 权限,如下运行配置文件 `kubectl create -f ui-admin-rbac.yaml` 48 | 49 | ``` bash 50 | kind: ClusterRole 51 | apiVersion: rbac.authorization.k8s.io/v1 52 | metadata: 53 | name: ui-admin 54 | rules: 55 | - apiGroups: 56 | - "" 57 | resources: 58 | - services 59 | - services/proxy 60 | verbs: 61 | - '*' 62 | 63 | --- 64 | apiVersion: rbac.authorization.k8s.io/v1 65 | kind: RoleBinding 66 | metadata: 67 | name: ui-admin-binding 68 | namespace: kube-system 69 | roleRef: 70 | apiGroup: rbac.authorization.k8s.io 71 | kind: ClusterRole 72 | name: ui-admin 73 | subjects: 74 | - apiGroup: rbac.authorization.k8s.io 75 | kind: User 76 | name: admin 77 | ``` 78 | - 2.2 设置用户readonly 的RBAC 权限,如下运行配置文件 `kubectl create -f ui-read-rbac.yaml` 79 | 80 | ``` bash 81 | kind: ClusterRole 82 | apiVersion: rbac.authorization.k8s.io/v1 83 | metadata: 84 | name: ui-read 85 | rules: 86 | - apiGroups: 87 | - "" 88 | resources: 89 | - services 90 | - services/proxy 91 | verbs: 92 | - get 93 | - list 94 | - watch 95 | 96 | --- 97 | apiVersion: rbac.authorization.k8s.io/v1 98 | kind: RoleBinding 99 | metadata: 100 | name: ui-read-binding 101 | namespace: kube-system 102 | roleRef: 103 | apiGroup: rbac.authorization.k8s.io 104 | kind: ClusterRole 105 | name: ui-read 106 | subjects: 107 | - apiGroup: rbac.authorization.k8s.io 108 | kind: User 109 | name: readonly 110 | ``` 111 | - 2.3 访问 `https://x.x.x.x:6443/api/v1/namespaces/kube-system/services/kubernetes-dashboard/proxy` 使用 admin登陆拥有所有权限,比如删除某个部署;使用 readonly登陆只有查看权限,尝试删除某个部署会提示错误 `forbidden: User \"readonly\" cannot delete services/proxy in the namespace \"kube-system\"` 112 | 113 | #### 3. 
证书访问:最安全的方式,配置较复杂 114 | - 使用集群CA 生成客户端证书,可以根据需要生成权限不同的证书,这里为了演示直接使用 kubectl使用的证书和key(在03.kubectl.yml阶段生成),该证书拥有所有权限 115 | - 指定格式导出该证书,进入`/etc/kubernetes/ssl`目录,使用命令`openssl pkcs12 -export -in admin.pem -inkey admin-key.pem -out kube-admin.p12` 提示输入证书密码和确认密码,可以用密码再增加一层保护,也可以直接回车跳过,完成后目录下多了 `kube-admin.p12`文件,将它分发给授权的用户 116 | - 用户将 `kube-admin.p12` 双击导入证书即可,`IE` 和`Chrome` 中输入`https://x.x.x.x:6443/api/v1/namespaces/kube-system/services/kubernetes-dashboard/proxy` 或者 `https://x.x.x.x:6443/ui` 即可访问。补充:最新firefox需要在浏览器中单独导入 [选项] - [隐私与安全] - [证书/查看证书] - [您的证书] 页面点击 [导入] 该证书 117 | 118 | ### 小结 119 | 120 | + dashboard 版本 1.6.3 访问控制实现较复杂,文档中给出的例子也有助于你理解 RBAC的灵活控制能力,当然最好去[官方文档](https://kubernetes.io/docs/admin/authorization/rbac/)学习一下,这块篇幅不长 121 | + 由于还未部署 Heapster 插件,当前 dashboard 不能展示 Pod、Nodes 的 CPU、内存等 metric 图形,后续部署 heapster后自然能够看到 122 | + 本文中的权限设置仅供演示用,生产环境请在此基础上修改成适合你安全需求的方式 123 | 124 | [前一篇](kubedns.md) -- [目录](index.md) -- [后一篇](heapster.md) 125 | -------------------------------------------------------------------------------- /docs/guide/efk.md: -------------------------------------------------------------------------------- 1 | ## EFK 2 | -------------------------------------------------------------------------------- /docs/guide/harbor.md: -------------------------------------------------------------------------------- 1 | ## harbor 2 | 3 | Habor是由VMWare中国团队开源的容器镜像仓库。事实上,Habor是在Docker Registry上进行了相应的企业级扩展,从而获得了更加广泛的应用,这些新的企业级特性包括:管理用户界面,基于角色的访问控制 ,水平扩展,同步,AD/LDAP集成以及审计日志等。本文档仅说明部署单个基础harbor服务的步骤。 4 | 5 | ### 安装步骤 6 | 7 | 1. 在deploy节点下载最新的 [docker-compose](https://github.com/docker/compose/releases) 二进制文件,改名后把它放到项目 `/etc/ansible/bin`目录下,后续版本会一起打包进百度云盘`k8s.xxx.tar.gz`文件中,可以省略该步骤 8 | 9 | ``` bash 10 | wget https://github.com/docker/compose/releases/download/1.18.0/docker-compose-Linux-x86_64 11 | mv docker-compose-Linux-x86_64 /etc/ansible/bin/docker-compose 12 | ``` 13 | 2. 在deploy节点下载最新的 [harbor](https://github.com/vmware/harbor/releases) 离线安装包,把它放到项目 `/etc/ansible/down` 目录下,也可以从分享的百度云盘下载 14 | 15 | 3. 在deploy节点编辑/etc/ansible/hosts文件,可以参考 `example`目录下的模板,修改部分举例如下 16 | 17 | ``` bash 18 | # 如果启用harbor,请配置后面harbor相关参数 19 | [harbor] 20 | 192.168.1.8 NODE_IP="192.168.1.8" 21 | 22 | #私有仓库 harbor服务器 (域名或者IP) 23 | HARBOR_IP="192.168.1.8" 24 | HARBOR_DOMAIN="harbor.test.com" 25 | ``` 26 | 27 | 4. 在deploy节点执行 `cd /etc/ansible && ansible-playbook 11.harbor.yml`,完成harbor安装 28 | 29 | ### 安装讲解 30 | 31 | 根据 `11.harbor.yml`文件,harbor节点需要以下步骤: 32 | 33 | 1. role `prepare` 基础系统环境准备 34 | 1. role `docker` 安装docker 35 | 1. role `harbor` 安装harbor 36 | 37 | `kube-node`节点在harbor部署完之后,需要配置harbor的证书,并可以在hosts里面添加harbor的域名解析,如果你的环境中有dns服务器,可以跳过hosts文件设置 38 | 39 | 请在另外窗口打开 [roles/harbor/tasks/main.yml](../../roles/harbor/tasks/main.yml),对照以下讲解 40 | 41 | 1. 下载docker-compose可执行文件到$PATH目录 42 | 1. 自注册变量result判断是否已经安装harbor,避免重复安装问题 43 | 1. 解压harbor离线安装包到指定目录 44 | 1. 导入harbor所需 docker images 45 | 1. 创建harbor证书和私钥(复用集群的CA证书) 46 | 1. 修改harbor.cfg配置文件 47 | 1. 启动harbor安装脚本 48 | 49 | ### 验证harbor 50 | 51 | 1. 在harbor节点使用`docker ps -a` 查看harbor容器组件运行情况 52 | 1. 
浏览器访问harbor节点的IP地址 `https://{{ NODE_IP }}`,使用账号 admin 和 密码 Harbor12345 (harbor.cfg 配置文件中的默认)登陆系统 53 | 54 | ### 在k8s集群使用harbor 55 | 56 | admin用户web登陆后可以方便的创建项目,并指定项目属性(公开或者私有);然后创建用户,并在项目`成员`选项中选择用户和权限; 57 | 58 | #### 镜像上传 59 | 60 | 在node上使用harbor私有镜像仓库首先需要在指定目录配置harbor的CA证书,详见 `11.harbor.yml`文件。 61 | 62 | 使用docker客户端登陆`harbor.test.com`,然后把镜像tag成 `harbor.test.com/$项目名/$镜像名:$TAG` 之后,即可使用docker push 上传 63 | 64 | ``` bash 65 | docker login harbor.test.com 66 | Username: 67 | Password: 68 | Login Succeeded 69 | docker tag busybox:latest harbor.test.com/library/busybox:latest 70 | docker push harbor.test.com/library/busybox:latest 71 | The push refers to a repository [harbor.test.com/library/busybox] 72 | 0271b8eebde3: Pushed 73 | latest: digest: sha256:91ef6c1c52b166be02645b8efee30d1ee65362024f7da41c404681561734c465 size: 527 74 | ``` 75 | #### k8s中使用harbor 76 | 77 | 1. 如果镜像保存在harbor中的公开项目中,那么只需要在yaml文件中简单指定harbor私有镜像即可,例如 78 | 79 | ``` bash 80 | apiVersion: v1 81 | kind: Pod 82 | metadata: 83 | name: test-busybox 84 | spec: 85 | containers: 86 | - name: test-busybox 87 | image: harbor.test.com/xxx/busybox:latest 88 | imagePullPolicy: Always 89 | ``` 90 | 91 | 2. 如果镜像保存在harbor中的私有项目中,那么yaml文件中使用该私有项目的镜像需要指定`imagePullSecrets`,例如 92 | 93 | ``` bash 94 | apiVersion: v1 95 | kind: Pod 96 | metadata: 97 | name: test-busybox 98 | spec: 99 | containers: 100 | - name: test-busybox 101 | image: harbor.test.com/xxx/busybox:latest 102 | imagePullPolicy: Always 103 | imagePullSecrets: 104 | - name: harborKey1 105 | ``` 106 | 其中 `harborKey1`可以用以下两种方式生成: 107 | 108 | + 1.使用 `kubectl create secret docker-registry harborkey1 --docker-server=harbor.test.com --docker-username=admin --docker-password=Harbor12345 --docker-email=team@test.com` 109 | + 2.使用yaml配置文件生成 110 | 111 | ``` bash 112 | //harborkey1.yaml 113 | apiVersion: v1 114 | kind: Secret 115 | metadata: 116 | name: harborkey1 117 | namespace: default 118 | data: 119 | .dockerconfigjson: {base64 -w 0 ~/.docker/config.json} 120 | type: kubernetes.io/dockerconfigjson 121 | ``` 122 | 前面docker login会在~/.docker下面创建一个config.json文件保存鉴权串,这里secret yaml的.dockerconfigjson后面的数据就是那个json文件的base64编码输出(-w 0让base64输出在单行上,避免折行) 123 | 124 | ### 管理harbor 125 | 126 | + 日志目录 `/var/log/harbor` 127 | + 数据目录 `/data` ,其中最主要是 `/data/database` 和 `/data/registry` 目录,如果你要彻底重新安装harbor,删除这两个目录即可 128 | 129 | 先进入harbor安装目录 `cd /root/local/harbor`,常规操作如下: 130 | 131 | 1. 暂停harbor `docker-compose stop` : docker容器stop,并不删除容器 132 | 2. 恢复harbor `docker-compose start` : 恢复docker容器运行 133 | 3. 停止harbor `docker-compose down -v` : 停止并删除docker容器 134 | 4. 启动harbor `docker-compose up -d` : 启动所有docker容器 135 | 136 | 修改harbor的运行配置,需要如下步骤: 137 | 138 | ``` bash 139 | # 停止 harbor 140 | docker-compose down -v 141 | # 修改配置 142 | vim harbor.cfg 143 | # 执行./prepare已更新配置到docker-compose.yml文件 144 | ./prepare 145 | # 启动 harbor 146 | docker-compose up -d 147 | ``` 148 | #### harbor 升级 149 | 150 | 以下步骤基于harbor 1.1.2 版本升级到 1.2.2版本 151 | 152 | ``` bash 153 | # 进入harbor解压缩后的目录,停止harbor 154 | cd /root/local/harbor 155 | docker-compose down 156 | 157 | # 备份这个目录 158 | cd .. 
159 | mkdir -p /backup && mv harbor /backup/harbor 160 | 161 | # 下载更新的离线安装包,并解压 162 | tar zxvf harbor-offline-installer-v1.2.2.tgz -C /root/local 163 | 164 | # 使用官方数据库迁移工具,备份数据库,修改数据库连接用户和密码,创建数据库备份目录 165 | # 迁移工具使用docker镜像,镜像tag由待升级到目标harbor版本决定,这里由 1.1.2升级到1.2.2,所以使用 tag 1.2 166 | docker pull vmware/harbor-db-migrator:1.2 167 | mkdir -p /backup/db-1.1.2 168 | docker run -it --rm -e DB_USR=root -e DB_PWD=xxxx -v /data/database:/var/lib/mysql -v /backup/db-1.1.2:/harbor-migration/backup vmware/harbor-db-migrator:1.2 backup 169 | 170 | # 因为新老版本数据库结构不一样,需要数据库migration 171 | docker run -it --rm -e DB_USR=root -e DB_PWD=xxxx -v /data/database:/var/lib/mysql vmware/harbor-db-migrator:1.2 up head 172 | 173 | # 修改新版本 harbor.cfg配置,需要保持与老版本相关配置项保持一致,然后执行安装即可 174 | cd /root/local/harbor 175 | vi harbor.cfg 176 | ./install.sh 177 | 178 | [前一篇]() -- [目录](index.md) -- [后一篇]() 179 | -------------------------------------------------------------------------------- /docs/guide/heapster.md: -------------------------------------------------------------------------------- 1 | ## heapster 2 | 3 | `Heapster` 监控整个集群资源的过程:首先kubelet内置的cAdvisor收集本node节点的容器资源占用情况,然后heapster从kubelet提供的api采集节点和容器的资源占用,最后heapster 持久化数据存储到`influxdb`中(也可以是其他的存储后端,Google Cloud Monitoring等)。 4 | 5 | `Grafana` 则通过配置数据源指向上述 `influxdb`,从而界面化显示监控信息。 6 | 7 | ### 部署 8 | 9 | 访问 [heapster release](https://github.com/kubernetes/heapster)页面下载最新 release 1.4.3,参考目录`heapster-1.3.0/deploy/kube-config/influxdb`,因为这个官方release 在k8s1.8.4使用还是有不少问题,请在参考的基础上使用本项目提供的yaml文件 10 | 11 | 1. [grafana](../../manifests/heapster/grafana.yaml) 12 | 1. [heapster](../../manifests/heapster/heapster.yaml) 13 | 1. [influxdb](../../manifests/heapster/influxdb.yaml) 14 | 15 | 安装比较简单 `kubectl create -f /etc/ansible/manifests/heapster/`,主要讲一下注意事项 16 | 17 | #### grafana.yaml配置 18 | 19 | + 修改`heapster-grafana-amd64`镜像,v4.2.0版本修改成 v4.4.3版本,否则 grafana pod无法起来,报`CrashLoopBackOff`错误,详见[ISSUE](https://github.com/kubernetes/heapster/issues/1806) 20 | + 参数`- name: GF_SERVER_ROOT_URL`的设置要根据后续访问grafana的方式确定,如果使用 NodePort方式访问,必须设置成:`value: /`;如果使用apiserver proxy方式,必须设置成`value: /api/v1/namespaces/kube-system/services/monitoring-grafana/proxy/`,注意官方文件中预设的`value: /api/v1/proxy/namespaces/kube-system/services/monitoring-grafana/`已经不适合k8s 1.8.0版本了, 21 | + `kubernetes.io/cluster-service: 'true'` 和 `type: NodePort` 根据上述的访问方式设置,建议使用apiserver 方式,可以增加安全控制 22 | 23 | #### heapster.yaml配置 24 | 25 | + 需要配置 RBAC 把 ServiceAccount `heapster` 与集群预定义的集群角色 `system:heapster` 绑定,这样heapster pod才有相应权限去访问 apiserver 26 | 27 | #### influxdb.yaml配置 28 | 29 | + influxdb 官方建议使用命令行或 HTTP API 接口来查询数据库,从 v1.1.0 版本开始默认关闭 admin UI,这里参考[opsnull](https://github.com/opsnull/follow-me-install-kubernetes-cluster/blob/master/10-%E9%83%A8%E7%BD%B2Heapster%E6%8F%92%E4%BB%B6.md)给出的方法,增加ConfigMap配置,然后挂载到容器中,覆盖默认配置 30 | + 注意influxdb 这个版本只能使用 NodePort方式访问它的admin UI,才能正确连接数据库 31 | 32 | ### 验证 33 | 34 | ``` bash 35 | $ kubectl get pods -n kube-system | grep -E 'heapster|monitoring' 36 | heapster-3273315324-tmxbg 1/1 Running 0 11m 37 | monitoring-grafana-2255110352-94lpn 1/1 Running 0 11m 38 | monitoring-influxdb-884893134-3vb6n 1/1 Running 0 11m 39 | ``` 40 | 扩展检查Pods日志: 41 | ``` bash 42 | $ kubectl logs heapster-3273315324-tmxbg -n kube-system 43 | $ kubectl logs monitoring-grafana-2255110352-94lpn -n kube-system 44 | $ kubectl logs monitoring-influxdb-884893134-3vb6n -n kube-system 45 | ``` 46 | 部署完heapster,使用上一步介绍方法查看kubernets dashboard 界面,就可以看到各 Nodes、Pods 的 CPU、内存、负载等利用率曲线图,如果 dashboard上还无法看到利用率图,使用以下命令重启 dashboard pod: 47 | + 首先删除 
`kubectl scale deploy kubernetes-dashboard --replicas=0 -n kube-system` 48 | + 然后新建 `kubectl scale deploy kubernetes-dashboard --replicas=1 -n kube-system` 49 | 50 | ### 访问 grafana 51 | 52 | #### 1.通过apiserver 访问(建议的方式) 53 | 54 | ``` bash 55 | kubectl cluster-info | grep grafana 56 | monitoring-grafana is running at https://x.x.x.x:6443/api/v1/namespaces/kube-system/services/monitoring-grafana/proxy 57 | ``` 58 | 请参考上一步 [访问dashboard](dashboard.md)同样的方式,使用证书或者密码认证,访问`https://x.x.x.x:6443/api/v1/namespaces/kube-system/services/monitoring-grafana/proxy`即可,如图可以点击[Home]选择查看 `Cluster` `Pods`的监控图形 59 | 60 | ![grafana](../../pics/grafana.png) 61 | 62 | #### 2.通过NodePort 访问 63 | 64 | + 修改 `Service` 允许 type: NodePort 65 | + 修改 `Deployment`中参数`- name: GF_SERVER_ROOT_URL`为 `value: /` 66 | + 如果之前grafana已经运行,使用 `kubectl replace --force -f /etc/ansible/manifests/heapster/grafana.yaml` 重启 grafana插件 67 | 68 | ``` bash 69 | kubectl get svc -n kube-system|grep grafana 70 | monitoring-grafana NodePort 10.68.135.50 80:5855/TCP 11m 71 | ``` 72 | 然后用浏览器访问 http://NodeIP:5855 73 | 74 | ### 访问 influxdb 75 | 76 | 官方建议使用命令行或 HTTP API 接口来查询`influxdb`数据库,如非必要就跳过此步骤 77 | 78 | 目前根据测试 k8s v1.8.4 使用 NodePort 方式访问 admin 界面后才能正常连接数据库 79 | 80 | ``` bash 81 | kubectl get svc -n kube-system|grep influxdb 82 | monitoring-influxdb NodePort 10.68.195.193 8086:3382/TCP,8083:7651/TCP 12h 83 | ``` 84 | + 如上例子,8083是管理页面端口,对外暴露的端口为7651 85 | + 8086 是数据连接端口,对外暴露的端口为3382 86 | 87 | 使用浏览器访问 http://NodeIP:7651,如图在页面的 “Connection Settings” 的 Host 中输入 node IP, Port 中输入 3382(由8086对外暴露的端口),点击 “Save” 即可 88 | 89 | ![influxdb](../../pics/influxdb.png) 90 | 91 | 92 | [前一篇](dashboard.md) -- [目录](index.md) -- [后一篇](ingress.md) 93 | -------------------------------------------------------------------------------- /docs/guide/hpa.md: -------------------------------------------------------------------------------- 1 | ## Horizontal Pod Autoscaling 2 | 3 | 自动水平伸缩,是指运行在k8s上的应用负载(POD),可以根据资源使用率进行自动扩容、缩容;我们知道应用的资源使用率通常都有高峰和低谷,所以k8s的`HPA`特性应运而生;它也是最能体现区别于传统运维的优势之一,不仅能够弹性伸缩,而且完全自动化! 
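后文的基础示例使用 `kubectl autoscale` 命令创建 HPA;等价地,也可以用 yaml 文件声明式地创建 `autoscaling/v1` 版本的 HPA 对象,下面给出一个参考写法(仅为示意,其中 php-apache 与后文基础示例的 Deployment 名称一致,阈值和副本数可按需调整):

``` bash
# hpa-php-apache.yaml(文件名仅为示例)
apiVersion: autoscaling/v1
kind: HorizontalPodAutoscaler
metadata:
  name: php-apache
spec:
  scaleTargetRef:
    # 也可显式指定 apiVersion(视 Deployment 实际使用的 API 版本而定)
    kind: Deployment
    name: php-apache
  minReplicas: 1
  maxReplicas: 10
  targetCPUUtilizationPercentage: 50
```
创建并查看:`kubectl create -f hpa-php-apache.yaml && kubectl get hpa php-apache`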
4 | 5 | 根据 CPU 使用率或自定义 metrics 自动扩展 Pod 数量(支持 replication controller、deployment);k8s1.6版本之前是通过kubelet来获取监控指标,1.6版本之后是通过api server、heapster或者kube-aggregator来获取监控指标。 6 | 7 | ### Metrics支持 8 | 9 | 根据不同版本的API中,HPA autoscale时靠以下指标来判断资源使用率: 10 | - autoscaling/v1: CPU 11 | - autoscaling/v2alpha1 12 | - 内存 13 | - 自定义metrics 14 | - 多metrics组合: 根据每个metric的值计算出scale的值,并将最大的那个指作为扩容的最终结果 15 | 16 | ### 基础示例 17 | 18 | 本实验环境基于k8s 1.8 和 1.9,仅使用`autoscaling/v1` 版本API 19 | 20 | ``` bash 21 | # 创建deploy和service 22 | $ kubectl run php-apache --image=pilchard/hpa-example --requests=cpu=200m --expose --port=80 23 | 24 | # 创建autoscaler 25 | $ kubectl autoscale deploy php-apache --cpu-percent=50 --min=1 --max=10 26 | 27 | # 稍等查看hpa状态 28 | $ kubectl get hpa php-apache 29 | NAME REFERENCE TARGETS MINPODS MAXPODS REPLICAS AGE 30 | php-apache Deployment/php-apache 0% / 50% 1 10 1 1d 31 | 32 | # 增加负载 33 | $ kubectl run --rm -it load-generator --image=busybox /bin/sh 34 | Hit enter for command prompt 35 | $ while true; do wget -q -O- http://php-apache; done; 36 | 37 | # 稍等查看hpa显示负载增加,且副本数目增加为4 38 | $ kubectl get hpa php-apache 39 | NAME REFERENCE TARGETS MINPODS MAXPODS REPLICAS AGE 40 | php-apache Deployment/php-apache 430% / 50% 1 10 4 4m 41 | 42 | # 注意k8s为了避免频繁增删pod,对副本的增加速度有限制 43 | # 实验过程可以看到副本数目从1到4到8到10,大概都需要4~5分钟的缓冲期 44 | $ kubectl get hpa php-apache 45 | NAME REFERENCE TARGETS MINPODS MAXPODS REPLICAS AGE 46 | php-apache Deployment/php-apache 86% / 50% 1 10 8 9m 47 | $ kubectl get hpa php-apache 48 | NAME REFERENCE TARGETS MINPODS MAXPODS REPLICAS AGE 49 | php-apache Deployment/php-apache 52% / 50% 1 10 10 12m 50 | 51 | # 清除负载,CTRL+C 结束上述循环程序,稍后副本数目变回1 52 | $ kubectl get hpa php-apache 53 | NAME REFERENCE TARGETS MINPODS MAXPODS REPLICAS AGE 54 | php-apache Deployment/php-apache 0% / 50% 1 10 1 17m 55 | ``` 56 | 57 | -------------------------------------------------------------------------------- /docs/guide/index.md: -------------------------------------------------------------------------------- 1 | ## 使用指南 2 | 3 | ### 附加组件安装 4 | 5 | - 安装 [kubedns](kubedns.md) 6 | - 安装 [dashboard](dashboard.md) 7 | - 安装 [heapster](heapster.md) 8 | - 安装 [ingress](ingress.md) 9 | - 安装 efk 10 | - 安装 [harbor](harbor.md) 11 | 12 | ### 基础特性演示 13 | 14 | - 自动水平伸缩-基础 [Horizontal Pod Autoscaling](hpa.md) 15 | - 网络安全策略 [Network Policy](networkpolicy.md) 16 | 17 | ### 集群维护指南 18 | 19 | - 集群状态检查 20 | - 集群扩容 21 | - node 节点扩容 22 | - master 节点扩容 23 | - etcd 集群扩容 24 | - 清理集群 25 | 26 | ### 应用实践 27 | 28 | - 官方入门教程 29 | - Django 应用部署 30 | - Java tomcat 应用部署 31 | - NFS StorageClass 动态存储卷实践 32 | 33 | ### 其他 34 | 35 | -------------------------------------------------------------------------------- /docs/guide/ingress.md: -------------------------------------------------------------------------------- 1 | ## Ingress简介 2 | 3 | ingress就是从kubernetes集群外访问集群的入口,将用户的URL请求转发到不同的service上。ingress相当于nginx反向代理服务器,它包括的规则定义就是URL的路由信息;它的实现需要部署`Ingress controller`(比如 [traefik](https://github.com/containous/traefik) [ingress-nginx](https://github.com/kubernetes/ingress-nginx) 等),`Ingress controller`通过apiserver监听ingress和service的变化,并根据规则配置负载均衡并提供访问入口,达到服务发现的作用。 4 | 5 | + 未配置ingress: 6 | 7 | 集群外部 -> NodePort -> K8S Service 8 | 9 | + 配置ingress: 10 | 11 | 集群外部 -> Ingress -> K8S Service 12 | 13 | + 注意:ingress 本身也需要部署`Ingress controller`时暴露`NodePort`让外部访问 14 | 15 | ### 部署 Traefik 16 | 17 | Traefik 提供了一个简单好用 `Ingress controller`,下文基于它讲解一个简单的 ingress部署和测试例子。请查看yaml配置 [traefik-ingress.yaml](../../manifests/ingress/traefik-ingress.yaml),参考[traefik 
官方k8s例子](https://github.com/containous/traefik/tree/master/examples/k8s) 18 | 19 | #### 安装 traefik ingress-controller 20 | 21 | ``` bash 22 | kubectl create -f /etc/ansible/manifests/ingress/traefik-ingress.yaml 23 | ``` 24 | + 注意需要配置 `RBAC`授权 25 | + 注意trafik `Service`中 `80`端口为 traefik ingress-controller的服务端口,`8080`端口为 traefik 的管理WEB界面;为后续配置方便指定`80` 端口暴露`NodePort`端口为 `23456`(对应于在hosts配置中`NODE_PORT_RANGE`范围内可用端口) 26 | 27 | #### 验证 traefik ingress-controller 28 | 29 | ``` bash 30 | # kubectl get deploy -n kube-system traefik-ingress-controller 31 | NAME DESIRED CURRENT UP-TO-DATE AVAILABLE AGE 32 | traefik-ingress-controller 1 1 1 1 4m 33 | 34 | # kubectl get svc -n kube-system traefik-ingress-service 35 | NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE 36 | traefik-ingress-service NodePort 10.68.69.170 80:23456/TCP,8080:34815/TCP 4m 37 | ``` 38 | + 可以看到`traefik-ingress-service` 服务端口`80`暴露的nodePort确实为`23456` 39 | 40 | #### 测试 ingress 41 | 42 | + 首先创建测试用K8S应用,并且该应用服务不用nodePort暴露,而是用ingress方式让外部访问 43 | 44 | ``` bash 45 | kubectl run test-hello --image=nginx --expose --port=80 46 | ## 47 | # kubectl get deploy test-hello 48 | NAME DESIRED CURRENT UP-TO-DATE AVAILABLE AGE 49 | test-hello 1 1 1 1 56s 50 | # kubectl get svc test-hello 51 | NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE 52 | test-hello ClusterIP 10.68.124.115 80/TCP 1m 53 | ``` 54 | + 然后为这个应用创建 ingress,`kubectl create -f /etc/ansible/manifests/ingress/test-hello.ing.yaml` 55 | 56 | ``` bash 57 | # test-hello.ing.yaml内容 58 | apiVersion: extensions/v1beta1 59 | kind: Ingress 60 | metadata: 61 | name: test-hello 62 | spec: 63 | rules: 64 | - host: hello.test.com 65 | http: 66 | paths: 67 | - path: / 68 | backend: 69 | serviceName: test-hello 70 | servicePort: 80 71 | ``` 72 | + 集群内部尝试访问: `curl -H Host:hello.test.com 10.68.124.115` 能够看到欢迎页面 `Welcome to nginx!`;在集群外部尝试访问(假定集群一个NodeIP为 192.168.1.1): `curl -H Host:hello.test.com 192.168.1.1:23456`,也能够看到欢迎页面 `Welcome to nginx!`,说明ingress测试成功 73 | 74 | + 最后我们可以为traefik WEB管理页面也创建一个ingress, `kubectl create -f /etc/ansible/manifests/ingress/traefik-ui.ing.yaml` 75 | 76 | ``` bash 77 | # traefik-ui.ing.yaml内容 78 | --- 79 | apiVersion: extensions/v1beta1 80 | kind: Ingress 81 | metadata: 82 | name: traefik-web-ui 83 | namespace: kube-system 84 | spec: 85 | rules: 86 | - host: traefik-ui.test.com 87 | http: 88 | paths: 89 | - path: / 90 | backend: 91 | serviceName: traefik-ingress-service 92 | servicePort: 8080 93 | ``` 94 | 这样在集群外部可以使用 `curl -H Host:traefik-ui.test.com 192.168.1.1:23456` 尝试访问WEB管理页面,返回 `Found.`说明 traefik-ui的ingress配置生效了。 95 | 96 | ### [可选] 部署`ingress-service`的代理 97 | 98 | 在客户端主机上可以通过修改本机 `hosts` 文件,如上例子,增加两条记录: 99 | 100 | ``` text 101 | 192.168.1.1 hello.test.com 102 | 192.168.1.1 traefik-ui.test.com 103 | ``` 104 | 打开浏览器输入域名 `http://hello.test.com:23456` 和 `http://traefik-ui.test.com:23456` 就可以访问k8s的应用服务了。 105 | 106 | 当然如果你的环境中有类似 nginx/haproxy 等代理,可以做代理转发以去掉 `23456`这个端口,这里以 haproxy演示下。 107 | 108 | 如果你的集群根据本项目部署了高可用方案,那么可以利用`LB` 节点haproxy 来做,当然如果生产环境K8S应用已经部署非常多,建议还是使用独立的 `nginx/haproxy`集群 109 | 110 | 在 LB 主备节点,修改 `/etc/haproxy/haproxy.cfg`类似如下: 111 | 112 | ``` bash 113 | global 114 | log /dev/log local0 115 | log /dev/log local1 notice 116 | chroot /var/lib/haproxy 117 | stats socket /run/haproxy/admin.sock mode 660 level admin 118 | stats timeout 30s 119 | user haproxy 120 | group haproxy 121 | daemon 122 | nbproc 1 123 | 124 | defaults 125 | log global 126 | timeout connect 5000 127 | timeout client 50000 128 | timeout server 50000 129 | 130 | listen kube-master 131 | bind 
0.0.0.0:8443 132 | mode tcp 133 | option tcplog 134 | balance source 135 | # 根据实际kube-master 节点数量增减如下endpoints 136 | server s1 192.168.1.1:6443 check inter 10000 fall 2 rise 2 weight 1 137 | server s2 192.168.1.2:6443 check inter 10000 fall 2 rise 2 weight 1 138 | 139 | listen kube-node 140 | # 先确认 LB节点80端口可用 141 | bind 0.0.0.0:80 142 | mode tcp 143 | option tcplog 144 | balance source 145 | # 根据实际kube-node 节点数量增减如下endpoints 146 | server s1 192.168.1.1:23456 check inter 10000 fall 2 rise 2 weight 1 147 | server s2 192.168.1.2:23456 check inter 10000 fall 2 rise 2 weight 1 148 | server s3 192.168.1.3:23456 check inter 10000 fall 2 rise 2 weight 1 149 | ``` 150 | 修改保存后,重启haproxy服务; 151 | 152 | 这样我们就可以访问集群`master-VIP`的`80`端口,由haproxy代理转发到实际的node节点和nodePort端口上了。这时可以修改客户端本机 `hosts`文件如下:(假定 master-VIP=192.168.1.10) 153 | 154 | ``` text 155 | 192.168.1.10 hello.test.com 156 | 192.168.1.10 traefik-ui.test.com 157 | ``` 158 | 打开浏览器输入域名 `http://hello.test.com` 和 `http://traefik-ui.test.com`可以正常访问。 159 | 160 | 161 | [前一篇](heapster.md) -- [目录](index.md) -- [后一篇](efk.md) 162 | -------------------------------------------------------------------------------- /docs/guide/kubedns.md: -------------------------------------------------------------------------------- 1 | ## 部署 kubedns 2 | 3 | kubedns 是 k8s 集群首先需要部署的,集群中的其他 pods 使用它提供域名解析服务;主要可以解析 `集群服务名` 和 `Pod hostname`; 4 | 5 | 配置文件参考 `https://github.com/kubernetes/kubernetes` 项目目录 `kubernetes/cluster/addons/dns` 6 | 7 | ### 安装 8 | 9 | **kubectl create -f /etc/ansible/manifests/kubedns/[kubedns.yaml](../../manifests/kubedns/kubedns.yaml)** 10 | 11 | + 注意deploy中使用的 serviceAccount `kube-dns`,该预定义的 ClusterRoleBinding system:kube-dns 将 kube-system 命名空间的 kube-dns ServiceAccount 与 system:kube-dns ClusterRole 绑定, 因此POD 具有访问 kube-apiserver DNS 相关 API 的权限; 12 | + 集群 pod默认继承 node的dns 解析,修改 kubelet服务启动参数 --resolv-conf="",可以更改这个特性,详见 kubelet 启动参数 13 | 14 | ### 验证 kubedns 15 | 16 | 新建一个测试nginx服务 17 | 18 | `kubectl run nginx --image=nginx --expose --port=80` 19 | 20 | 确认nginx服务 21 | 22 | ``` bash 23 | kubectl get pod|grep nginx 24 | nginx-7cbc4b4d9c-fl46v 1/1 Running 0 1m 25 | kubectl get svc|grep nginx 26 | nginx ClusterIP 10.68.33.167 80/TCP 1m 27 | ``` 28 | 29 | 测试pod busybox 30 | 31 | ``` bash 32 | kubectl run busybox --rm -it --image=busybox /bin/sh 33 | If you don't see a command prompt, try pressing enter. 34 | / # cat /etc/resolv.conf 35 | nameserver 10.68.0.2 36 | search default.svc.cluster.local. svc.cluster.local. cluster.local. 
37 | options ndots:5 38 | # 测试集群内部服务解析 39 | / # nslookup nginx 40 | Server: 10.68.0.2 41 | Address 1: 10.68.0.2 kube-dns.kube-system.svc.cluster.local 42 | 43 | Name: nginx 44 | Address 1: 10.68.33.167 nginx.default.svc.cluster.local 45 | / # nslookup kubernetes 46 | Server: 10.68.0.2 47 | Address 1: 10.68.0.2 kube-dns.kube-system.svc.cluster.local 48 | 49 | Name: kubernetes 50 | Address 1: 10.68.0.1 kubernetes.default.svc.cluster.local 51 | # 测试外部域名的解析,默认集成node的dns解析 52 | / # nslookup www.baidu.com 53 | Server: 10.68.0.2 54 | Address 1: 10.68.0.2 kube-dns.kube-system.svc.cluster.local 55 | 56 | Name: www.baidu.com 57 | Address 1: 180.97.33.108 58 | Address 2: 180.97.33.107 59 | / # 60 | ``` 61 | 62 | [前一篇](index.md) -- [目录](index.md) -- [后一篇](dashboard.md) 63 | -------------------------------------------------------------------------------- /docs/guide/networkpolicy.md: -------------------------------------------------------------------------------- 1 | ## Network Policy 2 | -------------------------------------------------------------------------------- /docs/quickStart.md: -------------------------------------------------------------------------------- 1 | ## 快速指南 2 | 3 | 以下为基于Ubuntu 16.04/CentOS 7.4 快速体验k8s集群的测试、开发环境--AllinOne部署,觉得比官方的minikube方便、简单很多。 4 | 5 | ### 1.基础系统配置 6 | 7 | + 推荐内存2G/硬盘20G以上 8 | + 最小化安装`Ubuntu 16.04 server`或者`CentOS 7 Minimal` 9 | + 配置基础网络、更新源、SSH登陆等 10 | 11 | ### 2.安装依赖工具 12 | 13 | Ubuntu 16.04 请执行以下脚本: 14 | 15 | ``` bash 16 | # 文档中脚本默认均以root用户执行 17 | apt-get update && apt-get upgrade -y && apt-get dist-upgrade -y 18 | # 删除不要的默认安装 19 | apt-get purge ufw lxd lxd-client lxcfs lxc-common 20 | # 安装依赖工具 21 | apt-get install python2.7 git python-pip 22 | # Ubuntu16.04可能需要配置以下软连接 23 | ln -s /usr/bin/python2.7 /usr/bin/python 24 | ``` 25 | CentOS 7 请执行以下脚本: 26 | 27 | ``` bash 28 | # 文档中脚本默认均以root用户执行 29 | # 安装 epel 源并更新 30 | yum install epel-release -y 31 | yum update 32 | # 删除不要的默认安装 33 | yum erase firewalld firewalld-filesystem python-firewall -y 34 | # 安装依赖工具 35 | yum install git python python-pip -y 36 | ``` 37 | ### 3.ansible安装及准备 38 | 39 | ``` bash 40 | # 安装ansible (国内如果安装太慢可以直接用pip阿里云加速) 41 | #pip install pip --upgrade 42 | #pip install ansible 43 | pip install pip --upgrade -i http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com 44 | pip install --no-cache-dir ansible -i http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com 45 | # 配置ansible ssh密钥登陆 46 | ssh-keygen -t rsa -b 2048 回车 回车 回车 47 | ssh-copy-id $IP #$IP为本虚机地址,按照提示输入yes 和root密码 48 | ``` 49 | ### 4.安装kubernetes集群 50 | ``` bash 51 | git clone https://github.com/gjmzj/kubeasz.git 52 | mv kubeasz /etc/ansible 53 | # 下载已打包好的binaries,并且解压缩到/etc/ansible/bin目录 54 | # 国内请从分享的百度云链接下载 https://pan.baidu.com/s/1c4RFaA 55 | # 如果你有合适网络环境也可以按照/down/download.sh自行从官网下载各种tar包到 ./down目录,并执行download.sh 56 | tar zxvf k8s.191.tar.gz 57 | mv bin/* /etc/ansible/bin 58 | # 配置ansible的hosts文件 59 | cd /etc/ansible 60 | cp example/hosts.allinone.example hosts 61 | 然后根据实际情况修改此hosts文件,所有节点都是本虚机IP 62 | # 采用一步安装或者分步安装 63 | ansible-playbook 90.setup.yml # 一步安装 64 | #ansible-playbook 01.prepare.yml 65 | #ansible-playbook 02.etcd.yml 66 | #ansible-playbook 03.kubectl.yml 67 | #ansible-playbook 04.docker.yml 68 | #ansible-playbook 05.kube-master.yml 69 | #ansible-playbook 06.kube-node.yml 70 | # 网络只可选择calico flannel一种安装 71 | #ansible-playbook 07.calico.yml 72 | #ansible-playbook 07.flannel.yml 73 | ``` 74 | 如果执行成功,k8s集群就安装好了。详细分步讲解请查看项目目录 `/docs` 下相关文档 75 | 76 | ### 5.验证安装 77 | ``` bash 78 | # 
如果提示kubectl: command not found,退出重新ssh登陆一下,环境变量生效即可 79 | kubectl version 80 | kubectl get componentstatus # 可以看到scheduler/controller-manager/etcd等组件 Healthy 81 | kubectl cluster-info # 可以看到kubernetes master(apiserver)组件 running 82 | kubectl get node # 可以看到单 node Ready状态 83 | kubectl get pod --all-namespaces # 可以查看所有集群pod状态 84 | kubectl get svc --all-namespaces # 可以查看所有集群服务状态 85 | ``` 86 | ### 6.安装主要组件 87 | ``` bash 88 | # 安装kubedns 89 | kubectl create -f /etc/ansible/manifests/kubedns 90 | # 安装heapster 91 | kubectl create -f /etc/ansible/manifests/heapster 92 | # 安装dashboard 93 | kubectl create -f /etc/ansible/manifests/dashboard 94 | ``` 95 | + 更新后`dashboard`已经默认关闭非安全端口访问,请使用`https://xx.xx.xx.xx:6443/api/v1/namespaces/kube-system/services/kubernetes-dashboard/proxy`访问,并用默认用户 `admin:test1234` 登陆,更多内容请查阅[dashboard文档](guide/dashboard.md) 96 | 97 | ### 7.清理集群 98 | 99 | 以上步骤创建的K8S开发测试环境请尽情折腾,碰到错误尽量通过查看日志、上网搜索、提交`issues`等方式解决;当然如果是彻底奔溃了,可以清理集群后重新创建。 100 | 101 | 一步清理:`ansible-playbook 99.clean.yml` 102 | -------------------------------------------------------------------------------- /docs/upgrade.md: -------------------------------------------------------------------------------- 1 | ## 升级注意事项 2 | 3 | ### v1.8 >>> v1.9 4 | 5 | + 1.下载最新项目代码 `cd /etc/ansible && git pull origin master` 6 | + 2.下载新的二进制 `k8s.190.tar.gz` 解压并覆盖 `/etc/ansible/bin/` 目录下文件 7 | + 3.更新集群 `cd /etc/ansible && ansible-playbook 90.setup.yml` 8 | + 4.[可选]升级`calico-kube-controllers`相关,在任一node节点执行如下 9 | 10 | ``` bash 11 | cd /root/kube-system/calico 12 | kubectl delete deploy calico-kube-controllers -n kube-system 13 | kubectl create -f calico-kube-controllers.yaml 14 | ``` 15 | 16 | 注1:升级过程会短暂中断集群中已经运行的应用;如果你想要零中断升级,可以在熟悉项目安装原理基础上自行尝试,或者关注后续项目[使用指南]中的文档更新 17 | 18 | 注2:k8s集群v1.8升级v1.9.0,目前测试不用修改任何服务参数,只要替换二进制文件; 19 | -------------------------------------------------------------------------------- /down/download.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #主要组件版本如下 3 | export K8S_VER=v1.9.1 4 | export ETCD_VER=v3.2.13 5 | export DOCKER_VER=17.12.0-ce 6 | export CNI_VER=v0.6.0 7 | export DOCKER_COMPOSE=1.18.0 8 | export HARBOR=v1.2.2 9 | 10 | echo "\n建议直接下载本人打包好的所有必要二进制包k8s-***.all.tar.gz,然后解压到bin目录" 11 | echo "\n建议不使用此脚本,如果你想升级组件或者实验,请通读该脚本,必要时适当修改后使用" 12 | echo "\n注意1:请按照以下链接手动下载二进制包到down目录中" 13 | echo "\n注意2:如果还没有手工下载tar包,请Ctrl-c结束此脚本" 14 | 15 | echo "\n----download k8s binary at:" 16 | echo https://dl.k8s.io/${K8S_VER}/kubernetes-server-linux-amd64.tar.gz 17 | 18 | echo "\n----download etcd binary at:" 19 | echo https://github.com/coreos/etcd/releases/download/${ETCD_VER}/etcd-${ETCD_VER}-linux-amd64.tar.gz 20 | echo https://storage.googleapis.com/etcd/${ETCD_VER}/etcd-${ETCD_VER}-linux-amd64.tar.gz 21 | 22 | echo "\n----download docker binary at:" 23 | echo https://download.docker.com/linux/static/stable/x86_64/docker-${DOCKER_VER}.tgz 24 | 25 | echo "\n----download ca tools at:" 26 | echo https://pkg.cfssl.org/R1.2/cfssl_linux-amd64 27 | echo https://pkg.cfssl.org/R1.2/cfssljson_linux-amd64 28 | echo https://pkg.cfssl.org/R1.2/cfssl-certinfo_linux-amd64 29 | 30 | echo "\n----download docker-compose at:" 31 | echo https://github.com/docker/compose/releases/download/${DOCKER_COMPOSE}/docker-compose-Linux-x86_64 32 | 33 | echo "\n----download harbor-offline-installer at:" 34 | echo https://github.com/vmware/harbor/releases/download/${HARBOR}/harbor-offline-installer-${HARBOR}.tgz 35 | 36 | echo "\n----download cni plugins at:" 37 | echo 
https://github.com/containernetworking/plugins/releases/download/${CNI_VER}/cni-${CNI_VER}.tgz 38 | 39 | sleep 30 40 | 41 | ### 准备证书工具程序 42 | echo "\n准备证书工具程序..." 43 | if [ -f "cfssl_linux-amd64" ]; then 44 | mv cfssl_linux-amd64 ../bin/cfssl 45 | else 46 | echo 请先下载https://pkg.cfssl.org/R1.2/cfssl_linux-amd64 47 | fi 48 | if [ -f "cfssljson_linux-amd64" ]; then 49 | mv cfssljson_linux-amd64 ../bin/cfssljson 50 | else 51 | echo 请先下载https://pkg.cfssl.org/R1.2/cfssljson_linux-amd64 52 | fi 53 | if [ -f "cfssl-certinfo_linux-amd64" ]; then 54 | mv cfssl-certinfo_linux-amd64 ../bin/cfssl-certinfo 55 | else 56 | echo 请先下载https://pkg.cfssl.org/R1.2/cfssl-certinfo_linux-amd64 57 | fi 58 | 59 | ### 准备etcd程序 60 | echo "\n准备etcd二进制程序..." 61 | if [ -f "etcd-${ETCD_VER}-linux-amd64.tar.gz" ]; then 62 | echo "\nextracting etcd binaries..." 63 | tar zxf etcd-${ETCD_VER}-linux-amd64.tar.gz 64 | mv etcd-${ETCD_VER}-linux-amd64/etcd* ../bin 65 | else 66 | echo 请先下载etcd-${ETCD_VER}-linux-amd64.tar.gz 67 | fi 68 | 69 | ### 准备kubernetes程序 70 | echo "\n准备kubernetes二进制程序..." 71 | if [ -f "kubernetes-server-linux-amd64.tar.gz" ]; then 72 | echo "\nextracting kubernetes binaries..." 73 | tar zxf kubernetes-server-linux-amd64.tar.gz 74 | mv kubernetes/server/bin/kube-apiserver ../bin 75 | mv kubernetes/server/bin/kube-controller-manager ../bin 76 | mv kubernetes/server/bin/kubectl ../bin 77 | mv kubernetes/server/bin/kubelet ../bin 78 | mv kubernetes/server/bin/kube-proxy ../bin 79 | mv kubernetes/server/bin/kube-scheduler ../bin 80 | else 81 | echo 请先下载kubernetes-server-linux-amd64.tar.gz 82 | fi 83 | 84 | ### 准备docker程序 85 | echo "\n准备docker二进制程序..." 86 | if [ -f "docker-${DOCKER_VER}.tgz" ]; then 87 | echo "\nextracting docker binaries..." 88 | tar zxf docker-${DOCKER_VER}.tgz 89 | mv docker/docker* ../bin 90 | if [ -f "docker/completion/bash/docker" ]; then 91 | mv -f docker/completion/bash/docker ../roles/docker/files/docker 92 | fi 93 | else 94 | echo 请先下载docker-${DOCKER_VER}.tgz 95 | fi 96 | 97 | ### 准备cni plugins,仅安装flannel需要,安装calico由容器专门下载cni plugins 98 | echo "\n准备cni plugins,仅安装flannel需要,安装calico由容器专门下载cni plugins..." 99 | if [ -f "cni-${CNI_VER}.tgz" ]; then 100 | echo "\nextracting cni plugins binaries..." 
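# Note: the mv commands below assume the cni tarball extracts its plugin
# binaries into the current directory; only the plugins flannel needs
# (bridge/flannel/host-local/loopback/portmap) are copied into ../bin.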
101 | tar zxf cni-${CNI_VER}.tgz 102 | mv bridge ../bin 103 | mv flannel ../bin 104 | mv host-local ../bin 105 | mv loopback ../bin 106 | mv portmap ../bin 107 | else 108 | echo 请先下载cni-${CNI_VER}.tgz 109 | fi 110 | -------------------------------------------------------------------------------- /example/hosts.allinone.example: -------------------------------------------------------------------------------- 1 | # 部署节点:运行ansible 脚本的节点 2 | [deploy] 3 | 192.168.1.1 4 | 5 | # etcd集群请提供如下NODE_NAME、NODE_IP变量 6 | # 请注意etcd集群必须是1,3,5,7...奇数个节点 7 | [etcd] 8 | 192.168.1.1 NODE_NAME=etcd1 NODE_IP="192.168.1.1" 9 | 10 | [kube-master] 11 | 192.168.1.1 NODE_IP="192.168.1.1" 12 | 13 | #确保node节点有变量NODE_ID=node1 14 | [kube-node] 15 | 192.168.1.1 NODE_ID=node1 NODE_IP="192.168.1.1" 16 | 17 | [kube-cluster:children] 18 | kube-node 19 | kube-master 20 | 21 | # 如果启用harbor,请配置后面harbor相关参数 22 | [harbor] 23 | #192.168.1.8 NODE_IP="192.168.1.8" 24 | 25 | # 预留组,后续添加node节点使用 26 | [new-node] 27 | #192.168.1.xx NODE_ID=node6 NODE_IP="192.168.1.xx" 28 | 29 | [all:vars] 30 | # ---------集群主要参数--------------- 31 | #集群 MASTER IP 32 | MASTER_IP="192.168.1.1" 33 | 34 | #集群 APISERVER 35 | KUBE_APISERVER="https://192.168.1.1:6443" 36 | 37 | #pause镜像地址 38 | POD_INFRA_CONTAINER_IMAGE=mirrorgooglecontainers/pause-amd64:3.0 39 | 40 | #TLS Bootstrapping 使用的 Token,使用 head -c 16 /dev/urandom | od -An -t x | tr -d ' ' 生成 41 | BOOTSTRAP_TOKEN="d18f94b5fa585c7123f56803d925d2e7" 42 | 43 | # 集群网络插件,目前支持calico和flannel 44 | CLUSTER_NETWORK="calico" 45 | 46 | # 部分calico相关配置,更全配置可以去roles/calico/templates/calico.yaml.j2自定义 47 | # 设置 CALICO_IPV4POOL_IPIP=“off”,可以提高网络性能,条件限制详见 05.安装calico网络组件.md 48 | CALICO_IPV4POOL_IPIP="always" 49 | # 设置 calico-node使用的host IP,bgp邻居通过该地址建立,可手动指定端口"interface=eth0"或使用如下自动发现 50 | IP_AUTODETECTION_METHOD="can-reach=223.5.5.5" 51 | 52 | # 部分flannel配置,详见roles/flannel/templates/kube-flannel.yaml.j2 53 | FLANNEL_BACKEND="vxlan" 54 | 55 | # 服务网段 (Service CIDR),部署前路由不可达,部署后集群内使用 IP:Port 可达 56 | SERVICE_CIDR="10.68.0.0/16" 57 | 58 | # POD 网段 (Cluster CIDR),部署前路由不可达,**部署后**路由可达 59 | CLUSTER_CIDR="172.20.0.0/16" 60 | 61 | # 服务端口范围 (NodePort Range) 62 | NODE_PORT_RANGE="20000-40000" 63 | 64 | # kubernetes 服务 IP (预分配,一般是 SERVICE_CIDR 中第一个IP) 65 | CLUSTER_KUBERNETES_SVC_IP="10.68.0.1" 66 | 67 | # 集群 DNS 服务 IP (从 SERVICE_CIDR 中预分配) 68 | CLUSTER_DNS_SVC_IP="10.68.0.2" 69 | 70 | # 集群 DNS 域名 71 | CLUSTER_DNS_DOMAIN="cluster.local." 
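# Note: CLUSTER_DNS_SVC_IP is pre-allocated from SERVICE_CIDR above; together with
# CLUSTER_DNS_DOMAIN it is rendered into roles/deploy/templates/kubedns.yaml.j2 when
# 01.prepare.yml runs, so adjust these values before running the playbooks.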
72 | 73 | # etcd 集群间通信的IP和端口, **根据实际 etcd 集群成员设置** 74 | ETCD_NODES="etcd1=https://192.168.1.1:2380" 75 | 76 | # etcd 集群服务地址列表, **根据实际 etcd 集群成员设置** 77 | ETCD_ENDPOINTS="https://192.168.1.1:2379" 78 | 79 | # 集群basic auth 使用的用户名和密码 80 | BASIC_AUTH_USER="admin" 81 | BASIC_AUTH_PASS="test1234" 82 | 83 | # ---------附加参数-------------------- 84 | #默认二进制文件目录 85 | bin_dir="/root/local/bin" 86 | 87 | #证书目录 88 | ca_dir="/etc/kubernetes/ssl" 89 | 90 | #部署目录,即 ansible 工作目录 91 | base_dir="/etc/ansible" 92 | 93 | #私有仓库 harbor服务器 (域名或者IP) 94 | #HARBOR_IP="192.168.1.8" 95 | #HARBOR_DOMAIN="harbor.yourdomain.com" 96 | -------------------------------------------------------------------------------- /example/hosts.m-masters.example: -------------------------------------------------------------------------------- 1 | # 部署节点:运行这份 ansible 脚本的节点 2 | [deploy] 3 | 192.168.1.1 4 | 5 | # etcd集群请提供如下NODE_NAME、NODE_IP变量 6 | # 请注意etcd集群必须是1,3,5,7...奇数个节点 7 | [etcd] 8 | 192.168.1.1 NODE_NAME=etcd1 NODE_IP="192.168.1.1" 9 | 192.168.1.2 NODE_NAME=etcd2 NODE_IP="192.168.1.2" 10 | 192.168.1.3 NODE_NAME=etcd3 NODE_IP="192.168.1.3" 11 | 12 | [kube-master] 13 | 192.168.1.1 NODE_IP="192.168.1.1" 14 | 192.168.1.2 NODE_IP="192.168.1.2" 15 | 16 | # 负载均衡至少两个节点,安装 haproxy+keepalived 17 | # 根据master节点数量同步修改roles/lb/templates/haproxy.cfg.j2 18 | [lb] 19 | 192.168.1.1 LB_IF="eth0" LB_ROLE=backup 20 | 192.168.1.2 LB_IF="eth0" LB_ROLE=master 21 | [lb:vars] 22 | LB_EP1="192.168.1.1:6443" # api-server 实际成员地址端口 23 | LB_EP2="192.168.1.2:6443" # api-server 实际成员地址端口 24 | MASTER_IP="192.168.1.10" # api-server 虚地址 25 | MASTER_PORT="8443" # api-server 服务端口 26 | 27 | #确保node节点有变量NODE_ID=node1 28 | [kube-node] 29 | 192.168.1.2 NODE_ID=node1 NODE_IP="192.168.1.2" 30 | 192.168.1.3 NODE_ID=node2 NODE_IP="192.168.1.3" 31 | 192.168.1.4 NODE_ID=node3 NODE_IP="192.168.1.4" 32 | 33 | [kube-cluster:children] 34 | kube-node 35 | kube-master 36 | 37 | # 如果启用harbor,请配置后面harbor相关参数 38 | [harbor] 39 | #192.168.1.8 NODE_IP="192.168.1.8" 40 | 41 | # 预留组,后续添加node节点使用 42 | [new-node] 43 | #192.168.1.xx NODE_ID=node6 NODE_IP="192.168.1.xx" 44 | #192.168.1.xx NODE_ID=node7 NODE_IP="192.168.1.xx" 45 | 46 | [all:vars] 47 | # ---------集群主要参数--------------- 48 | #集群 MASTER IP, 需要负载均衡,一般为VIP地址 49 | MASTER_IP="192.168.1.10" 50 | KUBE_APISERVER="https://192.168.1.10:8443" 51 | 52 | #pause镜像地址 53 | POD_INFRA_CONTAINER_IMAGE=mirrorgooglecontainers/pause-amd64:3.0 54 | 55 | #TLS Bootstrapping 使用的 Token,使用 head -c 16 /dev/urandom | od -An -t x | tr -d ' ' 生成 56 | BOOTSTRAP_TOKEN="c30302226d4b810e08731702d3890f50" 57 | 58 | # 集群网络插件,目前支持calico和flannel 59 | CLUSTER_NETWORK="calico" 60 | 61 | # 部分calico相关配置,更全配置可以去roles/calico/templates/calico.yaml.j2自定义 62 | # 设置 CALICO_IPV4POOL_IPIP=“off”,可以提高网络性能,条件限制详见 05.安装calico网络组件.md 63 | CALICO_IPV4POOL_IPIP="always" 64 | # 设置 calico-node使用的host IP,bgp邻居通过该地址建立,可手动指定端口"interface=eth0"或使用如下自动发现 65 | IP_AUTODETECTION_METHOD="can-reach=223.5.5.5" 66 | 67 | # 部分flannel配置,详见roles/flannel/templates/kube-flannel.yaml.j2 68 | FLANNEL_BACKEND="vxlan" 69 | 70 | # 服务网段 (Service CIDR),部署前路由不可达,部署后集群内使用 IP:Port 可达 71 | SERVICE_CIDR="10.68.0.0/16" 72 | 73 | # POD 网段 (Cluster CIDR),部署前路由不可达,**部署后**路由可达 74 | CLUSTER_CIDR="172.20.0.0/16" 75 | 76 | # 服务端口范围 (NodePort Range) 77 | NODE_PORT_RANGE="20000-40000" 78 | 79 | # kubernetes 服务 IP (预分配,一般是 SERVICE_CIDR 中第一个IP) 80 | CLUSTER_KUBERNETES_SVC_IP="10.68.0.1" 81 | 82 | # 集群 DNS 服务 IP (从 SERVICE_CIDR 中预分配) 83 | CLUSTER_DNS_SVC_IP="10.68.0.2" 84 | 85 | # 集群 DNS 域名 86 | CLUSTER_DNS_DOMAIN="cluster.local." 
87 | 88 | # etcd 集群间通信的IP和端口, **根据实际 etcd 集群成员设置** 89 | ETCD_NODES="etcd1=https://192.168.1.1:2380,etcd2=https://192.168.1.2:2380,etcd3=https://192.168.1.3:2380" 90 | 91 | # etcd 集群服务地址列表, **根据实际 etcd 集群成员设置** 92 | ETCD_ENDPOINTS="https://192.168.1.1:2379,https://192.168.1.2:2379,https://192.168.1.3:2379" 93 | 94 | # 集群basic auth 使用的用户名和密码 95 | BASIC_AUTH_USER="admin" 96 | BASIC_AUTH_PASS="test1234" 97 | 98 | # ---------附加参数-------------------- 99 | #默认二进制文件目录 100 | bin_dir="/root/local/bin" 101 | 102 | #证书目录 103 | ca_dir="/etc/kubernetes/ssl" 104 | 105 | #部署目录,即 ansible 工作目录,建议不要修改 106 | base_dir="/etc/ansible" 107 | 108 | #私有仓库 harbor服务器 (域名或者IP) 109 | #HARBOR_IP="192.168.1.8" 110 | #HARBOR_DOMAIN="harbor.yourdomain.com" 111 | -------------------------------------------------------------------------------- /example/hosts.s-master.example: -------------------------------------------------------------------------------- 1 | # 部署节点:运行ansible 脚本的节点 2 | [deploy] 3 | 192.168.1.1 4 | 5 | # etcd集群请提供如下NODE_NAME、NODE_IP变量 6 | # 请注意etcd集群必须是1,3,5,7...奇数个节点 7 | [etcd] 8 | 192.168.1.1 NODE_NAME=etcd1 NODE_IP="192.168.1.1" 9 | 192.168.1.2 NODE_NAME=etcd2 NODE_IP="192.168.1.2" 10 | 192.168.1.3 NODE_NAME=etcd3 NODE_IP="192.168.1.3" 11 | 12 | [kube-master] 13 | 192.168.1.1 NODE_IP="192.168.1.1" 14 | 15 | #确保node节点有变量NODE_ID=node1 16 | [kube-node] 17 | 192.168.1.1 NODE_ID=node1 NODE_IP="192.168.1.1" 18 | 192.168.1.2 NODE_ID=node2 NODE_IP="192.168.1.2" 19 | 192.168.1.3 NODE_ID=node3 NODE_IP="192.168.1.3" 20 | 21 | [kube-cluster:children] 22 | kube-node 23 | kube-master 24 | 25 | # 如果启用harbor,请配置后面harbor相关参数 26 | [harbor] 27 | #192.168.1.8 NODE_IP="192.168.1.8" 28 | 29 | # 预留组,后续添加node节点使用 30 | [new-node] 31 | #192.168.1.xx NODE_ID=node6 NODE_IP="192.168.1.xx" 32 | 33 | [all:vars] 34 | # ---------集群主要参数--------------- 35 | #集群 MASTER IP 36 | MASTER_IP="192.168.1.1" 37 | 38 | #集群 APISERVER 39 | KUBE_APISERVER="https://192.168.1.1:6443" 40 | 41 | #pause镜像地址 42 | POD_INFRA_CONTAINER_IMAGE=mirrorgooglecontainers/pause-amd64:3.0 43 | 44 | #TLS Bootstrapping 使用的 Token,使用 head -c 16 /dev/urandom | od -An -t x | tr -d ' ' 生成 45 | BOOTSTRAP_TOKEN="d18f94b5fa585c7123f56803d925d2e7" 46 | 47 | # 集群网络插件,目前支持calico和flannel 48 | CLUSTER_NETWORK="calico" 49 | 50 | # 部分calico相关配置,更全配置可以去roles/calico/templates/calico.yaml.j2自定义 51 | # 设置 CALICO_IPV4POOL_IPIP=“off”,可以提高网络性能,条件限制详见 05.安装calico网络组件.md 52 | CALICO_IPV4POOL_IPIP="always" 53 | # 设置 calico-node使用的host IP,bgp邻居通过该地址建立,可手动指定端口"interface=eth0"或使用如下自动发现 54 | IP_AUTODETECTION_METHOD="can-reach=223.5.5.5" 55 | 56 | # 部分flannel配置,详见roles/flannel/templates/kube-flannel.yaml.j2 57 | FLANNEL_BACKEND="vxlan" 58 | 59 | # 服务网段 (Service CIDR),部署前路由不可达,部署后集群内使用 IP:Port 可达 60 | SERVICE_CIDR="10.68.0.0/16" 61 | 62 | # POD 网段 (Cluster CIDR),部署前路由不可达,**部署后**路由可达 63 | CLUSTER_CIDR="172.20.0.0/16" 64 | 65 | # 服务端口范围 (NodePort Range) 66 | NODE_PORT_RANGE="20000-40000" 67 | 68 | # kubernetes 服务 IP (预分配,一般是 SERVICE_CIDR 中第一个IP) 69 | CLUSTER_KUBERNETES_SVC_IP="10.68.0.1" 70 | 71 | # 集群 DNS 服务 IP (从 SERVICE_CIDR 中预分配) 72 | CLUSTER_DNS_SVC_IP="10.68.0.2" 73 | 74 | # 集群 DNS 域名 75 | CLUSTER_DNS_DOMAIN="cluster.local." 
76 | 77 | # etcd 集群间通信的IP和端口, **根据实际 etcd 集群成员设置** 78 | ETCD_NODES="etcd1=https://192.168.1.1:2380,etcd2=https://192.168.1.2:2380,etcd3=https://192.168.1.3:2380" 79 | 80 | # etcd 集群服务地址列表, **根据实际 etcd 集群成员设置** 81 | ETCD_ENDPOINTS="https://192.168.1.1:2379,https://192.168.1.2:2379,https://192.168.1.3:2379" 82 | 83 | # 集群basic auth 使用的用户名和密码 84 | BASIC_AUTH_USER="admin" 85 | BASIC_AUTH_PASS="test1234" 86 | 87 | # ---------附加参数-------------------- 88 | #默认二进制文件目录 89 | bin_dir="/root/local/bin" 90 | 91 | #证书目录 92 | ca_dir="/etc/kubernetes/ssl" 93 | 94 | #部署目录,即 ansible 工作目录 95 | base_dir="/etc/ansible" 96 | 97 | #私有仓库 harbor服务器 (域名或者IP) 98 | #HARBOR_IP="192.168.1.8" 99 | #HARBOR_DOMAIN="harbor.yourdomain.com" 100 | -------------------------------------------------------------------------------- /manifests/dashboard/kubernetes-dashboard.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2015 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # Configuration to deploy release version of the Dashboard UI compatible with 16 | # Kubernetes 1.6 (RBAC enabled). 17 | # 18 | # Example usage: kubectl create -f 19 | 20 | apiVersion: v1 21 | kind: ServiceAccount 22 | metadata: 23 | labels: 24 | k8s-app: kubernetes-dashboard 25 | name: kubernetes-dashboard 26 | namespace: kube-system 27 | --- 28 | apiVersion: rbac.authorization.k8s.io/v1 29 | kind: ClusterRoleBinding 30 | metadata: 31 | name: kubernetes-dashboard 32 | labels: 33 | k8s-app: kubernetes-dashboard 34 | roleRef: 35 | apiGroup: rbac.authorization.k8s.io 36 | kind: ClusterRole 37 | name: cluster-admin 38 | subjects: 39 | - kind: ServiceAccount 40 | name: kubernetes-dashboard 41 | namespace: kube-system 42 | --- 43 | kind: Deployment 44 | apiVersion: apps/v1 45 | metadata: 46 | labels: 47 | k8s-app: kubernetes-dashboard 48 | name: kubernetes-dashboard 49 | namespace: kube-system 50 | spec: 51 | replicas: 1 52 | revisionHistoryLimit: 10 53 | selector: 54 | matchLabels: 55 | k8s-app: kubernetes-dashboard 56 | template: 57 | metadata: 58 | labels: 59 | k8s-app: kubernetes-dashboard 60 | spec: 61 | containers: 62 | - name: kubernetes-dashboard 63 | #image: gcr.io/google_containers/kubernetes-dashboard-amd64:v1.6.3 64 | image: mirrorgooglecontainers/kubernetes-dashboard-amd64:v1.6.3 65 | ports: 66 | - containerPort: 9090 67 | protocol: TCP 68 | args: 69 | # Uncomment the following line to manually specify Kubernetes API server Host 70 | # If not specified, Dashboard will attempt to auto discover the API server and connect 71 | # to it. Uncomment only if the default does not work. 
72 | # - --apiserver-host=http://my-address:port 73 | livenessProbe: 74 | httpGet: 75 | path: / 76 | port: 9090 77 | initialDelaySeconds: 30 78 | timeoutSeconds: 30 79 | serviceAccountName: kubernetes-dashboard 80 | # Comment the following tolerations if Dashboard must not be deployed on master 81 | tolerations: 82 | - key: node-role.kubernetes.io/master 83 | effect: NoSchedule 84 | --- 85 | kind: Service 86 | apiVersion: v1 87 | metadata: 88 | labels: 89 | k8s-app: kubernetes-dashboard 90 | kubernetes.io/cluster-service: "true" 91 | addonmanager.kubernetes.io/mode: Reconcile 92 | name: kubernetes-dashboard 93 | namespace: kube-system 94 | spec: 95 | ports: 96 | - port: 80 97 | targetPort: 9090 98 | selector: 99 | k8s-app: kubernetes-dashboard 100 | type: NodePort 101 | -------------------------------------------------------------------------------- /manifests/dashboard/ui-admin-rbac.yaml: -------------------------------------------------------------------------------- 1 | kind: ClusterRole 2 | apiVersion: rbac.authorization.k8s.io/v1 3 | metadata: 4 | name: ui-admin 5 | rules: 6 | - apiGroups: 7 | - "" 8 | resources: 9 | - services 10 | - services/proxy 11 | verbs: 12 | - '*' 13 | 14 | --- 15 | apiVersion: rbac.authorization.k8s.io/v1 16 | kind: RoleBinding 17 | metadata: 18 | name: ui-admin-binding 19 | namespace: kube-system 20 | roleRef: 21 | apiGroup: rbac.authorization.k8s.io 22 | kind: ClusterRole 23 | name: ui-admin 24 | subjects: 25 | - apiGroup: rbac.authorization.k8s.io 26 | kind: User 27 | name: admin 28 | -------------------------------------------------------------------------------- /manifests/dashboard/ui-read-rbac.yaml: -------------------------------------------------------------------------------- 1 | kind: ClusterRole 2 | apiVersion: rbac.authorization.k8s.io/v1 3 | metadata: 4 | name: ui-read 5 | rules: 6 | - apiGroups: 7 | - "" 8 | resources: 9 | - services 10 | - services/proxy 11 | verbs: 12 | - get 13 | - list 14 | - watch 15 | 16 | --- 17 | apiVersion: rbac.authorization.k8s.io/v1 18 | kind: RoleBinding 19 | metadata: 20 | name: ui-read-binding 21 | namespace: kube-system 22 | roleRef: 23 | apiGroup: rbac.authorization.k8s.io 24 | kind: ClusterRole 25 | name: ui-read 26 | subjects: 27 | - apiGroup: rbac.authorization.k8s.io 28 | kind: User 29 | name: readonly 30 | -------------------------------------------------------------------------------- /manifests/heapster/grafana.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: apps/v1 3 | kind: Deployment 4 | metadata: 5 | name: monitoring-grafana 6 | namespace: kube-system 7 | spec: 8 | replicas: 1 9 | selector: 10 | matchLabels: 11 | k8s-app: grafana 12 | template: 13 | metadata: 14 | labels: 15 | task: monitoring 16 | k8s-app: grafana 17 | spec: 18 | containers: 19 | - name: grafana 20 | #image: gcr.io/google_containers/heapster-grafana-amd64:v4.2.0 21 | image: mirrorgooglecontainers/heapster-grafana-amd64:v4.4.3 22 | ports: 23 | - containerPort: 3000 24 | protocol: TCP 25 | volumeMounts: 26 | - mountPath: /var 27 | name: grafana-storage 28 | env: 29 | - name: INFLUXDB_HOST 30 | value: monitoring-influxdb 31 | - name: GF_SERVER_HTTP_PORT 32 | value: "3000" 33 | # The following env variables are required to make Grafana accessible via 34 | # the kubernetes api-server proxy. On production clusters, we recommend 35 | # removing these env variables, setup auth for grafana, and expose the grafana 36 | # service using a LoadBalancer or a public IP. 
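        # Concretely, the three variables below disable basic auth and grant anonymous
        # visitors the Admin org role -- acceptable for this test setup only.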
37 | - name: GF_AUTH_BASIC_ENABLED 38 | value: "false" 39 | - name: GF_AUTH_ANONYMOUS_ENABLED 40 | value: "true" 41 | - name: GF_AUTH_ANONYMOUS_ORG_ROLE 42 | value: Admin 43 | - name: GF_SERVER_ROOT_URL 44 | # If you're only using the API Server proxy, set this value instead: 45 | value: /api/v1/namespaces/kube-system/services/monitoring-grafana/proxy/ 46 | #value: / 47 | volumes: 48 | - name: grafana-storage 49 | emptyDir: {} 50 | --- 51 | apiVersion: v1 52 | kind: Service 53 | metadata: 54 | labels: 55 | # For use as a Cluster add-on (https://github.com/kubernetes/kubernetes/tree/master/cluster/addons) 56 | # If you are NOT using this as an addon, you should comment out this line. 57 | kubernetes.io/cluster-service: 'true' 58 | kubernetes.io/name: monitoring-grafana 59 | name: monitoring-grafana 60 | namespace: kube-system 61 | spec: 62 | # In a production setup, we recommend accessing Grafana through an external Loadbalancer 63 | # or through a public IP. 64 | # type: LoadBalancer 65 | # You could also use NodePort to expose the service at a randomly-generated port 66 | # type: NodePort 67 | ports: 68 | - port: 80 69 | targetPort: 3000 70 | selector: 71 | k8s-app: grafana 72 | -------------------------------------------------------------------------------- /manifests/heapster/heapster.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: ServiceAccount 4 | metadata: 5 | name: heapster 6 | namespace: kube-system 7 | --- 8 | 9 | apiVersion: rbac.authorization.k8s.io/v1 10 | kind: ClusterRoleBinding 11 | metadata: 12 | name: heapster 13 | subjects: 14 | - kind: ServiceAccount 15 | name: heapster 16 | namespace: kube-system 17 | roleRef: 18 | kind: ClusterRole 19 | name: system:heapster 20 | apiGroup: rbac.authorization.k8s.io 21 | --- 22 | 23 | apiVersion: apps/v1 24 | kind: Deployment 25 | metadata: 26 | name: heapster 27 | namespace: kube-system 28 | spec: 29 | replicas: 1 30 | selector: 31 | matchLabels: 32 | k8s-app: heapster 33 | template: 34 | metadata: 35 | labels: 36 | task: monitoring 37 | k8s-app: heapster 38 | spec: 39 | serviceAccountName: heapster 40 | containers: 41 | - name: heapster 42 | #image: gcr.io/google_containers/heapster-amd64:v1.3.0 43 | image: mirrorgooglecontainers/heapster-amd64:v1.3.0 44 | imagePullPolicy: IfNotPresent 45 | command: 46 | - /heapster 47 | - --source=kubernetes:https://kubernetes.default 48 | - --sink=influxdb:http://monitoring-influxdb.kube-system.svc:8086 49 | --- 50 | apiVersion: v1 51 | kind: Service 52 | metadata: 53 | labels: 54 | task: monitoring 55 | # For use as a Cluster add-on (https://github.com/kubernetes/kubernetes/tree/master/cluster/addons) 56 | # If you are NOT using this as an addon, you should comment out this line. 
57 | #kubernetes.io/cluster-service: 'true' 58 | kubernetes.io/name: Heapster 59 | name: heapster 60 | namespace: kube-system 61 | spec: 62 | ports: 63 | - port: 80 64 | targetPort: 8082 65 | selector: 66 | k8s-app: heapster 67 | -------------------------------------------------------------------------------- /manifests/heapster/influxdb.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: apps/v1 3 | kind: Deployment 4 | metadata: 5 | name: monitoring-influxdb 6 | namespace: kube-system 7 | spec: 8 | replicas: 1 9 | selector: 10 | matchLabels: 11 | k8s-app: influxdb 12 | template: 13 | metadata: 14 | labels: 15 | task: monitoring 16 | k8s-app: influxdb 17 | spec: 18 | containers: 19 | - name: influxdb 20 | #image: gcr.io/google_containers/heapster-influxdb-amd64:v1.1.1 21 | image: mirrorgooglecontainers/heapster-influxdb-amd64:v1.1.1 22 | volumeMounts: 23 | - mountPath: /data 24 | name: influxdb-storage 25 | - mountPath: /etc/ 26 | name: influxdb-config 27 | volumes: 28 | - name: influxdb-storage 29 | emptyDir: {} 30 | - name: influxdb-config 31 | configMap: 32 | name: influxdb-config 33 | --- 34 | apiVersion: v1 35 | kind: Service 36 | metadata: 37 | labels: 38 | task: monitoring 39 | # For use as a Cluster add-on (https://github.com/kubernetes/kubernetes/tree/master/cluster/addons) 40 | # If you are NOT using this as an addon, you should comment out this line. 41 | # kubernetes.io/cluster-service: 'true' 42 | kubernetes.io/name: monitoring-influxdb 43 | name: monitoring-influxdb 44 | namespace: kube-system 45 | spec: 46 | type: NodePort 47 | ports: 48 | - port: 8086 49 | targetPort: 8086 50 | name: http 51 | - port: 8083 52 | targetPort: 8083 53 | name: admin 54 | selector: 55 | k8s-app: influxdb 56 | --- 57 | 58 | apiVersion: v1 59 | kind: ConfigMap 60 | metadata: 61 | name: influxdb-config 62 | namespace: kube-system 63 | data: 64 | config.toml: | 65 | reporting-disabled = true 66 | bind-address = ":8088" 67 | 68 | [meta] 69 | dir = "/data/meta" 70 | retention-autocreate = true 71 | logging-enabled = true 72 | 73 | [data] 74 | dir = "/data/data" 75 | wal-dir = "/data/wal" 76 | query-log-enabled = true 77 | cache-max-memory-size = 1073741824 78 | cache-snapshot-memory-size = 26214400 79 | cache-snapshot-write-cold-duration = "10m0s" 80 | compact-full-write-cold-duration = "4h0m0s" 81 | max-series-per-database = 1000000 82 | max-values-per-tag = 100000 83 | trace-logging-enabled = false 84 | 85 | [coordinator] 86 | write-timeout = "10s" 87 | max-concurrent-queries = 0 88 | query-timeout = "0s" 89 | log-queries-after = "0s" 90 | max-select-point = 0 91 | max-select-series = 0 92 | max-select-buckets = 0 93 | 94 | [retention] 95 | enabled = true 96 | check-interval = "30m0s" 97 | 98 | [admin] 99 | enabled = true 100 | bind-address = ":8083" 101 | https-enabled = false 102 | https-certificate = "/etc/ssl/influxdb.pem" 103 | 104 | [shard-precreation] 105 | enabled = true 106 | check-interval = "10m0s" 107 | advance-period = "30m0s" 108 | 109 | [monitor] 110 | store-enabled = true 111 | store-database = "_internal" 112 | store-interval = "10s" 113 | 114 | [subscriber] 115 | enabled = true 116 | http-timeout = "30s" 117 | insecure-skip-verify = false 118 | ca-certs = "" 119 | write-concurrency = 40 120 | write-buffer-size = 1000 121 | 122 | [http] 123 | enabled = true 124 | bind-address = ":8086" 125 | auth-enabled = false 126 | log-enabled = true 127 | write-tracing = false 128 | pprof-enabled = false 129 | https-enabled = false 130 | 
https-certificate = "/etc/ssl/influxdb.pem" 131 | https-private-key = "" 132 | max-row-limit = 10000 133 | max-connection-limit = 0 134 | shared-secret = "" 135 | realm = "InfluxDB" 136 | unix-socket-enabled = false 137 | bind-socket = "/var/run/influxdb.sock" 138 | 139 | [[graphite]] 140 | enabled = false 141 | bind-address = ":2003" 142 | database = "graphite" 143 | retention-policy = "" 144 | protocol = "tcp" 145 | batch-size = 5000 146 | batch-pending = 10 147 | batch-timeout = "1s" 148 | consistency-level = "one" 149 | separator = "." 150 | udp-read-buffer = 0 151 | 152 | [[collectd]] 153 | enabled = false 154 | bind-address = ":25826" 155 | database = "collectd" 156 | retention-policy = "" 157 | batch-size = 5000 158 | batch-pending = 10 159 | batch-timeout = "10s" 160 | read-buffer = 0 161 | typesdb = "/usr/share/collectd/types.db" 162 | 163 | [[opentsdb]] 164 | enabled = false 165 | bind-address = ":4242" 166 | database = "opentsdb" 167 | retention-policy = "" 168 | consistency-level = "one" 169 | tls-enabled = false 170 | certificate = "/etc/ssl/influxdb.pem" 171 | batch-size = 1000 172 | batch-pending = 5 173 | batch-timeout = "1s" 174 | log-point-errors = true 175 | 176 | [[udp]] 177 | enabled = false 178 | bind-address = ":8089" 179 | database = "udp" 180 | retention-policy = "" 181 | batch-size = 5000 182 | batch-pending = 10 183 | read-buffer = 0 184 | batch-timeout = "1s" 185 | precision = "" 186 | 187 | [continuous_queries] 188 | log-enabled = true 189 | enabled = true 190 | run-interval = "1s" 191 | -------------------------------------------------------------------------------- /manifests/ingress/test-hello.ing.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: extensions/v1beta1 2 | kind: Ingress 3 | metadata: 4 | name: test-hello 5 | spec: 6 | rules: 7 | - host: hello.test.com 8 | http: 9 | paths: 10 | - path: / 11 | backend: 12 | serviceName: test-hello 13 | servicePort: 80 14 | -------------------------------------------------------------------------------- /manifests/ingress/traefik-ingress.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | kind: ClusterRole 3 | apiVersion: rbac.authorization.k8s.io/v1 4 | metadata: 5 | name: traefik-ingress-controller 6 | rules: 7 | - apiGroups: 8 | - "" 9 | resources: 10 | - pods 11 | - services 12 | - endpoints 13 | - secrets 14 | verbs: 15 | - get 16 | - list 17 | - watch 18 | - apiGroups: 19 | - extensions 20 | resources: 21 | - ingresses 22 | verbs: 23 | - get 24 | - list 25 | - watch 26 | --- 27 | kind: ClusterRoleBinding 28 | apiVersion: rbac.authorization.k8s.io/v1 29 | metadata: 30 | name: traefik-ingress-controller 31 | roleRef: 32 | apiGroup: rbac.authorization.k8s.io 33 | kind: ClusterRole 34 | name: traefik-ingress-controller 35 | subjects: 36 | - kind: ServiceAccount 37 | name: traefik-ingress-controller 38 | namespace: kube-system 39 | --- 40 | apiVersion: v1 41 | kind: ServiceAccount 42 | metadata: 43 | name: traefik-ingress-controller 44 | namespace: kube-system 45 | --- 46 | kind: Deployment 47 | apiVersion: apps/v1 48 | metadata: 49 | name: traefik-ingress-controller 50 | namespace: kube-system 51 | labels: 52 | k8s-app: traefik-ingress-lb 53 | spec: 54 | replicas: 1 55 | selector: 56 | matchLabels: 57 | k8s-app: traefik-ingress-lb 58 | template: 59 | metadata: 60 | labels: 61 | k8s-app: traefik-ingress-lb 62 | name: traefik-ingress-lb 63 | spec: 64 | serviceAccountName: traefik-ingress-controller 65 | 
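      # The serviceAccountName above ties this pod to the ClusterRole/ClusterRoleBinding
      # defined at the top of this manifest, so traefik can watch Services, Endpoints and Ingresses.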
terminationGracePeriodSeconds: 60 66 | containers: 67 | - image: traefik 68 | name: traefik-ingress-lb 69 | args: 70 | - --web 71 | - --kubernetes 72 | --- 73 | kind: Service 74 | apiVersion: v1 75 | metadata: 76 | name: traefik-ingress-service 77 | namespace: kube-system 78 | spec: 79 | selector: 80 | k8s-app: traefik-ingress-lb 81 | ports: 82 | - protocol: TCP 83 | # 该端口为 traefik ingress-controller的服务端口 84 | port: 80 85 | # 集群hosts文件中设置的 NODE_PORT_RANGE 作为 NodePort的可用范围 86 | # 从默认20000~40000之间选一个可用端口,让ingress-controller暴露给外部的访问 87 | nodePort: 23456 88 | name: web 89 | - protocol: TCP 90 | # 该端口为 traefik 的管理WEB界面 91 | port: 8080 92 | name: admin 93 | type: NodePort 94 | -------------------------------------------------------------------------------- /manifests/ingress/traefik-ui.ing.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: extensions/v1beta1 3 | kind: Ingress 4 | metadata: 5 | name: traefik-web-ui 6 | namespace: kube-system 7 | spec: 8 | rules: 9 | - host: traefik-ui.test.com 10 | http: 11 | paths: 12 | - path: / 13 | backend: 14 | serviceName: traefik-ingress-service 15 | servicePort: 8080 16 | -------------------------------------------------------------------------------- /manifests/kubedns/kubedns.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: ConfigMap 4 | metadata: 5 | name: kube-dns 6 | namespace: kube-system 7 | labels: 8 | addonmanager.kubernetes.io/mode: EnsureExists 9 | 10 | --- 11 | apiVersion: v1 12 | kind: ServiceAccount 13 | metadata: 14 | name: kube-dns 15 | namespace: kube-system 16 | labels: 17 | #kubernetes.io/cluster-service: "true" 18 | addonmanager.kubernetes.io/mode: Reconcile 19 | 20 | --- 21 | apiVersion: v1 22 | kind: Service 23 | metadata: 24 | name: kube-dns 25 | namespace: kube-system 26 | labels: 27 | k8s-app: kube-dns 28 | #kubernetes.io/cluster-service: "true" 29 | addonmanager.kubernetes.io/mode: Reconcile 30 | kubernetes.io/name: "KubeDNS" 31 | spec: 32 | selector: 33 | k8s-app: kube-dns 34 | clusterIP: 10.68.0.2 35 | ports: 36 | - name: dns 37 | port: 53 38 | protocol: UDP 39 | - name: dns-tcp 40 | port: 53 41 | protocol: TCP 42 | 43 | --- 44 | apiVersion: apps/v1 45 | kind: Deployment 46 | metadata: 47 | name: kube-dns 48 | namespace: kube-system 49 | labels: 50 | k8s-app: kube-dns 51 | #kubernetes.io/cluster-service: "true" 52 | addonmanager.kubernetes.io/mode: Reconcile 53 | spec: 54 | # replicas: not specified here: 55 | # 1. In order to make Addon Manager do not reconcile this replicas parameter. 56 | # 2. Default is 1. 57 | # 3. Will be tuned in real time if DNS horizontal auto-scaling is turned on. 58 | strategy: 59 | rollingUpdate: 60 | maxSurge: 10% 61 | maxUnavailable: 0 62 | selector: 63 | matchLabels: 64 | k8s-app: kube-dns 65 | template: 66 | metadata: 67 | labels: 68 | k8s-app: kube-dns 69 | annotations: 70 | scheduler.alpha.kubernetes.io/critical-pod: '' 71 | spec: 72 | tolerations: 73 | - key: "CriticalAddonsOnly" 74 | operator: "Exists" 75 | volumes: 76 | - name: kube-dns-config 77 | configMap: 78 | name: kube-dns 79 | optional: true 80 | containers: 81 | - name: kubedns 82 | #image: gcr.io/google_containers/k8s-dns-kube-dns-amd64:1.14.5 83 | image: mirrorgooglecontainers/k8s-dns-kube-dns-amd64:1.14.5 84 | resources: 85 | # TODO: Set memory limits when we've profiled the container for large 86 | # clusters, then set request = limit to keep this container in 87 | # guaranteed class. 
Currently, this container falls into the 88 | # "burstable" category so the kubelet doesn't backoff from restarting it. 89 | limits: 90 | memory: 170Mi 91 | requests: 92 | cpu: 100m 93 | memory: 70Mi 94 | livenessProbe: 95 | httpGet: 96 | path: /healthcheck/kubedns 97 | port: 10054 98 | scheme: HTTP 99 | initialDelaySeconds: 60 100 | timeoutSeconds: 5 101 | successThreshold: 1 102 | failureThreshold: 5 103 | readinessProbe: 104 | httpGet: 105 | path: /readiness 106 | port: 8081 107 | scheme: HTTP 108 | # we poll on pod startup for the Kubernetes master service and 109 | # only setup the /readiness HTTP server once that's available. 110 | initialDelaySeconds: 3 111 | timeoutSeconds: 5 112 | args: 113 | - --domain=cluster.local. 114 | - --dns-port=10053 115 | - --config-dir=/kube-dns-config 116 | - --v=2 117 | env: 118 | - name: PROMETHEUS_PORT 119 | value: "10055" 120 | ports: 121 | - containerPort: 10053 122 | name: dns-local 123 | protocol: UDP 124 | - containerPort: 10053 125 | name: dns-tcp-local 126 | protocol: TCP 127 | - containerPort: 10055 128 | name: metrics 129 | protocol: TCP 130 | volumeMounts: 131 | - name: kube-dns-config 132 | mountPath: /kube-dns-config 133 | - name: dnsmasq 134 | #image: gcr.io/google_containers/k8s-dns-dnsmasq-nanny-amd64:1.14.5 135 | image: mirrorgooglecontainers/k8s-dns-dnsmasq-nanny-amd64:1.14.5 136 | livenessProbe: 137 | httpGet: 138 | path: /healthcheck/dnsmasq 139 | port: 10054 140 | scheme: HTTP 141 | initialDelaySeconds: 60 142 | timeoutSeconds: 5 143 | successThreshold: 1 144 | failureThreshold: 5 145 | args: 146 | - -v=2 147 | - -logtostderr 148 | - -configDir=/etc/k8s/dns/dnsmasq-nanny 149 | - -restartDnsmasq=true 150 | - -- 151 | - -k 152 | - --cache-size=1000 153 | - --log-facility=- 154 | - --server=/cluster.local./127.0.0.1#10053 155 | - --server=/in-addr.arpa/127.0.0.1#10053 156 | - --server=/ip6.arpa/127.0.0.1#10053 157 | ports: 158 | - containerPort: 53 159 | name: dns 160 | protocol: UDP 161 | - containerPort: 53 162 | name: dns-tcp 163 | protocol: TCP 164 | # see: https://github.com/kubernetes/kubernetes/issues/29055 for details 165 | resources: 166 | requests: 167 | cpu: 150m 168 | memory: 20Mi 169 | volumeMounts: 170 | - name: kube-dns-config 171 | mountPath: /etc/k8s/dns/dnsmasq-nanny 172 | - name: sidecar 173 | #image: gcr.io/google_containers/k8s-dns-sidecar-amd64:1.14.5 174 | image: mirrorgooglecontainers/k8s-dns-sidecar-amd64:1.14.5 175 | livenessProbe: 176 | httpGet: 177 | path: /metrics 178 | port: 10054 179 | scheme: HTTP 180 | initialDelaySeconds: 60 181 | timeoutSeconds: 5 182 | successThreshold: 1 183 | failureThreshold: 5 184 | args: 185 | - --v=2 186 | - --logtostderr 187 | - --probe=kubedns,127.0.0.1:10053,kubernetes.default.svc.cluster.local.,5,A 188 | - --probe=dnsmasq,127.0.0.1:53,kubernetes.default.svc.cluster.local.,5,A 189 | ports: 190 | - containerPort: 10054 191 | name: metrics 192 | protocol: TCP 193 | resources: 194 | requests: 195 | memory: 20Mi 196 | cpu: 10m 197 | dnsPolicy: Default # Don't use cluster DNS. 
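      # dnsPolicy Default makes kube-dns inherit the node's resolv.conf for upstream
      # lookups instead of pointing back at the cluster DNS service itself.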
198 | serviceAccountName: kube-dns 199 | -------------------------------------------------------------------------------- /manifests/kubedns/readme.md: -------------------------------------------------------------------------------- 1 | ### 说明 2 | 3 | + 本目录为k8s集群的插件 kube-dns的配置目录 4 | + 因kubedns.yaml文件中参数(CLUSTER_DNS_SVC_IP, CLUSTER_DNS_DOMAIN)根据hosts文件设置而定,需要使用ansible template模块替换参数后生成 5 | + 运行 `ansible-playbook 01.prepare.yml`后会重新生成该目录下的kubedns.yaml 文件 6 | + kubedns.yaml [模板文件](../../roles/deploy/templates/kubedns.yaml.j2) 7 | -------------------------------------------------------------------------------- /pics/alipay.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mendickxiao/kubeasz/c8987d051027b6d157da57041bb9d4a4c38075dd/pics/alipay.png -------------------------------------------------------------------------------- /pics/ansible.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mendickxiao/kubeasz/c8987d051027b6d157da57041bb9d4a4c38075dd/pics/ansible.jpg -------------------------------------------------------------------------------- /pics/docker.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mendickxiao/kubeasz/c8987d051027b6d157da57041bb9d4a4c38075dd/pics/docker.jpg -------------------------------------------------------------------------------- /pics/grafana.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mendickxiao/kubeasz/c8987d051027b6d157da57041bb9d4a4c38075dd/pics/grafana.png -------------------------------------------------------------------------------- /pics/influxdb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mendickxiao/kubeasz/c8987d051027b6d157da57041bb9d4a4c38075dd/pics/influxdb.png -------------------------------------------------------------------------------- /pics/kube.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mendickxiao/kubeasz/c8987d051027b6d157da57041bb9d4a4c38075dd/pics/kube.jpg -------------------------------------------------------------------------------- /roles/calico/tasks/main.yml: -------------------------------------------------------------------------------- 1 | - name: 创建calico 相关目录 2 | file: name={{ item }} state=directory 3 | with_items: 4 | - /etc/calico/ssl 5 | - /root/local/kube-system/calico 6 | 7 | - name: 复制CA 证书到calico 证书目录 8 | copy: src={{ ca_dir }}/ca.pem dest=/etc/calico/ssl/ca.pem 9 | 10 | - name: 创建calico 证书请求 11 | template: src=calico-csr.json.j2 dest=/etc/calico/ssl/calico-csr.json 12 | 13 | - name: 创建 calico证书和私钥 14 | shell: "cd /etc/calico/ssl && {{ bin_dir }}/cfssl gencert \ 15 | -ca={{ ca_dir }}/ca.pem \ 16 | -ca-key={{ ca_dir }}/ca-key.pem \ 17 | -config={{ ca_dir }}/ca-config.json \ 18 | -profile=kubernetes calico-csr.json | {{ bin_dir }}/cfssljson -bare calico" 19 | 20 | - name: 准备 calico DaemonSet yaml文件 21 | template: src=calico.yaml.j2 dest=/root/local/kube-system/calico/calico.yaml 22 | 23 | - name: 准备 calico rbac文件 24 | template: src=calico-rbac.yaml.j2 dest=/root/local/kube-system/calico/calico-rbac.yaml 25 | 26 | # 只需单节点执行一次,重复执行的报错可以忽略 27 | - name: 运行 calico网络 28 | shell: "{{ bin_dir }}/kubectl create -f /root/local/kube-system/calico/ && sleep 15" 29 | when: NODE_ID is defined and 
NODE_ID == "node1" 30 | ignore_errors: true 31 | 32 | # 删除原有cni配置 33 | - name: 删除默认cni配置 34 | file: path=/etc/cni/net.d/10-default.conf state=absent 35 | 36 | # 删除原有cni插件网卡mynet0 37 | - name: 删除默认cni插件网卡mynet0 38 | shell: "ip link del mynet0" 39 | ignore_errors: true 40 | 41 | # [可选]cni calico plugins 已经在calico.yaml完成自动安装 42 | - name: 下载calicoctl 客户端 43 | copy: src={{ base_dir }}/bin/{{ item }} dest={{ bin_dir }}/{{ item }} mode=0755 44 | with_items: 45 | #- calico 46 | #- calico-ipam 47 | #- loopback 48 | - calicoctl 49 | 50 | - name: 准备 calicoctl配置文件 51 | template: src=calicoctl.cfg.j2 dest=/etc/calico/calicoctl.cfg 52 | -------------------------------------------------------------------------------- /roles/calico/templates/calico-csr.json.j2: -------------------------------------------------------------------------------- 1 | { 2 | "CN": "calico", 3 | "hosts": [], 4 | "key": { 5 | "algo": "rsa", 6 | "size": 2048 7 | }, 8 | "names": [ 9 | { 10 | "C": "CN", 11 | "ST": "HangZhou", 12 | "L": "XS", 13 | "O": "k8s", 14 | "OU": "System" 15 | } 16 | ] 17 | } 18 | -------------------------------------------------------------------------------- /roles/calico/templates/calico-rbac.yaml.j2: -------------------------------------------------------------------------------- 1 | # Calico Version v2.6.5 2 | # https://docs.projectcalico.org/v2.6/releases#v2.6.5 3 | 4 | --- 5 | 6 | kind: ClusterRole 7 | apiVersion: rbac.authorization.k8s.io/v1 8 | metadata: 9 | name: calico-kube-controllers 10 | rules: 11 | - apiGroups: 12 | - "" 13 | - extensions 14 | resources: 15 | - pods 16 | - namespaces 17 | - networkpolicies 18 | - nodes 19 | verbs: 20 | - watch 21 | - list 22 | --- 23 | kind: ClusterRoleBinding 24 | apiVersion: rbac.authorization.k8s.io/v1 25 | metadata: 26 | name: calico-kube-controllers 27 | roleRef: 28 | apiGroup: rbac.authorization.k8s.io 29 | kind: ClusterRole 30 | name: calico-kube-controllers 31 | subjects: 32 | - kind: ServiceAccount 33 | name: calico-kube-controllers 34 | namespace: kube-system 35 | 36 | --- 37 | 38 | kind: ClusterRole 39 | apiVersion: rbac.authorization.k8s.io/v1 40 | metadata: 41 | name: calico-node 42 | rules: 43 | - apiGroups: [""] 44 | resources: 45 | - pods 46 | - nodes 47 | verbs: 48 | - get 49 | 50 | --- 51 | 52 | apiVersion: rbac.authorization.k8s.io/v1 53 | kind: ClusterRoleBinding 54 | metadata: 55 | name: calico-node 56 | roleRef: 57 | apiGroup: rbac.authorization.k8s.io 58 | kind: ClusterRole 59 | name: calico-node 60 | subjects: 61 | - kind: ServiceAccount 62 | name: calico-node 63 | namespace: kube-system 64 | 65 | -------------------------------------------------------------------------------- /roles/calico/templates/calico.yaml.j2: -------------------------------------------------------------------------------- 1 | # Calico Version v2.6.5 2 | # https://docs.projectcalico.org/v2.6/releases#v2.6.5 3 | # This manifest includes the following component versions: 4 | # calico/node:v2.6.5 5 | # calico/cni:v1.11.2 6 | # calico/kube-controllers:v1.0.2 7 | 8 | # This ConfigMap is used to configure a self-hosted Calico installation. 9 | kind: ConfigMap 10 | apiVersion: v1 11 | metadata: 12 | name: calico-config 13 | namespace: kube-system 14 | data: 15 | # Configure this with the location of your etcd cluster. 16 | etcd_endpoints: "{{ ETCD_ENDPOINTS }}" 17 | 18 | # Configure the Calico backend to use. 19 | calico_backend: "bird" 20 | 21 | # The CNI network configuration to install on each node. 
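  # Note: the etcd_*_file paths referenced below match the certificates that
  # roles/calico/tasks/main.yml generates under /etc/calico/ssl on each node.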
22 | cni_network_config: |- 23 | { 24 | "name": "k8s-pod-network", 25 | "cniVersion": "0.1.0", 26 | "type": "calico", 27 | "etcd_endpoints": "{{ ETCD_ENDPOINTS }}", 28 | "etcd_key_file": "/etc/calico/ssl/calico-key.pem", 29 | "etcd_cert_file": "/etc/calico/ssl/calico.pem", 30 | "etcd_ca_cert_file": "/etc/calico/ssl/ca.pem", 31 | "log_level": "info", 32 | "mtu": 1500, 33 | "ipam": { 34 | "type": "calico-ipam" 35 | }, 36 | "policy": { 37 | "type": "k8s" 38 | }, 39 | "kubernetes": { 40 | "kubeconfig": "/root/.kube/config" 41 | } 42 | } 43 | 44 | # If you're using TLS enabled etcd uncomment the following. 45 | # You must also populate the Secret below with these files. 46 | etcd_ca: "/calico-secrets/ca.pem" 47 | etcd_cert: "/calico-secrets/calico.pem" 48 | etcd_key: "/calico-secrets/calico-key.pem" 49 | --- 50 | 51 | # This manifest installs the calico/node container, as well 52 | # as the Calico CNI plugins and network config on 53 | # each master and worker node in a Kubernetes cluster. 54 | kind: DaemonSet 55 | apiVersion: extensions/v1beta1 56 | metadata: 57 | name: calico-node 58 | namespace: kube-system 59 | labels: 60 | k8s-app: calico-node 61 | spec: 62 | selector: 63 | matchLabels: 64 | k8s-app: calico-node 65 | template: 66 | metadata: 67 | labels: 68 | k8s-app: calico-node 69 | annotations: 70 | scheduler.alpha.kubernetes.io/critical-pod: '' 71 | scheduler.alpha.kubernetes.io/tolerations: | 72 | [{"key": "dedicated", "value": "master", "effect": "NoSchedule" }, 73 | {"key":"CriticalAddonsOnly", "operator":"Exists"}] 74 | spec: 75 | hostNetwork: true 76 | serviceAccountName: calico-node 77 | # Minimize downtime during a rolling upgrade or deletion; tell Kubernetes to do a "force 78 | # deletion": https://kubernetes.io/docs/concepts/workloads/pods/pod/#termination-of-pods. 79 | terminationGracePeriodSeconds: 0 80 | containers: 81 | # Runs calico/node container on each Kubernetes node. This 82 | # container programs network policy and routes on each 83 | # host. 84 | - name: calico-node 85 | #image: quay.io/calico/node:v2.6.5 86 | image: calico/node:v2.6.5 87 | env: 88 | # The location of the Calico etcd cluster. 89 | - name: ETCD_ENDPOINTS 90 | valueFrom: 91 | configMapKeyRef: 92 | name: calico-config 93 | key: etcd_endpoints 94 | # Choose the backend to use. 95 | - name: CALICO_NETWORKING_BACKEND 96 | valueFrom: 97 | configMapKeyRef: 98 | name: calico-config 99 | key: calico_backend 100 | # Cluster type to identify the deployment type 101 | - name: CLUSTER_TYPE 102 | value: "k8s,bgp" 103 | # Disable file logging so `kubectl logs` works. 104 | - name: CALICO_DISABLE_FILE_LOGGING 105 | value: "true" 106 | # Set Felix endpoint to host default action to ACCEPT. 107 | - name: FELIX_DEFAULTENDPOINTTOHOSTACTION 108 | value: "ACCEPT" 109 | # Configure the IP Pool from which Pod IPs will be chosen. 110 | - name: CALICO_IPV4POOL_CIDR 111 | value: "{{ CLUSTER_CIDR }}" 112 | - name: CALICO_IPV4POOL_IPIP 113 | value: "{{ CALICO_IPV4POOL_IPIP }}" 114 | # Set noderef for node controller. 115 | - name: CALICO_K8S_NODE_REF 116 | valueFrom: 117 | fieldRef: 118 | fieldPath: spec.nodeName 119 | # Disable IPv6 on Kubernetes. 120 | - name: FELIX_IPV6SUPPORT 121 | value: "false" 122 | # Set Felix logging to "info" 123 | - name: FELIX_LOGSEVERITYSCREEN 124 | value: "info" 125 | # Set MTU for tunnel device used if ipip is enabled 126 | - name: FELIX_IPINIPMTU 127 | value: "1440" 128 | # Location of the CA certificate for etcd. 
129 | - name: ETCD_CA_CERT_FILE 130 | valueFrom: 131 | configMapKeyRef: 132 | name: calico-config 133 | key: etcd_ca 134 | # Location of the client key for etcd. 135 | - name: ETCD_KEY_FILE 136 | valueFrom: 137 | configMapKeyRef: 138 | name: calico-config 139 | key: etcd_key 140 | # Location of the client certificate for etcd. 141 | - name: ETCD_CERT_FILE 142 | valueFrom: 143 | configMapKeyRef: 144 | name: calico-config 145 | key: etcd_cert 146 | # Auto-detect the BGP IP address. 147 | - name: IP 148 | value: "" 149 | - name: IP_AUTODETECTION_METHOD 150 | value: "{{ IP_AUTODETECTION_METHOD }}" 151 | - name: FELIX_HEALTHENABLED 152 | value: "true" 153 | securityContext: 154 | privileged: true 155 | resources: 156 | requests: 157 | cpu: 250m 158 | livenessProbe: 159 | httpGet: 160 | path: /liveness 161 | port: 9099 162 | periodSeconds: 10 163 | initialDelaySeconds: 10 164 | failureThreshold: 6 165 | readinessProbe: 166 | httpGet: 167 | path: /readiness 168 | port: 9099 169 | periodSeconds: 10 170 | volumeMounts: 171 | - mountPath: /lib/modules 172 | name: lib-modules 173 | readOnly: true 174 | - mountPath: /var/run/calico 175 | name: var-run-calico 176 | readOnly: false 177 | - mountPath: /calico-secrets 178 | name: etcd-certs 179 | # This container installs the Calico CNI binaries 180 | # and CNI network config file on each node. 181 | - name: install-cni 182 | #image: quay.io/calico/cni:v1.11.2 183 | image: calico/cni:v1.11.2 184 | command: ["/install-cni.sh"] 185 | env: 186 | # The location of the Calico etcd cluster. 187 | - name: ETCD_ENDPOINTS 188 | valueFrom: 189 | configMapKeyRef: 190 | name: calico-config 191 | key: etcd_endpoints 192 | # The CNI network config to install on each node. 193 | - name: CNI_NETWORK_CONFIG 194 | valueFrom: 195 | configMapKeyRef: 196 | name: calico-config 197 | key: cni_network_config 198 | volumeMounts: 199 | - mountPath: /host/opt/cni/bin 200 | name: cni-bin-dir 201 | - mountPath: /host/etc/cni/net.d 202 | name: cni-net-dir 203 | - mountPath: /calico-secrets 204 | name: etcd-certs 205 | volumes: 206 | # Used by calico/node. 207 | - name: lib-modules 208 | hostPath: 209 | path: /lib/modules 210 | - name: var-run-calico 211 | hostPath: 212 | path: /var/run/calico 213 | # Used to install CNI. 214 | - name: cni-bin-dir 215 | hostPath: 216 | path: {{ bin_dir }} 217 | - name: cni-net-dir 218 | hostPath: 219 | path: /etc/cni/net.d 220 | # Mount in the etcd TLS secrets. 221 | - name: etcd-certs 222 | hostPath: 223 | path: /etc/calico/ssl 224 | 225 | --- 226 | 227 | # This manifest deploys the Calico Kubernetes controllers. 228 | # See https://github.com/projectcalico/kube-controllers 229 | apiVersion: extensions/v1beta1 230 | kind: Deployment 231 | metadata: 232 | name: calico-kube-controllers 233 | namespace: kube-system 234 | labels: 235 | k8s-app: calico-kube-controllers 236 | annotations: 237 | scheduler.alpha.kubernetes.io/critical-pod: '' 238 | scheduler.alpha.kubernetes.io/tolerations: | 239 | [{"key": "dedicated", "value": "master", "effect": "NoSchedule" }, 240 | {"key":"CriticalAddonsOnly", "operator":"Exists"}] 241 | spec: 242 | # The controllers can only have a single active instance. 
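  # Hence replicas stays at 1 and the Recreate strategy below prevents two
  # controller instances from overlapping during an update.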
243 | replicas: 1 244 | strategy: 245 | type: Recreate 246 | selector: 247 | matchLabels: 248 | k8s-app: calico-kube-controllers 249 | template: 250 | metadata: 251 | name: calico-kube-controllers 252 | namespace: kube-system 253 | labels: 254 | k8s-app: calico-kube-controllers 255 | spec: 256 | # The controllers must run in the host network namespace so that 257 | # it isn't governed by policy that would prevent it from working. 258 | hostNetwork: true 259 | serviceAccountName: calico-kube-controllers 260 | containers: 261 | - name: calico-kube-controllers 262 | #image: quay.io/calico/kube-controllers:v1.0.2 263 | image: calico/kube-controllers:v1.0.2 264 | env: 265 | # The location of the Calico etcd cluster. 266 | - name: ETCD_ENDPOINTS 267 | valueFrom: 268 | configMapKeyRef: 269 | name: calico-config 270 | key: etcd_endpoints 271 | # Location of the CA certificate for etcd. 272 | - name: ETCD_CA_CERT_FILE 273 | valueFrom: 274 | configMapKeyRef: 275 | name: calico-config 276 | key: etcd_ca 277 | # Location of the client key for etcd. 278 | - name: ETCD_KEY_FILE 279 | valueFrom: 280 | configMapKeyRef: 281 | name: calico-config 282 | key: etcd_key 283 | # Location of the client certificate for etcd. 284 | - name: ETCD_CERT_FILE 285 | valueFrom: 286 | configMapKeyRef: 287 | name: calico-config 288 | key: etcd_cert 289 | # Choose which controllers to run. 290 | - name: ENABLED_CONTROLLERS 291 | value: policy,profile,workloadendpoint,node 292 | volumeMounts: 293 | # Mount in the etcd TLS secrets. 294 | - mountPath: /calico-secrets 295 | name: etcd-certs 296 | volumes: 297 | # Mount in the etcd TLS secrets. 298 | - name: etcd-certs 299 | hostPath: 300 | path: /etc/calico/ssl 301 | 302 | --- 303 | 304 | apiVersion: v1 305 | kind: ServiceAccount 306 | metadata: 307 | name: calico-kube-controllers 308 | namespace: kube-system 309 | 310 | --- 311 | 312 | apiVersion: v1 313 | kind: ServiceAccount 314 | metadata: 315 | name: calico-node 316 | namespace: kube-system 317 | -------------------------------------------------------------------------------- /roles/calico/templates/calicoctl.cfg.j2: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: calicoApiConfig 3 | metadata: 4 | spec: 5 | datastoreType: "etcdv2" 6 | etcdEndpoints: {{ ETCD_ENDPOINTS }} 7 | etcdKeyFile: /etc/calico/ssl/calico-key.pem 8 | etcdCertFile: /etc/calico/ssl/calico.pem 9 | etcdCACertFile: /etc/calico/ssl/ca.pem 10 | -------------------------------------------------------------------------------- /roles/deploy/tasks/main.yml: -------------------------------------------------------------------------------- 1 | - name: prepare some dirs 2 | file: name={{ item }} state=directory 3 | with_items: 4 | - "{{ bin_dir }}" 5 | - "{{ ca_dir }}" 6 | - "{{ base_dir }}/roles/prepare/files/" 7 | 8 | - name: 下载证书工具 CFSSL 9 | copy: src={{ base_dir }}/bin/{{ item }} dest={{ bin_dir }}/{{ item }} mode=0755 10 | with_items: 11 | - cfssl 12 | - cfssl-certinfo 13 | - cfssljson 14 | 15 | - name: 准备CA配置文件 16 | template: src=ca-config.json.j2 dest={{ ca_dir }}/ca-config.json 17 | 18 | - name: 准备CA签名请求 19 | template: src=ca-csr.json.j2 dest={{ ca_dir }}/ca-csr.json 20 | 21 | - name: 生成 CA 证书和私钥 22 | shell: "cd {{ ca_dir }} && {{ bin_dir }}/cfssl gencert -initca ca-csr.json | {{ bin_dir }}/cfssljson -bare ca" 23 | 24 | # 为了保证整个安装的幂等性,如果已经生成过CA证书,就使用已经存在的CA;删除/roles/prepare/files/ca* 可以使用新CA 证书 25 | - name: 准备分发 CA证书 26 | copy: src={{ ca_dir }}/{{ item }} dest={{ base_dir }}/roles/prepare/files/{{ 
item }} force=no 27 | with_items: 28 | - ca.pem 29 | - ca-key.pem 30 | - ca.csr 31 | - ca-config.json 32 | 33 | # kubedns.yaml文件中部分参数根据hosts文件设置而定,因此需要用template模块替换参数 34 | - name: 准备 kubedns的部署文件 kubedns.yaml 35 | template: src=kubedns.yaml.j2 dest={{ base_dir }}/manifests/kubedns/kubedns.yaml 36 | 37 | -------------------------------------------------------------------------------- /roles/deploy/templates/ca-config.json.j2: -------------------------------------------------------------------------------- 1 | { 2 | "signing": { 3 | "default": { 4 | "expiry": "87600h" 5 | }, 6 | "profiles": { 7 | "kubernetes": { 8 | "usages": [ 9 | "signing", 10 | "key encipherment", 11 | "server auth", 12 | "client auth" 13 | ], 14 | "expiry": "87600h" 15 | } 16 | } 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /roles/deploy/templates/ca-csr.json.j2: -------------------------------------------------------------------------------- 1 | { 2 | "CN": "kubernetes", 3 | "key": { 4 | "algo": "rsa", 5 | "size": 2048 6 | }, 7 | "names": [ 8 | { 9 | "C": "CN", 10 | "ST": "HangZhou", 11 | "L": "XS", 12 | "O": "k8s", 13 | "OU": "System" 14 | } 15 | ] 16 | } 17 | -------------------------------------------------------------------------------- /roles/deploy/templates/kubedns.yaml.j2: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: ConfigMap 4 | metadata: 5 | name: kube-dns 6 | namespace: kube-system 7 | labels: 8 | addonmanager.kubernetes.io/mode: EnsureExists 9 | 10 | --- 11 | apiVersion: v1 12 | kind: ServiceAccount 13 | metadata: 14 | name: kube-dns 15 | namespace: kube-system 16 | labels: 17 | #kubernetes.io/cluster-service: "true" 18 | addonmanager.kubernetes.io/mode: Reconcile 19 | 20 | --- 21 | apiVersion: v1 22 | kind: Service 23 | metadata: 24 | name: kube-dns 25 | namespace: kube-system 26 | labels: 27 | k8s-app: kube-dns 28 | #kubernetes.io/cluster-service: "true" 29 | addonmanager.kubernetes.io/mode: Reconcile 30 | kubernetes.io/name: "KubeDNS" 31 | spec: 32 | selector: 33 | k8s-app: kube-dns 34 | clusterIP: {{ CLUSTER_DNS_SVC_IP }} 35 | ports: 36 | - name: dns 37 | port: 53 38 | protocol: UDP 39 | - name: dns-tcp 40 | port: 53 41 | protocol: TCP 42 | 43 | --- 44 | apiVersion: apps/v1 45 | kind: Deployment 46 | metadata: 47 | name: kube-dns 48 | namespace: kube-system 49 | labels: 50 | k8s-app: kube-dns 51 | #kubernetes.io/cluster-service: "true" 52 | addonmanager.kubernetes.io/mode: Reconcile 53 | spec: 54 | # replicas: not specified here: 55 | # 1. In order to make Addon Manager do not reconcile this replicas parameter. 56 | # 2. Default is 1. 57 | # 3. Will be tuned in real time if DNS horizontal auto-scaling is turned on. 
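  # With maxSurge 10% and maxUnavailable 0 below, a rolling update brings up the
  # extra kube-dns pod first and removes an old one only after it is ready, so
  # cluster DNS keeps answering queries throughout the update.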
58 | strategy: 59 | rollingUpdate: 60 | maxSurge: 10% 61 | maxUnavailable: 0 62 | selector: 63 | matchLabels: 64 | k8s-app: kube-dns 65 | template: 66 | metadata: 67 | labels: 68 | k8s-app: kube-dns 69 | annotations: 70 | scheduler.alpha.kubernetes.io/critical-pod: '' 71 | spec: 72 | tolerations: 73 | - key: "CriticalAddonsOnly" 74 | operator: "Exists" 75 | volumes: 76 | - name: kube-dns-config 77 | configMap: 78 | name: kube-dns 79 | optional: true 80 | containers: 81 | - name: kubedns 82 | #image: gcr.io/google_containers/k8s-dns-kube-dns-amd64:1.14.5 83 | image: mirrorgooglecontainers/k8s-dns-kube-dns-amd64:1.14.5 84 | resources: 85 | # TODO: Set memory limits when we've profiled the container for large 86 | # clusters, then set request = limit to keep this container in 87 | # guaranteed class. Currently, this container falls into the 88 | # "burstable" category so the kubelet doesn't backoff from restarting it. 89 | limits: 90 | memory: 170Mi 91 | requests: 92 | cpu: 100m 93 | memory: 70Mi 94 | livenessProbe: 95 | httpGet: 96 | path: /healthcheck/kubedns 97 | port: 10054 98 | scheme: HTTP 99 | initialDelaySeconds: 60 100 | timeoutSeconds: 5 101 | successThreshold: 1 102 | failureThreshold: 5 103 | readinessProbe: 104 | httpGet: 105 | path: /readiness 106 | port: 8081 107 | scheme: HTTP 108 | # we poll on pod startup for the Kubernetes master service and 109 | # only setup the /readiness HTTP server once that's available. 110 | initialDelaySeconds: 3 111 | timeoutSeconds: 5 112 | args: 113 | - --domain={{ CLUSTER_DNS_DOMAIN }} 114 | - --dns-port=10053 115 | - --config-dir=/kube-dns-config 116 | - --v=2 117 | env: 118 | - name: PROMETHEUS_PORT 119 | value: "10055" 120 | ports: 121 | - containerPort: 10053 122 | name: dns-local 123 | protocol: UDP 124 | - containerPort: 10053 125 | name: dns-tcp-local 126 | protocol: TCP 127 | - containerPort: 10055 128 | name: metrics 129 | protocol: TCP 130 | volumeMounts: 131 | - name: kube-dns-config 132 | mountPath: /kube-dns-config 133 | - name: dnsmasq 134 | #image: gcr.io/google_containers/k8s-dns-dnsmasq-nanny-amd64:1.14.5 135 | image: mirrorgooglecontainers/k8s-dns-dnsmasq-nanny-amd64:1.14.5 136 | livenessProbe: 137 | httpGet: 138 | path: /healthcheck/dnsmasq 139 | port: 10054 140 | scheme: HTTP 141 | initialDelaySeconds: 60 142 | timeoutSeconds: 5 143 | successThreshold: 1 144 | failureThreshold: 5 145 | args: 146 | - -v=2 147 | - -logtostderr 148 | - -configDir=/etc/k8s/dns/dnsmasq-nanny 149 | - -restartDnsmasq=true 150 | - -- 151 | - -k 152 | - --cache-size=1000 153 | - --log-facility=- 154 | - --server=/{{ CLUSTER_DNS_DOMAIN }}/127.0.0.1#10053 155 | - --server=/in-addr.arpa/127.0.0.1#10053 156 | - --server=/ip6.arpa/127.0.0.1#10053 157 | ports: 158 | - containerPort: 53 159 | name: dns 160 | protocol: UDP 161 | - containerPort: 53 162 | name: dns-tcp 163 | protocol: TCP 164 | # see: https://github.com/kubernetes/kubernetes/issues/29055 for details 165 | resources: 166 | requests: 167 | cpu: 150m 168 | memory: 20Mi 169 | volumeMounts: 170 | - name: kube-dns-config 171 | mountPath: /etc/k8s/dns/dnsmasq-nanny 172 | - name: sidecar 173 | #image: gcr.io/google_containers/k8s-dns-sidecar-amd64:1.14.5 174 | image: mirrorgooglecontainers/k8s-dns-sidecar-amd64:1.14.5 175 | livenessProbe: 176 | httpGet: 177 | path: /metrics 178 | port: 10054 179 | scheme: HTTP 180 | initialDelaySeconds: 60 181 | timeoutSeconds: 5 182 | successThreshold: 1 183 | failureThreshold: 5 184 | args: 185 | - --v=2 186 | - --logtostderr 187 | - 
--probe=kubedns,127.0.0.1:10053,kubernetes.default.svc.{{ CLUSTER_DNS_DOMAIN }},5,A 188 | - --probe=dnsmasq,127.0.0.1:53,kubernetes.default.svc.{{ CLUSTER_DNS_DOMAIN }},5,A 189 | ports: 190 | - containerPort: 10054 191 | name: metrics 192 | protocol: TCP 193 | resources: 194 | requests: 195 | memory: 20Mi 196 | cpu: 10m 197 | dnsPolicy: Default # Don't use cluster DNS. 198 | serviceAccountName: kube-dns 199 | -------------------------------------------------------------------------------- /roles/docker/files/daemon.json: -------------------------------------------------------------------------------- 1 | { 2 | "registry-mirrors": ["https://registry.docker-cn.com"], 3 | "max-concurrent-downloads": 6 4 | } 5 | -------------------------------------------------------------------------------- /roles/docker/files/docker-tag: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | curl -s -S "https://registry.hub.docker.com/v2/repositories/$@/tags/" | jq '."results"[]["name"]' |sort 3 | -------------------------------------------------------------------------------- /roles/docker/tasks/main.yml: -------------------------------------------------------------------------------- 1 | ## ---------docker daemon配置部分----------- 2 | - name: 下载 docker 二进制文件 3 | copy: src={{ base_dir }}/bin/{{ item }} dest={{ bin_dir }}/{{ item }} mode=0755 4 | with_items: 5 | - docker-containerd 6 | - docker-containerd-shim 7 | - docker-init 8 | - docker-runc 9 | - docker 10 | - docker-containerd-ctr 11 | - dockerd 12 | - docker-proxy 13 | 14 | - name: docker命令自动补全 15 | copy: src=docker dest=/etc/bash_completion.d/docker mode=0644 16 | 17 | - name: docker国内镜像加速 18 | copy: src=daemon.json dest=/etc/docker/daemon.json 19 | 20 | - name: flush-iptables 21 | shell: "iptables -F && iptables -X \ 22 | && iptables -F -t nat && iptables -X -t nat \ 23 | && iptables -F -t raw && iptables -X -t raw \ 24 | && iptables -F -t mangle && iptables -X -t mangle" 25 | 26 | - name: 创建docker的systemd unit文件 27 | template: src=docker.service.j2 dest=/etc/systemd/system/docker.service 28 | 29 | - name: 开启docker 服务 30 | shell: systemctl daemon-reload && systemctl enable docker && systemctl restart docker 31 | 32 | ## 可选 ------安装docker查询镜像 tag的小工具---- 33 | # 先拉取下节点的ansible setup信息,起到缓存效果,否则后续when 判断可能失败 34 | - name: 缓存ansilbe setup信息 35 | setup: gather_subset=min 36 | tags: docker-tag 37 | 38 | - name: apt安装轻量JSON处理程序 39 | apt: name=jq state=latest 40 | when: ansible_distribution == "Ubuntu" and ansible_distribution_major_version == "16" 41 | tags: docker-tag 42 | 43 | - name: yum安装轻量JSON处理程序 44 | yum: name=jq state=latest 45 | when: ansible_distribution == "CentOS" and ansible_distribution_major_version == "7" 46 | tags: docker-tag 47 | 48 | - name: 下载 docker-tag 49 | copy: src=docker-tag dest={{ bin_dir }}/docker-tag mode=0755 50 | tags: docker-tag 51 | -------------------------------------------------------------------------------- /roles/docker/templates/docker.service.j2: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Docker Application Container Engine 3 | Documentation=http://docs.docker.io 4 | 5 | [Service] 6 | Environment="PATH={{ bin_dir }}:/bin:/sbin:/usr/bin:/usr/sbin" 7 | ExecStart={{ bin_dir }}/dockerd --log-level=error 8 | ExecStartPost=/sbin/iptables -I FORWARD -s 0.0.0.0/0 -j ACCEPT 9 | ExecReload=/bin/kill -s HUP $MAINPID 10 | Restart=on-failure 11 | RestartSec=5 12 | LimitNOFILE=infinity 13 | LimitNPROC=infinity 14 | 
LimitCORE=infinity 15 | Delegate=yes 16 | KillMode=process 17 | 18 | [Install] 19 | WantedBy=multi-user.target 20 | -------------------------------------------------------------------------------- /roles/etcd/tasks/main.yml: -------------------------------------------------------------------------------- 1 | - name: 下载etcd二进制文件 2 | copy: src={{ base_dir }}/bin/{{ item }} dest={{ bin_dir }}/{{ item }} mode=0755 3 | with_items: 4 | - etcd 5 | - etcdctl 6 | 7 | - name: 创建etcd证书目录 8 | file: name=/etc/etcd/ssl state=directory 9 | 10 | - name: 创建etcd证书请求 11 | template: src=etcd-csr.json.j2 dest=/etc/etcd/ssl/etcd-csr.json 12 | 13 | - name: 创建 etcd证书和私钥 14 | shell: "cd /etc/etcd/ssl && {{ bin_dir }}/cfssl gencert \ 15 | -ca={{ ca_dir }}/ca.pem \ 16 | -ca-key={{ ca_dir }}/ca-key.pem \ 17 | -config={{ ca_dir }}/ca-config.json \ 18 | -profile=kubernetes etcd-csr.json | {{ bin_dir }}/cfssljson -bare etcd" 19 | 20 | - name: 创建etcd工作目录 21 | file: name=/var/lib/etcd state=directory 22 | 23 | - name: 创建etcd的systemd unit文件 24 | template: src=etcd.service.j2 dest=/etc/systemd/system/etcd.service 25 | 26 | - name: 开启etcd服务 27 | shell: systemctl daemon-reload && systemctl enable etcd && systemctl restart etcd 28 | -------------------------------------------------------------------------------- /roles/etcd/templates/etcd-csr.json.j2: -------------------------------------------------------------------------------- 1 | { 2 | "CN": "etcd", 3 | "hosts": [ 4 | "127.0.0.1", 5 | "{{ NODE_IP }}" 6 | ], 7 | "key": { 8 | "algo": "rsa", 9 | "size": 2048 10 | }, 11 | "names": [ 12 | { 13 | "C": "CN", 14 | "ST": "HangZhou", 15 | "L": "XS", 16 | "O": "k8s", 17 | "OU": "System" 18 | } 19 | ] 20 | } 21 | -------------------------------------------------------------------------------- /roles/etcd/templates/etcd.service.j2: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Etcd Server 3 | After=network.target 4 | After=network-online.target 5 | Wants=network-online.target 6 | Documentation=https://github.com/coreos 7 | 8 | [Service] 9 | Type=notify 10 | WorkingDirectory=/var/lib/etcd/ 11 | ExecStart={{ bin_dir }}/etcd \ 12 | --name={{ NODE_NAME }} \ 13 | --cert-file=/etc/etcd/ssl/etcd.pem \ 14 | --key-file=/etc/etcd/ssl/etcd-key.pem \ 15 | --peer-cert-file=/etc/etcd/ssl/etcd.pem \ 16 | --peer-key-file=/etc/etcd/ssl/etcd-key.pem \ 17 | --trusted-ca-file={{ ca_dir }}/ca.pem \ 18 | --peer-trusted-ca-file={{ ca_dir }}/ca.pem \ 19 | --initial-advertise-peer-urls=https://{{ NODE_IP }}:2380 \ 20 | --listen-peer-urls=https://{{ NODE_IP }}:2380 \ 21 | --listen-client-urls=https://{{ NODE_IP }}:2379,http://127.0.0.1:2379 \ 22 | --advertise-client-urls=https://{{ NODE_IP }}:2379 \ 23 | --initial-cluster-token=etcd-cluster-0 \ 24 | --initial-cluster={{ ETCD_NODES }} \ 25 | --initial-cluster-state=new \ 26 | --data-dir=/var/lib/etcd 27 | Restart=on-failure 28 | RestartSec=5 29 | LimitNOFILE=65536 30 | 31 | [Install] 32 | WantedBy=multi-user.target 33 | -------------------------------------------------------------------------------- /roles/flannel/tasks/main.yml: -------------------------------------------------------------------------------- 1 | - name: 创建flannel cni 相关目录 2 | file: name={{ item }} state=directory 3 | with_items: 4 | - /etc/cni/net.d 5 | - /root/local/kube-system/flannel 6 | 7 | - name: 下载flannel cni plugins 8 | copy: src={{ base_dir }}/bin/{{ item }} dest={{ bin_dir }}/{{ item }} mode=0755 9 | with_items: 10 | - bridge 11 | - flannel 12 | - host-local 13 | - 
loopback 14 | - portmap 15 | 16 | - name: 准备 flannel DaemonSet yaml文件 17 | template: src=kube-flannel.yaml.j2 dest=/root/local/kube-system/flannel/kube-flannel.yaml 18 | 19 | # 只需单节点执行一次,重复执行的报错可以忽略 20 | - name: 运行 flannel网络 21 | shell: "{{ bin_dir }}/kubectl create -f /root/local/kube-system/flannel/ && sleep 15" 22 | when: NODE_ID is defined and NODE_ID == "node1" 23 | ignore_errors: true 24 | 25 | # 删除原有cni配置 26 | - name: 删除默认cni配置 27 | file: path=/etc/cni/net.d/10-default.conf state=absent 28 | 29 | -------------------------------------------------------------------------------- /roles/flannel/templates/kube-flannel.yaml.j2: -------------------------------------------------------------------------------- 1 | --- 2 | kind: ClusterRole 3 | apiVersion: rbac.authorization.k8s.io/v1 4 | metadata: 5 | name: flannel 6 | rules: 7 | - apiGroups: 8 | - "" 9 | resources: 10 | - pods 11 | verbs: 12 | - get 13 | - apiGroups: 14 | - "" 15 | resources: 16 | - nodes 17 | verbs: 18 | - list 19 | - watch 20 | - apiGroups: 21 | - "" 22 | resources: 23 | - nodes/status 24 | verbs: 25 | - patch 26 | --- 27 | kind: ClusterRoleBinding 28 | apiVersion: rbac.authorization.k8s.io/v1 29 | metadata: 30 | name: flannel 31 | roleRef: 32 | apiGroup: rbac.authorization.k8s.io 33 | kind: ClusterRole 34 | name: flannel 35 | subjects: 36 | - kind: ServiceAccount 37 | name: flannel 38 | namespace: kube-system 39 | --- 40 | apiVersion: v1 41 | kind: ServiceAccount 42 | metadata: 43 | name: flannel 44 | namespace: kube-system 45 | --- 46 | kind: ConfigMap 47 | apiVersion: v1 48 | metadata: 49 | name: kube-flannel-cfg 50 | namespace: kube-system 51 | labels: 52 | tier: node 53 | app: flannel 54 | data: 55 | cni-conf.json: | 56 | { 57 | "name": "cbr0", 58 | "plugins": [ 59 | { 60 | "type": "flannel", 61 | "delegate": { 62 | "hairpinMode": true, 63 | "isDefaultGateway": true 64 | } 65 | }, 66 | { 67 | "type": "portmap", 68 | "capabilities": { 69 | "portMappings": true 70 | } 71 | } 72 | ] 73 | } 74 | net-conf.json: | 75 | { 76 | "Network": "{{ CLUSTER_CIDR }}", 77 | "Backend": { 78 | "Type": "{{ FLANNEL_BACKEND }}" 79 | } 80 | } 81 | --- 82 | apiVersion: extensions/v1beta1 83 | kind: DaemonSet 84 | metadata: 85 | name: kube-flannel-ds 86 | namespace: kube-system 87 | labels: 88 | tier: node 89 | app: flannel 90 | spec: 91 | template: 92 | metadata: 93 | labels: 94 | tier: node 95 | app: flannel 96 | spec: 97 | hostNetwork: true 98 | nodeSelector: 99 | beta.kubernetes.io/arch: amd64 100 | tolerations: 101 | - key: node-role.kubernetes.io/master 102 | operator: Exists 103 | effect: NoSchedule 104 | serviceAccountName: flannel 105 | initContainers: 106 | - name: install-cni 107 | image: jmgao1983/flannel:v0.9.1-amd64 108 | #image: quay.io/coreos/flannel:v0.9.1-amd64 109 | command: 110 | - cp 111 | args: 112 | - -f 113 | - /etc/kube-flannel/cni-conf.json 114 | - /etc/cni/net.d/10-flannel.conflist 115 | volumeMounts: 116 | - name: cni 117 | mountPath: /etc/cni/net.d 118 | - name: flannel-cfg 119 | mountPath: /etc/kube-flannel/ 120 | containers: 121 | - name: kube-flannel 122 | #image: quay.io/coreos/flannel:v0.9.1-amd64 123 | image: jmgao1983/flannel:v0.9.1-amd64 124 | command: 125 | - /opt/bin/flanneld 126 | args: 127 | - --ip-masq 128 | - --kube-subnet-mgr 129 | resources: 130 | requests: 131 | cpu: "100m" 132 | memory: "50Mi" 133 | limits: 134 | cpu: "100m" 135 | memory: "50Mi" 136 | securityContext: 137 | privileged: true 138 | env: 139 | - name: POD_NAME 140 | valueFrom: 141 | fieldRef: 142 | fieldPath: metadata.name 143 | - 
name: POD_NAMESPACE 144 | valueFrom: 145 | fieldRef: 146 | fieldPath: metadata.namespace 147 | volumeMounts: 148 | - name: run 149 | mountPath: /run 150 | - name: flannel-cfg 151 | mountPath: /etc/kube-flannel/ 152 | volumes: 153 | - name: run 154 | hostPath: 155 | path: /run 156 | - name: cni 157 | hostPath: 158 | path: /etc/cni/net.d 159 | - name: flannel-cfg 160 | configMap: 161 | name: kube-flannel-cfg 162 | -------------------------------------------------------------------------------- /roles/harbor/tasks/main.yml: -------------------------------------------------------------------------------- 1 | - name: 下载docker compose 二进制文件 2 | copy: src={{ base_dir }}/bin/docker-compose dest={{ bin_dir }}/docker-compose mode=0755 3 | 4 | # 注册变量result,根据result结果判断是否已经安装过harbor 5 | # result|failed 说明没有安装过harbor,下一步进行安装 6 | # result|succeeded 说明已经安装过harbor,下一步跳过安装 7 | - name: 注册变量result 8 | command: ls /data/registry 9 | register: result 10 | ignore_errors: True 11 | 12 | - name: 解压harbor离线安装包 13 | unarchive: 14 | src: "{{ base_dir }}/down/harbor-offline-installer-v1.2.2.tgz" 15 | dest: /root/local 16 | copy: yes 17 | keep_newer: yes 18 | when: result|failed 19 | 20 | - name: 导入harbor所需 docker images 21 | shell: "{{ bin_dir }}/docker load -i /root/local/harbor/harbor.v1.2.2.tar.gz" 22 | when: result|failed 23 | 24 | - name: 创建harbor证书请求 25 | template: src=harbor-csr.json.j2 dest={{ ca_dir }}/harbor-csr.json 26 | when: result|failed 27 | 28 | - name: 创建harbor证书和私钥 29 | shell: "cd {{ ca_dir }} && {{ bin_dir }}/cfssl gencert \ 30 | -ca={{ ca_dir }}/ca.pem \ 31 | -ca-key={{ ca_dir }}/ca-key.pem \ 32 | -config={{ ca_dir }}/ca-config.json \ 33 | -profile=kubernetes harbor-csr.json | {{ bin_dir }}/cfssljson -bare harbor" 34 | when: result|failed 35 | 36 | - name: 配置 harbor.cfg 文件 37 | template: src=harbor.cfg.j2 dest=/root/local/harbor/harbor.cfg 38 | when: result|failed 39 | 40 | - name: 安装 harbor 41 | shell: "cd /root/local/harbor && \ 42 | export PATH={{ bin_dir }}:$PATH && \ 43 | ./install.sh" 44 | when: result|failed 45 | -------------------------------------------------------------------------------- /roles/harbor/templates/harbor-csr.json.j2: -------------------------------------------------------------------------------- 1 | { 2 | "CN": "harbor", 3 | "hosts": [ 4 | "127.0.0.1", 5 | "{{ NODE_IP }}", 6 | "{{ HARBOR_DOMAIN }}" 7 | ], 8 | "key": { 9 | "algo": "rsa", 10 | "size": 2048 11 | }, 12 | "names": [ 13 | { 14 | "C": "CN", 15 | "ST": "HangZhou", 16 | "L": "XS", 17 | "O": "k8s", 18 | "OU": "System" 19 | } 20 | ] 21 | } 22 | -------------------------------------------------------------------------------- /roles/harbor/templates/harbor.cfg.j2: -------------------------------------------------------------------------------- 1 | ## Configuration file of Harbor 2 | 3 | #The IP address or hostname to access admin UI and registry service. 4 | #DO NOT use localhost or 127.0.0.1, because Harbor needs to be accessed by external clients. 5 | hostname = {{ NODE_IP }} 6 | 7 | #The protocol for accessing the UI and token/notification service, by default it is http. 8 | #It can be set to https if ssl is enabled on nginx. 9 | ui_url_protocol = https 10 | 11 | #The password for the root user of mysql db, change this before any production use. 12 | db_password = Harbor12345 13 | 14 | #Maximum number of job workers in job service 15 | max_job_workers = 3 16 | 17 | #Determine whether or not to generate certificate for the registry's token. 
18 | #If the value is on, the prepare script creates new root cert and private key 19 | #for generating token to access the registry. If the value is off the default key/cert will be used. 20 | #This flag also controls the creation of the notary signer's cert. 21 | customize_crt = on 22 | 23 | #The path of cert and key files for nginx, they are applied only the protocol is set to https 24 | ssl_cert = {{ ca_dir }}/harbor.pem 25 | ssl_cert_key = {{ ca_dir }}/harbor-key.pem 26 | 27 | #The path of secretkey storage 28 | secretkey_path = /data 29 | 30 | #Admiral's url, comment this attribute, or set its value to NA when Harbor is standalone 31 | admiral_url = NA 32 | 33 | #The password of the Clair's postgres database, only effective when Harbor is deployed with Clair. 34 | #Please update it before deployment, subsequent update will cause Clair's API server and Harbor unable to access Clair's database. 35 | clair_db_password = password 36 | 37 | #NOTES: The properties between BEGIN INITIAL PROPERTIES and END INITIAL PROPERTIES 38 | #only take effect in the first boot, the subsequent changes of these properties 39 | #should be performed on web ui 40 | 41 | #************************BEGIN INITIAL PROPERTIES************************ 42 | 43 | #Email account settings for sending out password resetting emails. 44 | 45 | #Email server uses the given username and password to authenticate on TLS connections to host and act as identity. 46 | #Identity left blank to act as username. 47 | email_identity = 48 | 49 | email_server = smtp.mydomain.com 50 | email_server_port = 25 51 | email_username = sample_admin@mydomain.com 52 | email_password = abc 53 | email_from = admin 54 | email_ssl = false 55 | 56 | ##The initial password of Harbor admin, only works for the first time when Harbor starts. 57 | #It has no effect after the first launch of Harbor. 58 | #Change the admin password from UI after launching Harbor. 59 | harbor_admin_password = Harbor12345 60 | 61 | ##By default the auth mode is db_auth, i.e. the credentials are stored in a local database. 62 | #Set it to ldap_auth if you want to verify a user's credentials against an LDAP server. 63 | auth_mode = db_auth 64 | 65 | #The url for an ldap endpoint. 66 | ldap_url = ldaps://ldap.mydomain.com 67 | 68 | #A user's DN who has the permission to search the LDAP/AD server. 69 | #If your LDAP/AD server does not support anonymous search, you should configure this DN and ldap_search_pwd. 70 | #ldap_searchdn = uid=searchuser,ou=people,dc=mydomain,dc=com 71 | 72 | #the password of the ldap_searchdn 73 | #ldap_search_pwd = password 74 | 75 | #The base DN from which to look up a user in LDAP/AD 76 | ldap_basedn = ou=people,dc=mydomain,dc=com 77 | 78 | #Search filter for LDAP/AD, make sure the syntax of the filter is correct. 79 | #ldap_filter = (objectClass=person) 80 | 81 | # The attribute used in a search to match a user, it could be uid, cn, email, sAMAccountName or other attributes depending on your LDAP/AD 82 | ldap_uid = uid 83 | 84 | #the scope to search for users, 1-LDAP_SCOPE_BASE, 2-LDAP_SCOPE_ONELEVEL, 3-LDAP_SCOPE_SUBTREE 85 | ldap_scope = 3 86 | 87 | #Timeout (in seconds) when connecting to an LDAP Server. The default value (and most reasonable) is 5 seconds. 
88 | ldap_timeout = 5 89 | 90 | #Turn on or off the self-registration feature 91 | self_registration = on 92 | 93 | #The expiration time (in minute) of token created by token service, default is 30 minutes 94 | token_expiration = 30 95 | 96 | #The flag to control what users have permission to create projects 97 | #The default value "everyone" allows everyone to creates a project. 98 | #Set to "adminonly" so that only admin user can create project. 99 | project_creation_restriction = everyone 100 | 101 | #Determine whether the job service should verify the ssl cert when it connects to a remote registry. 102 | #Set this flag to off when the remote registry uses a self-signed or untrusted certificate. 103 | verify_remote_cert = on 104 | #************************END INITIAL PROPERTIES************************ 105 | ############# 106 | 107 | -------------------------------------------------------------------------------- /roles/kube-master/tasks/main.yml: -------------------------------------------------------------------------------- 1 | - name: 下载 kube-master 二进制 2 | copy: src={{ base_dir }}/bin/{{ item }} dest={{ bin_dir }}/{{ item }} mode=0755 3 | with_items: 4 | - kube-apiserver 5 | - kube-controller-manager 6 | - kube-scheduler 7 | - kubectl 8 | - kube-proxy 9 | - kubelet 10 | 11 | - name: 创建 kubernetes 证书签名请求 12 | template: src=kubernetes-csr.json.j2 dest={{ ca_dir }}/kubernetes-csr.json 13 | 14 | - name: 创建 kubernetes 证书和私钥 15 | shell: "cd {{ ca_dir }} && {{ bin_dir }}/cfssl gencert \ 16 | -ca={{ ca_dir }}/ca.pem \ 17 | -ca-key={{ ca_dir }}/ca-key.pem \ 18 | -config={{ ca_dir }}/ca-config.json \ 19 | -profile=kubernetes kubernetes-csr.json | {{ bin_dir }}/cfssljson -bare kubernetes" 20 | 21 | - name: 创建 token.csv 22 | template: src=token.csv.j2 dest={{ ca_dir }}/token.csv 23 | 24 | - name: 创建 basic-auth.csv 25 | template: src=basic-auth.csv.j2 dest={{ ca_dir }}/basic-auth.csv 26 | 27 | - name: 创建kube-apiserver的systemd unit文件 28 | template: src=kube-apiserver.service.j2 dest=/etc/systemd/system/kube-apiserver.service 29 | 30 | - name: 创建kube-controller-manager的systemd unit文件 31 | template: src=kube-controller-manager.service.j2 dest=/etc/systemd/system/kube-controller-manager.service 32 | 33 | - name: 创建kube-scheduler的systemd unit文件 34 | template: src=kube-scheduler.service.j2 dest=/etc/systemd/system/kube-scheduler.service 35 | 36 | - name: daemon-reload 37 | shell: systemctl daemon-reload 38 | 39 | - name: enable-kube-apiserver 40 | shell: systemctl enable kube-apiserver 41 | 42 | - name: enable-kube-controller-manager 43 | shell: systemctl enable kube-controller-manager 44 | 45 | - name: enable-kube-scheduler 46 | shell: systemctl enable kube-scheduler 47 | 48 | - name: start-kube-apiserver 49 | shell: systemctl restart kube-apiserver 50 | 51 | - name: start-kube-controller-manager 52 | shell: systemctl restart kube-controller-manager 53 | 54 | - name: start-kube-scheduler 55 | shell: systemctl restart kube-scheduler 56 | -------------------------------------------------------------------------------- /roles/kube-master/templates/basic-auth.csv.j2: -------------------------------------------------------------------------------- 1 | {{ BASIC_AUTH_PASS }},{{ BASIC_AUTH_USER }},1 2 | readonly,readonly,2 3 | -------------------------------------------------------------------------------- /roles/kube-master/templates/kube-apiserver.service.j2: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Kubernetes API Server 3 | 
Documentation=https://github.com/GoogleCloudPlatform/kubernetes 4 | After=network.target 5 | 6 | [Service] 7 | ExecStart={{ bin_dir }}/kube-apiserver \ 8 | --admission-control=NamespaceLifecycle,LimitRanger,ServiceAccount,DefaultStorageClass,ResourceQuota,NodeRestriction \ 9 | --bind-address={{ NODE_IP }} \ 10 | --insecure-bind-address=127.0.0.1 \ 11 | --authorization-mode=Node,RBAC \ 12 | --kubelet-https=true \ 13 | --anonymous-auth=false \ 14 | --basic-auth-file={{ ca_dir }}/basic-auth.csv \ 15 | --enable-bootstrap-token-auth \ 16 | --token-auth-file={{ ca_dir }}/token.csv \ 17 | --service-cluster-ip-range={{ SERVICE_CIDR }} \ 18 | --service-node-port-range={{ NODE_PORT_RANGE }} \ 19 | --tls-cert-file={{ ca_dir }}/kubernetes.pem \ 20 | --tls-private-key-file={{ ca_dir }}/kubernetes-key.pem \ 21 | --client-ca-file={{ ca_dir }}/ca.pem \ 22 | --service-account-key-file={{ ca_dir }}/ca-key.pem \ 23 | --etcd-cafile={{ ca_dir }}/ca.pem \ 24 | --etcd-certfile={{ ca_dir }}/kubernetes.pem \ 25 | --etcd-keyfile={{ ca_dir }}/kubernetes-key.pem \ 26 | --etcd-servers={{ ETCD_ENDPOINTS }} \ 27 | --enable-swagger-ui=true \ 28 | --allow-privileged=true \ 29 | --audit-log-maxage=30 \ 30 | --audit-log-maxbackup=3 \ 31 | --audit-log-maxsize=100 \ 32 | --audit-log-path=/var/lib/audit.log \ 33 | --event-ttl=1h \ 34 | --v=2 35 | Restart=on-failure 36 | RestartSec=5 37 | Type=notify 38 | LimitNOFILE=65536 39 | 40 | [Install] 41 | WantedBy=multi-user.target 42 | -------------------------------------------------------------------------------- /roles/kube-master/templates/kube-controller-manager.service.j2: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Kubernetes Controller Manager 3 | Documentation=https://github.com/GoogleCloudPlatform/kubernetes 4 | 5 | [Service] 6 | ExecStart={{ bin_dir }}/kube-controller-manager \ 7 | --address=127.0.0.1 \ 8 | --master=http://127.0.0.1:8080 \ 9 | --allocate-node-cidrs=true \ 10 | --service-cluster-ip-range={{ SERVICE_CIDR }} \ 11 | --cluster-cidr={{ CLUSTER_CIDR }} \ 12 | --cluster-name=kubernetes \ 13 | --cluster-signing-cert-file={{ ca_dir }}/ca.pem \ 14 | --cluster-signing-key-file={{ ca_dir }}/ca-key.pem \ 15 | --service-account-private-key-file={{ ca_dir }}/ca-key.pem \ 16 | --root-ca-file={{ ca_dir }}/ca.pem \ 17 | --horizontal-pod-autoscaler-use-rest-clients=false \ 18 | --leader-elect=true \ 19 | --v=2 20 | Restart=on-failure 21 | RestartSec=5 22 | 23 | [Install] 24 | WantedBy=multi-user.target 25 | -------------------------------------------------------------------------------- /roles/kube-master/templates/kube-scheduler.service.j2: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Kubernetes Scheduler 3 | Documentation=https://github.com/GoogleCloudPlatform/kubernetes 4 | 5 | [Service] 6 | ExecStart={{ bin_dir }}/kube-scheduler \ 7 | --address=127.0.0.1 \ 8 | --master=http://127.0.0.1:8080 \ 9 | --leader-elect=true \ 10 | --v=2 11 | Restart=on-failure 12 | RestartSec=5 13 | 14 | [Install] 15 | WantedBy=multi-user.target 16 | -------------------------------------------------------------------------------- /roles/kube-master/templates/kubernetes-csr.json.j2: -------------------------------------------------------------------------------- 1 | { 2 | "CN": "kubernetes", 3 | "hosts": [ 4 | "127.0.0.1", 5 | "{{ MASTER_IP }}", 6 | "{{ NODE_IP }}", 7 | "{{ CLUSTER_KUBERNETES_SVC_IP }}", 8 | "kubernetes", 9 | "kubernetes.default", 10 | 
"kubernetes.default.svc", 11 | "kubernetes.default.svc.cluster", 12 | "kubernetes.default.svc.cluster.local" 13 | ], 14 | "key": { 15 | "algo": "rsa", 16 | "size": 2048 17 | }, 18 | "names": [ 19 | { 20 | "C": "CN", 21 | "ST": "HangZhou", 22 | "L": "XS", 23 | "O": "k8s", 24 | "OU": "System" 25 | } 26 | ] 27 | } 28 | -------------------------------------------------------------------------------- /roles/kube-master/templates/token.csv.j2: -------------------------------------------------------------------------------- 1 | {{ BOOTSTRAP_TOKEN }},kubelet-bootstrap,10001,"system:kubelet-bootstrap" 2 | -------------------------------------------------------------------------------- /roles/kube-node/tasks/main.yml: -------------------------------------------------------------------------------- 1 | # 创建kubelet,kube-proxy工作目录和cni配置目录 2 | - name: 创建kube-node 相关目录 3 | file: name={{ item }} state=directory 4 | with_items: 5 | - /var/lib/kubelet 6 | - /var/lib/kube-proxy 7 | - /etc/cni/net.d 8 | 9 | - name: 下载 kubelet,kube-proxy 二进制和基础 cni plugins 10 | copy: src={{ base_dir }}/bin/{{ item }} dest={{ bin_dir }}/{{ item }} mode=0755 11 | with_items: 12 | - kubelet 13 | - kube-proxy 14 | - bridge 15 | - host-local 16 | - loopback 17 | 18 | ##----------kubelet 配置部分-------------- 19 | # kubelet 启动时向 kube-apiserver 发送 TLS bootstrapping 请求,需要绑定该角色 20 | # 只需单节点执行一次,重复执行的报错可以忽略 21 | # 增加15s等待kube-apiserver正常工作 22 | - name: kubelet-bootstrap-setting 23 | shell: "sleep 15 && {{ bin_dir }}/kubectl create clusterrolebinding kubelet-bootstrap \ 24 | --clusterrole=system:node-bootstrapper --user=kubelet-bootstrap" 25 | when: NODE_ID is defined and NODE_ID == "node1" 26 | ignore_errors: true 27 | 28 | #创建bootstrap.kubeconfig配置文件 29 | - name: 设置集群参数 30 | shell: "{{ bin_dir }}/kubectl config set-cluster kubernetes \ 31 | --certificate-authority={{ ca_dir }}/ca.pem \ 32 | --embed-certs=true \ 33 | --server={{ KUBE_APISERVER }} \ 34 | --kubeconfig=bootstrap.kubeconfig" 35 | - name: 设置客户端认证参数 36 | shell: "{{ bin_dir }}/kubectl config set-credentials kubelet-bootstrap \ 37 | --token={{ BOOTSTRAP_TOKEN }} \ 38 | --kubeconfig=bootstrap.kubeconfig" 39 | - name: 设置上下文参数 40 | shell: "{{ bin_dir }}/kubectl config set-context default \ 41 | --cluster=kubernetes \ 42 | --user=kubelet-bootstrap \ 43 | --kubeconfig=bootstrap.kubeconfig" 44 | - name: 选择默认上下文 45 | shell: "{{ bin_dir }}/kubectl config use-context default --kubeconfig=bootstrap.kubeconfig" 46 | 47 | - name: 安装bootstrap.kubeconfig配置文件 48 | shell: "mv $HOME/bootstrap.kubeconfig /etc/kubernetes/bootstrap.kubeconfig" 49 | 50 | - name: 准备 cni配置文件 51 | template: src=cni-default.conf.j2 dest=/etc/cni/net.d/10-default.conf 52 | 53 | - name: 创建kubelet的systemd unit文件 54 | template: src=kubelet.service.j2 dest=/etc/systemd/system/kubelet.service 55 | tags: kubelet 56 | 57 | - name: 开启kubelet 服务 58 | shell: systemctl daemon-reload && systemctl enable kubelet && systemctl restart kubelet 59 | tags: kubelet 60 | 61 | - name: approve-kubelet-csr 62 | shell: "sleep 15 && {{ bin_dir }}/kubectl get csr|grep 'Pending' | awk 'NR>0{print $1}'| xargs {{ bin_dir }}/kubectl certificate approve" 63 | when: NODE_ID is defined and NODE_ID == "node1" 64 | ignore_errors: true 65 | 66 | ##-------kube-proxy部分---------------- 67 | - name: 准备kube-proxy 证书签名请求 68 | template: src=kube-proxy-csr.json.j2 dest={{ ca_dir }}/kube-proxy-csr.json 69 | 70 | - name: 创建 kube-proxy证书与私钥 71 | shell: "cd {{ ca_dir }} && {{ bin_dir }}/cfssl gencert \ 72 | -ca={{ ca_dir }}/ca.pem \ 73 | -ca-key={{ ca_dir }}/ca-key.pem 
\ 74 | -config={{ ca_dir }}/ca-config.json \ 75 | -profile=kubernetes kube-proxy-csr.json | {{ bin_dir }}/cfssljson -bare kube-proxy" 76 | 77 | #创建kube-proxy.kubeconfig配置文件 78 | - name: 设置集群参数 79 | shell: "{{ bin_dir }}/kubectl config set-cluster kubernetes \ 80 | --certificate-authority={{ ca_dir }}/ca.pem \ 81 | --embed-certs=true \ 82 | --server={{ KUBE_APISERVER }} \ 83 | --kubeconfig=kube-proxy.kubeconfig" 84 | - name: 设置客户端认证参数 85 | shell: "{{ bin_dir }}/kubectl config set-credentials kube-proxy \ 86 | --client-certificate={{ ca_dir }}/kube-proxy.pem \ 87 | --client-key={{ ca_dir }}/kube-proxy-key.pem \ 88 | --embed-certs=true \ 89 | --kubeconfig=kube-proxy.kubeconfig" 90 | - name: 设置上下文参数 91 | shell: "{{ bin_dir }}/kubectl config set-context default \ 92 | --cluster=kubernetes \ 93 | --user=kube-proxy \ 94 | --kubeconfig=kube-proxy.kubeconfig" 95 | - name: 选择默认上下文 96 | shell: "{{ bin_dir }}/kubectl config use-context default --kubeconfig=kube-proxy.kubeconfig" 97 | 98 | - name: 安装kube-proxy.kubeconfig配置文件 99 | shell: "mv $HOME/kube-proxy.kubeconfig /etc/kubernetes/kube-proxy.kubeconfig" 100 | 101 | - name: 创建kube-proxy 服务文件 102 | tags: reload-kube-proxy 103 | template: src=kube-proxy.service.j2 dest=/etc/systemd/system/kube-proxy.service 104 | 105 | - name: 开启kube-proxy 服务 106 | tags: reload-kube-proxy 107 | shell: systemctl daemon-reload && systemctl enable kube-proxy && systemctl restart kube-proxy 108 | 109 | -------------------------------------------------------------------------------- /roles/kube-node/templates/cni-default.conf.j2: -------------------------------------------------------------------------------- 1 | { 2 | "name": "mynet", 3 | "type": "bridge", 4 | "bridge": "mynet0", 5 | "isDefaultGateway": true, 6 | "ipMasq": true, 7 | "hairpinMode": true, 8 | "ipam": { 9 | "type": "host-local", 10 | "subnet": "{{ CLUSTER_CIDR }}" 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /roles/kube-node/templates/kube-proxy-csr.json.j2: -------------------------------------------------------------------------------- 1 | { 2 | "CN": "system:kube-proxy", 3 | "hosts": [], 4 | "key": { 5 | "algo": "rsa", 6 | "size": 2048 7 | }, 8 | "names": [ 9 | { 10 | "C": "CN", 11 | "ST": "HangZhou", 12 | "L": "XS", 13 | "O": "k8s", 14 | "OU": "System" 15 | } 16 | ] 17 | } 18 | -------------------------------------------------------------------------------- /roles/kube-node/templates/kube-proxy.service.j2: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Kubernetes Kube-Proxy Server 3 | Documentation=https://github.com/GoogleCloudPlatform/kubernetes 4 | After=network.target 5 | 6 | [Service] 7 | # kube-proxy 根据 --cluster-cidr 判断集群内部和外部流量,指定 --cluster-cidr 或 --masquerade-all 选项后 8 | # kube-proxy 会对访问 Service IP 的请求做 SNAT,这个特性与calico 实现 network policy冲突,因此禁用 9 | WorkingDirectory=/var/lib/kube-proxy 10 | ExecStart={{ bin_dir }}/kube-proxy \ 11 | --bind-address={{ NODE_IP }} \ 12 | --hostname-override={{ NODE_IP }} \ 13 | --kubeconfig=/etc/kubernetes/kube-proxy.kubeconfig \ 14 | --logtostderr=true \ 15 | --v=2 16 | Restart=on-failure 17 | RestartSec=5 18 | LimitNOFILE=65536 19 | 20 | [Install] 21 | WantedBy=multi-user.target 22 | -------------------------------------------------------------------------------- /roles/kube-node/templates/kubelet.service.j2: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Kubernetes Kubelet 3 | 
Documentation=https://github.com/GoogleCloudPlatform/kubernetes 4 | After=docker.service 5 | Requires=docker.service 6 | 7 | [Service] 8 | WorkingDirectory=/var/lib/kubelet 9 | #--pod-infra-container-image=registry.access.redhat.com/rhel7/pod-infrastructure:latest 10 | ExecStart={{ bin_dir }}/kubelet \ 11 | --address={{ NODE_IP }} \ 12 | --hostname-override={{ NODE_IP }} \ 13 | --pod-infra-container-image={{ POD_INFRA_CONTAINER_IMAGE }} \ 14 | --experimental-bootstrap-kubeconfig=/etc/kubernetes/bootstrap.kubeconfig \ 15 | --kubeconfig=/etc/kubernetes/kubelet.kubeconfig \ 16 | --cert-dir={{ ca_dir }} \ 17 | --network-plugin=cni \ 18 | --cni-conf-dir=/etc/cni/net.d \ 19 | --cni-bin-dir={{ bin_dir }} \ 20 | --cluster-dns={{ CLUSTER_DNS_SVC_IP }} \ 21 | --cluster-domain={{ CLUSTER_DNS_DOMAIN }} \ 22 | --hairpin-mode hairpin-veth \ 23 | --allow-privileged=true \ 24 | --fail-swap-on=false \ 25 | --logtostderr=true \ 26 | --v=2 27 | #kubelet cAdvisor 默认在所有接口监听 4194 端口的请求, 以下iptables限制内网访问 28 | ExecStartPost=/sbin/iptables -A INPUT -s 10.0.0.0/8 -p tcp --dport 4194 -j ACCEPT 29 | ExecStartPost=/sbin/iptables -A INPUT -s 172.16.0.0/12 -p tcp --dport 4194 -j ACCEPT 30 | ExecStartPost=/sbin/iptables -A INPUT -s 192.168.0.0/16 -p tcp --dport 4194 -j ACCEPT 31 | ExecStartPost=/sbin/iptables -A INPUT -p tcp --dport 4194 -j DROP 32 | Restart=on-failure 33 | RestartSec=5 34 | 35 | [Install] 36 | WantedBy=multi-user.target 37 | -------------------------------------------------------------------------------- /roles/kubectl/tasks/main.yml: -------------------------------------------------------------------------------- 1 | - name: 下载kubectl二进制 2 | copy: src={{ base_dir }}/bin/kubectl dest={{ bin_dir }}/kubectl mode=0755 3 | 4 | - name: 准备kubectl使用的admin 证书签名请求 5 | template: src=admin-csr.json.j2 dest={{ ca_dir }}/admin-csr.json 6 | 7 | - name: 创建 admin证书与私钥 8 | shell: "cd {{ ca_dir }} && {{ bin_dir }}/cfssl gencert \ 9 | -ca={{ ca_dir }}/ca.pem \ 10 | -ca-key={{ ca_dir }}/ca-key.pem \ 11 | -config={{ ca_dir }}/ca-config.json \ 12 | -profile=kubernetes admin-csr.json | {{ bin_dir }}/cfssljson -bare admin" 13 | 14 | # 创建kubectl kubeconfig 文件 15 | - name: 设置集群参数 16 | shell: "{{ bin_dir }}/kubectl config set-cluster kubernetes \ 17 | --certificate-authority={{ ca_dir }}/ca.pem \ 18 | --embed-certs=true \ 19 | --server={{ KUBE_APISERVER }}" 20 | - name: 设置客户端认证参数 21 | shell: "{{ bin_dir }}/kubectl config set-credentials admin \ 22 | --client-certificate={{ ca_dir }}/admin.pem \ 23 | --embed-certs=true \ 24 | --client-key={{ ca_dir }}/admin-key.pem" 25 | - name: 设置上下文参数 26 | shell: "{{ bin_dir }}/kubectl config set-context kubernetes \ 27 | --cluster=kubernetes --user=admin" 28 | - name: 选择默认上下文 29 | shell: "{{ bin_dir }}/kubectl config use-context kubernetes" 30 | -------------------------------------------------------------------------------- /roles/kubectl/templates/admin-csr.json.j2: -------------------------------------------------------------------------------- 1 | { 2 | "CN": "admin", 3 | "hosts": [], 4 | "key": { 5 | "algo": "rsa", 6 | "size": 2048 7 | }, 8 | "names": [ 9 | { 10 | "C": "CN", 11 | "ST": "HangZhou", 12 | "L": "XS", 13 | "O": "system:masters", 14 | "OU": "System" 15 | } 16 | ] 17 | } 18 | -------------------------------------------------------------------------------- /roles/lb/tasks/main.yml: -------------------------------------------------------------------------------- 1 | # 先拉取下节点的ansible setup信息,起到缓存效果,否则后续when 判断可能失败 2 | - name: 缓存ansilbe setup信息 3 | setup: gather_subset=min 4 | 5 | - 
name: apt更新缓存刷新 6 | apt: update_cache=yes cache_valid_time=72000 7 | when: ansible_distribution == "Ubuntu" and ansible_distribution_major_version == "16" 8 | 9 | - name: apt安装 haproxy 10 | apt: name=haproxy state=latest 11 | when: ansible_distribution == "Ubuntu" and ansible_distribution_major_version == "16" 12 | 13 | - name: yum安装 haproxy 14 | yum: name=haproxy state=latest 15 | when: ansible_distribution == "CentOS" and ansible_distribution_major_version == "7" 16 | 17 | - name: 创建haproxy配置目录 18 | file: name=/etc/haproxy state=directory 19 | 20 | - name: 修改centos的haproxy.service 21 | template: src=haproxy.service.j2 dest=/usr/lib/systemd/system/haproxy.service 22 | when: ansible_distribution == "CentOS" and ansible_distribution_major_version == "7" 23 | 24 | - name: 配置 haproxy 25 | template: src=haproxy.cfg.j2 dest=/etc/haproxy/haproxy.cfg 26 | 27 | - name: apt安装 keepalived 28 | apt: name=keepalived state=latest 29 | when: ansible_distribution == "Ubuntu" and ansible_distribution_major_version == "16" 30 | 31 | - name: yum安装 keepalived 32 | yum: name=keepalived state=latest 33 | when: ansible_distribution == "CentOS" and ansible_distribution_major_version == "7" 34 | 35 | # CentOS 需要安装psmisc 才能使用命令killall,它在keepalive的监测脚本中使用到 36 | - name: yum安装 psmisc 37 | yum: name=psmisc state=latest 38 | when: ansible_distribution == "CentOS" and ansible_distribution_major_version == "7" 39 | 40 | - name: 创建keepalived配置目录 41 | file: name=/etc/keepalived state=directory 42 | 43 | - name: 配置 keepalived 主节点 44 | template: src=keepalived-master.conf.j2 dest=/etc/keepalived/keepalived.conf 45 | when: LB_ROLE == "master" 46 | 47 | - name: 配置 keepalived 备节点 48 | template: src=keepalived-backup.conf.j2 dest=/etc/keepalived/keepalived.conf 49 | when: LB_ROLE == "backup" 50 | 51 | - name: daemon-reload for haproxy.service 52 | shell: systemctl daemon-reload 53 | 54 | - name: 重启haproxy服务 55 | shell: systemctl enable haproxy && systemctl restart haproxy 56 | 57 | - name: 重启keepalived服务 58 | shell: systemctl enable keepalived && systemctl restart keepalived 59 | -------------------------------------------------------------------------------- /roles/lb/templates/haproxy.cfg.j2: -------------------------------------------------------------------------------- 1 | global 2 | log /dev/log local0 3 | log /dev/log local1 notice 4 | chroot /var/lib/haproxy 5 | stats socket /run/haproxy/admin.sock mode 660 level admin 6 | stats timeout 30s 7 | user haproxy 8 | group haproxy 9 | daemon 10 | nbproc 1 11 | 12 | defaults 13 | log global 14 | timeout connect 5000 15 | timeout client 50000 16 | timeout server 50000 17 | 18 | listen kube-master 19 | bind 0.0.0.0:{{ MASTER_PORT }} 20 | mode tcp 21 | option tcplog 22 | balance source 23 | server s1 {{ LB_EP1 }} check inter 10000 fall 2 rise 2 weight 1 24 | server s2 {{ LB_EP2 }} check inter 10000 fall 2 rise 2 weight 1 25 | -------------------------------------------------------------------------------- /roles/lb/templates/haproxy.service.j2: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=HAProxy Load Balancer 3 | After=syslog.target network.target 4 | 5 | [Service] 6 | EnvironmentFile=/etc/sysconfig/haproxy 7 | ExecStartPre=/usr/bin/mkdir -p /run/haproxy 8 | ExecStart=/usr/sbin/haproxy-systemd-wrapper -f /etc/haproxy/haproxy.cfg -p /run/haproxy.pid $OPTIONS 9 | ExecReload=/bin/kill -USR2 $MAINPID 10 | KillMode=mixed 11 | 12 | [Install] 13 | WantedBy=multi-user.target 14 | 
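The lb role above installs haproxy as a TCP proxy in front of the two master endpoints and pairs it with keepalived (templates below) for the floating {{ MASTER_IP }} VIP. A minimal post-run check on an lb node — a sketch, with <MASTER_PORT> standing in for the port configured in the hosts file:

haproxy -c -f /etc/haproxy/haproxy.cfg      # validate the rendered config
systemctl is-active haproxy keepalived      # both should report "active"
ss -tnlp | grep <MASTER_PORT>               # haproxy listening on the apiserver port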
-------------------------------------------------------------------------------- /roles/lb/templates/keepalived-backup.conf.j2: -------------------------------------------------------------------------------- 1 | global_defs { 2 | router_id lb-backup 3 | } 4 | 5 | vrrp_instance VI-kube-master { 6 | state BACKUP 7 | priority 110 8 | dont_track_primary 9 | interface {{ LB_IF }} 10 | virtual_router_id 51 11 | advert_int 3 12 | virtual_ipaddress { 13 | {{ MASTER_IP }} 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /roles/lb/templates/keepalived-master.conf.j2: -------------------------------------------------------------------------------- 1 | global_defs { 2 | router_id lb-master 3 | } 4 | 5 | vrrp_script check-haproxy { 6 | script "killall -0 haproxy" 7 | interval 5 8 | weight -30 9 | } 10 | 11 | vrrp_instance VI-kube-master { 12 | state MASTER 13 | priority 120 14 | dont_track_primary 15 | interface {{ LB_IF }} 16 | virtual_router_id 51 17 | advert_int 3 18 | track_script { 19 | check-haproxy 20 | } 21 | virtual_ipaddress { 22 | {{ MASTER_IP }} 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /roles/prepare/files/95-k8s-sysctl.conf: -------------------------------------------------------------------------------- 1 | net.ipv4.ip_forward = 1 2 | net.bridge.bridge-nf-call-iptables = 1 3 | net.bridge.bridge-nf-call-ip6tables = 1 4 | net.bridge.bridge-nf-call-arptables = 1 5 | -------------------------------------------------------------------------------- /roles/prepare/tasks/main.yml: -------------------------------------------------------------------------------- 1 | - name: prepare some dirs 2 | file: name={{ item }} state=directory 3 | with_items: 4 | - "{{ bin_dir }}" 5 | - "{{ ca_dir }}" 6 | - /root/.kube 7 | - /etc/docker 8 | 9 | #- name: 集群hosts文件更新 10 | # copy: src=hosts.j2 dest=/etc/hosts 11 | 12 | - name: 写入环境变量$PATH 13 | shell: "sed -i '/export PATH=/d' /etc/profile && \ 14 | echo export PATH={{ bin_dir }}:$PATH >> /etc/profile" 15 | 16 | - name: 下载证书工具 CFSSL 17 | copy: src={{ base_dir }}/bin/{{ item }} dest={{ bin_dir }}/{{ item }} mode=0755 18 | with_items: 19 | - cfssl 20 | - cfssl-certinfo 21 | - cfssljson 22 | 23 | - name: 分发CA 证书 24 | copy: src={{ item }} dest={{ ca_dir }}/{{ item }} mode=0644 25 | with_items: 26 | - ca.pem 27 | - ca-key.pem 28 | - ca.csr 29 | - ca-config.json 30 | 31 | # 先拉取下节点的ansible setup信息,起到缓存效果,否则后续when 判断可能失败 32 | - name: 缓存ansilbe setup信息 33 | setup: gather_subset=min 34 | 35 | # 删除默认安装 36 | - name: 删除ubuntu默认安装 37 | when: ansible_distribution == "Ubuntu" 38 | apt: name={{ item }} state=absent 39 | with_items: 40 | - ufw 41 | - lxd 42 | - lxd-client 43 | - lxcfs 44 | - lxc-common 45 | 46 | # 删除默认安装 47 | - name: 删除centos默认安装 48 | when: ansible_distribution == "CentOS" 49 | yum: name={{ item }} state=absent 50 | with_items: 51 | - firewalld 52 | - firewalld-filesystem 53 | - python-firewall 54 | 55 | - name: 关闭 selinux 56 | shell: "setenforce 0 && echo SELINUX=disabled > /etc/selinux/config" 57 | when: ansible_distribution == "CentOS" 58 | ignore_errors: true 59 | 60 | # 设置系统参数for k8s 61 | # 消除docker info 警告WARNING: bridge-nf-call-ip[6]tables is disabled 62 | - name: 设置系统参数 63 | copy: src=95-k8s-sysctl.conf dest=/etc/sysctl.d/95-k8s-sysctl.conf 64 | 65 | - name: 生效系统参数 66 | shell: "sysctl -p /etc/sysctl.d/95-k8s-sysctl.conf" 67 | ignore_errors: true 68 | --------------------------------------------------------------------------------
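The prepare role above applies 95-k8s-sysctl.conf, prepends {{ bin_dir }} to PATH in /etc/profile and distributes the CA files into {{ ca_dir }}. A quick spot check on a prepared node — a sketch, with <bin_dir> and <ca_dir> standing in for the values configured for the cluster:

sysctl net.ipv4.ip_forward net.bridge.bridge-nf-call-iptables   # both should print 1 (the bridge keys need br_netfilter loaded)
ls <ca_dir>/ca.pem <ca_dir>/ca-key.pem <ca_dir>/ca-config.json  # CA material copied in by the role
grep 'export PATH' /etc/profile                                 # should show <bin_dir> at the front of PATH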