├── roles ├── helm │ └── tasks │ │ ├── main.yml │ │ ├── all.yml │ │ ├── helm_reset.yml │ │ ├── charts_deploy.yml │ │ ├── helm.yml │ │ └── deploy_batch.yml ├── tools │ └── tasks │ │ ├── main.yml │ │ ├── weave_reset.yml │ │ ├── reboot.yml │ │ ├── cluster_sanity.yml │ │ ├── reset_drain.yml │ │ ├── labels.yml │ │ ├── postinstall_messages.yml │ │ └── reset.yml ├── post_deploy │ └── tasks │ │ ├── main.yml │ │ ├── all.yml │ │ ├── post_cluster_deploy.yml │ │ ├── sanity.yml │ │ ├── taints.yml │ │ └── network.yml ├── storage │ ├── tasks │ │ ├── main.yml │ │ ├── all.yml │ │ ├── create_all.yml │ │ ├── nfs_reset.yml │ │ ├── nfs.yml │ │ ├── remove_pvs.yml │ │ ├── vsphere.yml │ │ ├── rook_reset.yml │ │ └── rook.yml │ ├── templates │ │ ├── rook_ceph_conf.j2 │ │ ├── rook-storageclass.j2 │ │ ├── rook-pool.j2 │ │ ├── rook-cluster.j2 │ │ └── nfs.j2 │ └── files │ │ └── vsphere_bug_fix.sh ├── common │ ├── tasks │ │ ├── main.yml │ │ ├── firewalld.yml │ │ ├── swap.yml │ │ ├── rook.yml │ │ ├── all.yml │ │ ├── various.yml │ │ ├── ntpd.yml │ │ ├── decide_master_name.yml │ │ ├── selinux.yml │ │ ├── kube_config.yml │ │ ├── kernel_modules.yml │ │ ├── iptables.yml │ │ ├── aliases_completion.yml │ │ └── install_k8s_packages.yml │ ├── templates │ │ └── cloud-config.j2 │ ├── files │ │ └── 90-kubeadm.conf │ └── handlers │ │ └── main.yml ├── primary-master │ ├── handlers │ │ └── main.yml │ └── templates │ │ └── cloud-config-vsphere-secret.j2 ├── non-primary-master │ └── handlers │ │ └── main.yml └── keepalived │ ├── templates │ ├── check_apiserver.sh.j2 │ └── keepalived.conf.j2 │ └── tasks │ └── main.yaml ├── _config.yml ├── templates ├── kured_profile1.j2 ├── metallb_profile1.j2 ├── tigera-operator_profile1.j2 ├── dashboard_profile1.j2 ├── cert-manager_profile1.j2 └── nginx-ingress_profile1.j2 ├── .gitignore ├── ansible.cfg.example ├── docs ├── architecture.md ├── upgrade_cluster.md ├── Troubleshooting.md ├── add-remove-nodes.md ├── portable_machine_setup.md ├── PRODUCTION_TIPS.md └── popular_helm_charts_cli_deploy.md ├── demo ├── demo-svc.yml ├── demo-ingress.yml ├── demo-claim.yml └── demo-pod.yml ├── .gitattributes ├── .github └── workflows │ ├── greetings.yml │ └── stale.yml ├── scripts └── test.sh ├── allow-all-all-rbac.yml ├── group_vars └── all │ ├── JoinConfiguration.yml │ ├── InitConfiguration.yml │ ├── KubeProxyConfiguration.yml │ ├── ClusterConfiguration.yml │ ├── KubeletConfiguration.yml │ ├── storage.yml │ └── network.yml ├── vagrant_known_issues.md ├── LICENSE.md ├── hosts.example ├── other_tools ├── k8s_cli_tools.sh └── dockerize.sh ├── pre_sanity.yml ├── batch_deploy_serial_non_parallel.yml ├── all_reset.yml ├── only_nodes_only_install.yml ├── only_secondaryMasters_only_install.yml ├── all_install.yml ├── Vagrantfile └── site.yml /roles/helm/tasks/main.yml: -------------------------------------------------------------------------------- 1 | - include_tasks: "{{task}}.yml" -------------------------------------------------------------------------------- /roles/tools/tasks/main.yml: -------------------------------------------------------------------------------- 1 | - include_tasks: "{{task}}.yml" -------------------------------------------------------------------------------- /roles/post_deploy/tasks/main.yml: -------------------------------------------------------------------------------- 1 | - include_tasks: "{{task}}.yml" -------------------------------------------------------------------------------- /roles/storage/tasks/main.yml: 
-------------------------------------------------------------------------------- 1 | - include_tasks: "{{task}}.yml" -------------------------------------------------------------------------------- /roles/common/tasks/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - include_tasks: "{{task}}.yml" 3 | -------------------------------------------------------------------------------- /roles/common/templates/cloud-config.j2: -------------------------------------------------------------------------------- 1 | 2 | {{ cloud_config | indent (0) }} 3 | -------------------------------------------------------------------------------- /roles/helm/tasks/all.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - include_tasks: helm_reset.yml 3 | - include_tasks: helm.yml -------------------------------------------------------------------------------- /_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-cayman 2 | include: ["README.md"] 3 | exclude: ["*.*"] 4 | -------------------------------------------------------------------------------- /roles/common/files/90-kubeadm.conf: -------------------------------------------------------------------------------- 1 | net.bridge.bridge-nf-call-iptables = 1 2 | net.bridge.bridge-nf-call-ip6tables = 1 3 | -------------------------------------------------------------------------------- /templates/kured_profile1.j2: -------------------------------------------------------------------------------- 1 | extraArgs: 2 | period: 0h07m0s 3 | image: 4 | repository: '{{ images_repo | default ("ghcr.io") }}/kubereboot/kured' 5 | -------------------------------------------------------------------------------- /roles/post_deploy/tasks/all.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - include_tasks: network.yml 3 | - include_tasks: post_cluster_deploy.yml 4 | - include_tasks: sanity.yml 5 | - include_tasks: taints.yml 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.retry 2 | *.log 3 | *.tmp 4 | test.sh 5 | tmp.* 6 | temp.* 7 | .vagrant 8 | ansible.cfg 9 | ssh_config* 10 | /hosts 11 | #/group_vars/all/tmp.yaml 12 | #/group_vars/all/temp.yaml 13 | -------------------------------------------------------------------------------- /roles/storage/templates/rook_ceph_conf.j2: -------------------------------------------------------------------------------- 1 | kind: ConfigMap 2 | apiVersion: v1 3 | metadata: 4 | name: rook-config-override 5 | namespace: rook 6 | data: 7 | config: | 8 | {{ rook.ceph_conf | indent(4) }} 9 | 10 | -------------------------------------------------------------------------------- /roles/storage/tasks/all.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - include_tasks: remove_pvs.yml 3 | - include_tasks: nfs_reset.yml 4 | - include_tasks: nfs.yml 5 | - include_tasks: rook_reset.yml 6 | - include_tasks: rook.yml 7 | - include_tasks: vsphere.yml 8 | -------------------------------------------------------------------------------- /roles/storage/tasks/create_all.yml: -------------------------------------------------------------------------------- 1 | --- 2 | #- include_tasks: remove_pvs.yml 3 | #- include_tasks: nfs_reset.yml 4 | - include_tasks: nfs.yml 5 | #- 
include_tasks: rook_reset.yml 6 | - include_tasks: rook.yml 7 | - include_tasks: vsphere.yml 8 | -------------------------------------------------------------------------------- /roles/primary-master/handlers/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Restart docker 3 | systemd: name=docker state=restarted enabled=yes 4 | 5 | - name: Reload systemd 6 | command: systemctl daemon-reload 7 | 8 | - name: Restart kubelet 9 | systemd: name=kubelet state=restarted enabled=yes 10 | -------------------------------------------------------------------------------- /roles/non-primary-master/handlers/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Restart docker 3 | systemd: name=docker state=restarted enabled=yes 4 | 5 | - name: Reload systemd 6 | command: systemctl daemon-reload 7 | 8 | - name: Restart kubelet 9 | systemd: name=kubelet state=restarted enabled=yes 10 | -------------------------------------------------------------------------------- /ansible.cfg.example: -------------------------------------------------------------------------------- 1 | [defaults] 2 | #remote_user=vagrant 3 | become=true 4 | become_method=sudo 5 | stdout_callback = debug 6 | 7 | [ssh_connection] 8 | ssh_args = -C -o ControlMaster=auto -o ControlPersist=60s -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -F ./ssh_config 9 | pipelining = True 10 | -------------------------------------------------------------------------------- /docs/architecture.md: -------------------------------------------------------------------------------- 1 | ## Namespaces: 2 | ### monitoring 3 | Holding: 4 | - prometheus operator 5 | - metrics-server 6 | 7 | ### cert-manager 8 | Holding: 9 | - cert-manager 10 | 11 | ### kube-system 12 | Holding: 13 | - nginx-controller 14 | - kured 15 | - heapster 16 | - networking (e.g. 
flannel) 17 | - dashboard 18 | -------------------------------------------------------------------------------- /demo/demo-svc.yml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | labels: 5 | app: demo-svc 6 | name: demo-svc 7 | namespace: default 8 | spec: 9 | ports: 10 | - port: 80 11 | protocol: TCP 12 | targetPort: 80 13 | selector: 14 | app: demo 15 | sessionAffinity: None 16 | type: NodePort 17 | -------------------------------------------------------------------------------- /demo/demo-ingress.yml: -------------------------------------------------------------------------------- 1 | apiVersion: networking.k8s.io/v1 2 | kind: Ingress 3 | metadata: 4 | name: demo 5 | namespace: default 6 | spec: 7 | rules: 8 | - host: pv.k8s.cloud.corp.example.com 9 | http: 10 | paths: 11 | - path: / 12 | pathType: Prefix 13 | backend: 14 | service: 15 | name: demo-svc 16 | port: 17 | number: 80 18 | -------------------------------------------------------------------------------- /roles/common/tasks/firewalld.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Disable firewalld (CentOS/RHEL) 3 | systemd: name=firewalld state=stopped enabled=no 4 | when: ansible_os_family == "RedHat" 5 | ignore_errors: true # in case it does not exist 6 | # For developing a firewalld-friendly solution, check: 7 | # https://github.com/kubernetes/contrib/tree/master/ansible/roles/ 8 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # THIS IS ONLY FOR THE gitattributes REPOSITORY. 2 | # Handle line endings automatically for files detected as text 3 | # and leave all files detected as binary untouched. 4 | * text=auto 5 | 6 | # 7 | # The above will handle all files NOT found below 8 | # 9 | # These files are text and should be normalized (Convert crlf => lf) 10 | *.gitattributes text 11 | .gitignore text 12 | *.md text 13 | *.yml text 14 | *.yaml text 15 | -------------------------------------------------------------------------------- /.github/workflows/greetings.yml: -------------------------------------------------------------------------------- 1 | name: Greetings 2 | 3 | on: [pull_request, issues] 4 | 5 | jobs: 6 | greeting: 7 | runs-on: ubuntu-latest 8 | steps: 9 | - uses: actions/first-interaction@v1 10 | with: 11 | repo-token: ${{ secrets.GITHUB_TOKEN }} 12 | issue-message: 'Your constructive feedback makes this project stronger! Thank you for opening your first issue!' 13 | pr-message: | 14 | Many thanks! Hope you enjoyed it as much as we did! 
15 | -------------------------------------------------------------------------------- /demo/demo-claim.yml: -------------------------------------------------------------------------------- 1 | kind: PersistentVolumeClaim 2 | apiVersion: v1 3 | metadata: 4 | name: demo-claim 5 | #annotations: # When not defined, the default storageClass is used (if any defined) 6 | #volume.beta.kubernetes.io/storage-class: rook-block 7 | #volume.beta.kubernetes.io/storage-class: thin #vsphere 8 | #volume.beta.kubernetes.io/storage-class: nfs.k8s 9 | spec: 10 | accessModes: 11 | - ReadWriteOnce 12 | resources: 13 | requests: 14 | storage: 10Mi 15 | -------------------------------------------------------------------------------- /templates/metallb_profile1.j2: -------------------------------------------------------------------------------- 1 | controller: 2 | metrics: 3 | enabled: true 4 | serviceMonitor: 5 | enabled: true 6 | nodeSelector: 7 | node-role.kubernetes.io/infra: "" 8 | global: 9 | imageRegistry: '{{ images_repo | default ("docker.io") }}' 10 | installCRDs: true 11 | prometheusRule: 12 | enabled: true 13 | speaker: 14 | metrics: 15 | enabled: true 16 | serviceMonitor: 17 | enabled: true 18 | nodeSelector: 19 | node-role.kubernetes.io/infra: "" 20 | -------------------------------------------------------------------------------- /roles/primary-master/templates/cloud-config-vsphere-secret.j2: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Secret 3 | metadata: 4 | name: "{{ cloud_config_vsphere_specific.secret_name }}" 5 | namespace: "{{ cloud_config_vsphere_specific.secret_namespace }}" 6 | type: Opaque 7 | data: 8 | {{ cloud_config_vsphere_specific.server }}.username: {{ cloud_config_vsphere_specific.username | b64encode }} 9 | {{ cloud_config_vsphere_specific.server }}.password: {{ cloud_config_vsphere_specific.password | b64encode }} 10 | 11 | -------------------------------------------------------------------------------- /roles/common/tasks/swap.yml: -------------------------------------------------------------------------------- 1 | --- 2 | ### Handling SWAP (k8s 1.8 expects swapoff or configure kubelet to accept it 3 | - block: 4 | - name: swapoff (prereq k8s 1.8) 5 | command: swapoff --all 6 | 7 | - name: Remove swap from /etc/fstab (when named swap) 8 | mount: 9 | name: swap 10 | fstype: swap 11 | state: absent 12 | 13 | - name: Remove swap from /etc/fstab (when named none) 14 | mount: 15 | name: none 16 | fstype: swap 17 | state: absent 18 | when: turn_swapoff | default (true) -------------------------------------------------------------------------------- /scripts/test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cd ~/work/ 4 | mv -f ./kubeadm-playbook ./kubeadm-playbook.old || true 5 | sudo cp -rp ~researchiteng/git/kubeadm-playbook . 6 | sudo chown -R `id -u`:`id -g` ./kubeadm-playbook 7 | cd ./kubeadm-playbook 8 | cp -p .././kubeadm-playbook.old/hosts . 
9 | sed -i 's/myk8s.corp.example.com/ap/' group_vars/all/network.yml 10 | sudo kubeadm reset -f 11 | ansible-playbook -i hosts site.yml 12 | sudo cp -pf /etc/kubernetes/admin.conf ~/.kube/config 13 | sudo chown -R `id -u`:`id -g` ~/.kube/config 14 | 15 | -------------------------------------------------------------------------------- /roles/common/tasks/rook.yml: -------------------------------------------------------------------------------- 1 | --- 2 | ### Optionally install packages for rook 3 | - block: 4 | - name: Install packages required by rook (ceph) storage 5 | package: name={{ item }} state={{ package_state | default ('present') }} 6 | with_items: 7 | - ceph-common 8 | 9 | - name: Install packages required by rook (ceph) storage setup (usually required only on masters) 10 | package: name={{ item }} state={{ package_state | default ('present') }} 11 | with_items: 12 | - jq 13 | when: rook is defined and rook.enabled 14 | -------------------------------------------------------------------------------- /roles/storage/templates/rook-storageclass.j2: -------------------------------------------------------------------------------- 1 | apiVersion: storage.k8s.io/v1 2 | kind: StorageClass 3 | metadata: 4 | name: rook-block 5 | provisioner: rook.io/block 6 | parameters: 7 | {{ rook.rbd.storageclass_parameters | to_yaml | indent(2) }} 8 | 9 | # as per https://github.com/rook/rook/blob/master/demo/kubernetes/rook-storageclass.yaml 10 | #pool: replicapool 11 | # Specify the Rook cluster from which to create volumes. If not specified, it will use `rook` as the namespace and name of the cluster. 12 | # clusterName: rook 13 | # clusterNamespace: rook 14 | 15 | -------------------------------------------------------------------------------- /allow-all-all-rbac.yml: -------------------------------------------------------------------------------- 1 | # Create the clusterrole and clusterrolebinding: 2 | # $ kubectl create -f allow-all-all-rbac.yml 3 | --- 4 | kind: ClusterRoleBinding 5 | apiVersion: rbac.authorization.k8s.io/v1beta1 6 | metadata: 7 | name: cluster-admin-binding 8 | roleRef: 9 | apiGroup: rbac.authorization.k8s.io 10 | kind: ClusterRole 11 | name: cluster-admin 12 | subjects: 13 | - kind: ServiceAccount 14 | name: default 15 | namespace: kube-system 16 | - kind: ServiceAccount 17 | name: default 18 | namespace: ceph 19 | - kind: ServiceAccount 20 | name: default 21 | namespace: default 22 | -------------------------------------------------------------------------------- /roles/tools/tasks/weave_reset.yml: -------------------------------------------------------------------------------- 1 | --- 2 | #- hosts: all 3 | # become: yes 4 | # become_method: sudo 5 | # tags: 6 | # - weave 7 | # - reset 8 | # tasks: 9 | 10 | # - name: Copy weave net script 11 | # environment: '{{ proxy_env | default ({}) }}' 12 | # get_url: url=https://raw.githubusercontent.com/weaveworks/weave/master/weave dest=/usr/local/bin/weave mode=u+rxw force=yes 13 | # ignore_errors: true # Currently there is no way to check if the user is using weave 14 | 15 | # This is also part of the reset.yml 16 | - name: Reset weave 17 | shell: /usr/local/bin/weave reset 18 | ignore_errors: true 19 | 20 | 21 | -------------------------------------------------------------------------------- /roles/common/tasks/all.yml: -------------------------------------------------------------------------------- 1 | --- 2 | #- include_tasks: reset.yml 3 | - include_tasks: selinux.yml # THIS MIGHT REBOOT MACHINE!!! 
4 | - include_tasks: install_k8s_packages.yml 5 | - include_tasks: docker.yml 6 | - include_tasks: iptables.yml 7 | - include_tasks: firewalld.yml 8 | - include_tasks: kube_config.yml 9 | - include_tasks: swap.yml 10 | - include_tasks: kernel_modules.yml 11 | when: kernel_modules_setup | default (True) 12 | - include_tasks: ntpd.yml 13 | when: ntp_setup | default (True) 14 | - include_tasks: rook.yml 15 | when: rook is defined and rook.enabled | default (False) 16 | - include_tasks: various.yml 17 | - include_tasks: aliases_completion.yml 18 | -------------------------------------------------------------------------------- /.github/workflows/stale.yml: -------------------------------------------------------------------------------- 1 | name: Mark stale issues and pull requests 2 | 3 | on: 4 | schedule: 5 | - cron: "0 23 * * *" 6 | 7 | jobs: 8 | stale: 9 | 10 | runs-on: ubuntu-latest 11 | 12 | steps: 13 | - uses: actions/stale@v1 14 | with: 15 | repo-token: ${{ secrets.GITHUB_TOKEN }} 16 | stale-issue-message: 'Stale issue' 17 | stale-pr-message: 'Stale pull request' 18 | stale-issue-label: 'no-issue-activity' 19 | stale-pr-label: 'no-pr-activity' 20 | exempt-issue-label: 'enhancement' 21 | exempt-pr-label: 'awaiting-approval' 22 | days-before-stale: 30 23 | days-before-close: 7 24 | -------------------------------------------------------------------------------- /demo/demo-pod.yml: -------------------------------------------------------------------------------- 1 | kind: Pod 2 | apiVersion: v1 3 | metadata: 4 | name: demo-pod 5 | labels: 6 | app: demo 7 | spec: 8 | volumes: 9 | - name: demo-storage 10 | persistentVolumeClaim: 11 | claimName: demo-claim 12 | 13 | containers: 14 | - name: demo-container 15 | image: nginx 16 | ports: 17 | - containerPort: 80 18 | name: "http-server" 19 | volumeMounts: 20 | - mountPath: "/usr/share/nginx/html" 21 | name: demo-storage 22 | nodeSelector: 23 | node-role.kubernetes.io/control-plane: "" 24 | tolerations: 25 | - key: "node-role.kubernetes.io/control-plane" 26 | effect: NoSchedule 27 | -------------------------------------------------------------------------------- /roles/storage/templates/rook-pool.j2: -------------------------------------------------------------------------------- 1 | apiVersion: rook.io/v1alpha1 2 | kind: Pool 3 | metadata: 4 | name: replicapool 5 | namespace: rook 6 | spec: 7 | {{ rook.rbd.pool_spec | to_yaml | indent(2) }} 8 | 9 | # as per https://github.com/rook/rook/blob/master/demo/kubernetes/rook-storageclass.yaml and https://github.com/rook/rook/blob/master/Documentation/pool-tpr.md 10 | #replication: 11 | # size: 1 12 | # For an erasure-coded pool, comment out the replication size above and uncomment the following settings. 13 | # Make sure you have enough OSDs to support the replica size or erasure code chunks. 
14 | #erasureCode: 15 | # codingChunks: 2 16 | # dataChunks: 2 17 | -------------------------------------------------------------------------------- /templates/tigera-operator_profile1.j2: -------------------------------------------------------------------------------- 1 | calicoctl: 2 | image: '{{ images_repo | default ("quay.io") }}/calico/ctl' 3 | cni: 4 | image: '{{ images_repo | default ("quay.io") }}/calico/cni' 5 | flexvol: 6 | image: '{{ images_repo | default ("quay.io") }}/calico/pod2daemon-flexvol' 7 | installation: 8 | registry: '{{ images_repo | default ("docker.io") }}' 9 | kubeControllers: 10 | image: '{{ images_repo | default ("quay.io") }}/calico/kube-controllers' 11 | node: 12 | image: '{{ images_repo | default ("quay.io") }}/calico/node' 13 | tigeraOperator: 14 | registry: '{{ images_repo | default ("quay.io") }}' 15 | typha: 16 | image: '{{ images_repo | default ("quay.io") }}/calico/typha' 17 | 18 | -------------------------------------------------------------------------------- /roles/common/tasks/various.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: set hostname with fqdn 3 | hostname: 4 | name: "{{ inventory_hostname }}" 5 | when: set_hostname_to_inventory_hostname | default ( False ) 6 | 7 | - name: set hostname with fqdn - sol2 8 | shell: hostnamectl set-hostname {{ inventory_hostname }} 9 | when: set_hostname_to_inventory_hostname | default ( False ) 10 | # This does not require restart (as per docs) 11 | 12 | - name: create dir /var/log/journal (so node-problem-detector finds any issues with the nodes, should there be any) 13 | file: 14 | path: /var/log/journal 15 | state: directory 16 | mode: 0755 17 | 18 | # Forcing restart of services 19 | #- meta: flush_handlers 20 | -------------------------------------------------------------------------------- /roles/tools/tasks/reboot.yml: -------------------------------------------------------------------------------- 1 | - name: Reboot 2 | shell: sleep 2 && /sbin/shutdown -r now 3 | async: 1 4 | poll: 0 5 | ignore_errors: true 6 | when: allow_restart | default ( false ) 7 | 8 | #- name: Pause till machine is up again 9 | # pause: 10 | # seconds: 30 11 | 12 | - name: Wait for server come back from restart 13 | local_action: wait_for 14 | args: 15 | host: "{{ inventory_hostname }}" 16 | port: 22 17 | state: started 18 | delay: 15 19 | timeout: 180 20 | 21 | #Starting Ansible 2.3 one can do: 22 | #- name: Wait for system to become reachable # Ansible 2.3+ 23 | # wait_for_connection: 24 | # timeout: 200 25 | 26 | #- name: Gather facts for first time after restart 27 | # setup: 28 | 29 | -------------------------------------------------------------------------------- /roles/post_deploy/tasks/post_cluster_deploy.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # - hosts: master 3 | # become: yes 4 | # become_method: sudo 5 | # tags: 6 | # - k8s_addons 7 | # tasks: 8 | - set_fact: 9 | env_kc: '{{ proxy_env |default({}) | combine ({"PATH" : "/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/bin" }) | combine ({"KUBECONFIG" :"/etc/kubernetes/admin.conf"}) }}' 10 | tags: 11 | - always 12 | 13 | # k8s_addons_urls is usually empty 14 | - name: Install k8s_addons_urls (with proxy) 15 | command: kubectl apply -f {{ item }} 16 | with_items: "{{ k8s_addons_urls | default ('') }}" 17 | environment: '{{env_kc}}' 18 | when: 19 | - k8s_addons_urls is defined 20 | - k8s_addons_urls | length > 0 21 | tags: 22 | - k8s_addons 23 | 
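# Illustrative only (not shipped in this repo's group_vars): a hypothetical inventory entry
# that the task above would consume, if you choose to define it:
# k8s_addons_urls:
#   - "https://example.com/manifests/extra-addon.yaml"
#   - "https://example.com/manifests/another-addon.yaml"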
-------------------------------------------------------------------------------- /roles/keepalived/templates/check_apiserver.sh.j2: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | errorExit() { 4 | echo "*** $*" 1>&2 5 | exit 1 6 | } 7 | 8 | curl --silent --max-time 2 --insecure https://localhost:{{ InitConfiguration.localAPIEndpoint.bindPort | default (6443) }}/healthz -o /dev/null || errorExit "Error GET https://localhost:{{ InitConfiguration.localAPIEndpoint.bindPort | default (6443) }}/healthz" 9 | if ip addr | grep -q {{ custom.networking.masterha_ip }}; then 10 | curl --silent --max-time 2 --insecure https://{{ custom.networking.masterha_ip }}:{{ InitConfiguration.localAPIEndpoint.bindPort | default (6443) }}/healthz -o /dev/null || errorExit "Error GET https://{{ custom.networking.masterha_ip }}:{{ InitConfiguration.localAPIEndpoint.bindPort | default (6443) }}/healthz" 11 | fi 12 | 13 | -------------------------------------------------------------------------------- /docs/upgrade_cluster.md: -------------------------------------------------------------------------------- 1 | The kubeadm upgrade is pretty clear and simple, so there is no need for much automation around it. 2 | Mainly, run it in a loop across all the machines (start with the masters): 3 | (as a first step on each node, make a backup of the /etc/kubernetes folder). 4 | https://kubernetes.io/docs/tasks/administer-cluster/kubeadm/kubeadm-upgrade/#upgrading-control-plane-nodes 5 | 6 | Upgrade only one minor version at a time (don't skip versions). 7 | (Ideally, get familiar with the process on a test machine first.) 8 | 9 | PS: 10 | The concept of a "primary master" exists only as part of the install flow, to denote where the first set of commands runs and where we run commands like fetching the join tokens, etc. 11 | The cluster as such does not have/need such a concept. 
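
For reference, the per-node loop looks roughly like the sketch below. This is only an illustration (not something this playbook automates); `v1.x.y` and `<node>` are placeholders, and the exact package commands depend on your OS:

```bash
# On the first control-plane node (upgrade the kubeadm package itself first):
cp -a /etc/kubernetes /etc/kubernetes.bak   # backup, as recommended above
kubeadm upgrade plan
kubeadm upgrade apply v1.x.y

# On every other node (secondary masters and workers), one at a time:
kubectl drain <node> --ignore-daemonsets --delete-emptydir-data   # from a machine with kubectl access
kubeadm upgrade node                                              # on the node itself
# then upgrade the kubelet/kubectl packages to the same v1.x.y via yum/dnf or apt, and:
systemctl daemon-reload && systemctl restart kubelet
kubectl uncordon <node>
```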
12 | -------------------------------------------------------------------------------- /roles/post_deploy/tasks/sanity.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Wait few seconds for network to start deploying 3 | pause: seconds=3 4 | changed_when: false 5 | 6 | - name: sanity - wait for alls pod to be running (besides kube-dns,coredns, tiller-deploy for now, as it might be forced to a node if master did not allow it due to tains) 7 | environment: 8 | KUBECONFIG: /etc/kubernetes/admin.conf 9 | shell: "kubectl get --namespace kube-system pods --no-headers | grep -v -w 'Running' | grep -v 'kube-dns' | grep -v 'coredns' | grep -v 'tiller-deploy' || true " 10 | register: command_result 11 | tags: 12 | - k8s_network_addons 13 | - sanity 14 | until: command_result.stdout == "" 15 | retries: "{{ RETRIES | default(40) }}" 16 | delay: 3 17 | changed_when: false 18 | -------------------------------------------------------------------------------- /templates/dashboard_profile1.j2: -------------------------------------------------------------------------------- 1 | image: 2 | repository: '{{ images_repo | default ("docker.io") }}/kubernetesui/dashboard' 3 | ingress: 4 | enabled: true 5 | hosts: 6 | - 'dashboard.{{ custom.networking.dnsDomain }}' 7 | - '{{ custom.networking.masterha_fqdn | default (groups["primary-master"][0]) }}' 8 | - '{{ groups["primary-master"][0] }}' 9 | metricsScraper: 10 | enabled: true 11 | image: 12 | repository: '{{ images_repo | default ("docker.io") }}/kubernetesui/metrics-scraper' 13 | nodeSelector: 14 | node-role.kubernetes.io/infra: "" 15 | protocolHttp: true 16 | rbac: 17 | clusterReadOnlyRole: true 18 | create: true 19 | tolerations: 20 | - effect: NoSchedule 21 | key: node-role.kubernetes.io/infra 22 | - effect: PreferNoSchedule 23 | key: node-role.kubernetes.io/infra 24 | -------------------------------------------------------------------------------- /roles/keepalived/templates/keepalived.conf.j2: -------------------------------------------------------------------------------- 1 | ! Configuration File for keepalived 2 | global_defs { 3 | router_id {{ CLUSTER_NAME }} 4 | } 5 | vrrp_script check_apiserver { 6 | script "/etc/keepalived/check_apiserver.sh" 7 | interval 3 8 | weight -2 9 | fall 10 10 | rise 2 11 | } 12 | 13 | vrrp_instance VI_{{ CLUSTER_NAME }}_1 { 14 | {% if 'primary-master' in group_names %} 15 | state MASTER 16 | {% else %} 17 | state BACKUP 18 | {% endif %} 19 | interface {{ ansible_default_ipv4.interface }} 20 | virtual_router_id 97 21 | {% if 'primary-master' in group_names %} 22 | priority 101 23 | {% else %} 24 | priority 100 25 | {% endif %} 26 | authentication { 27 | auth_type PASS 28 | auth_pass e1{{ CLUSTER_NAME }}483e10ad1d 29 | } 30 | virtual_ipaddress { 31 | {{ custom.networking.masterha_ip }} 32 | } 33 | track_script { 34 | check_apiserver 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /roles/storage/files/vsphere_bug_fix.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | export KUBECONFIG=/etc/kubernetes/admin.conf 3 | NeedRebootList="" 4 | for h in $(kubectl get nodes | tail -n +2 | awk '{print $1}'); do 5 | uuid=$(kubectl describe node/$h | grep -i UUID | tr '[:upper:]' '[:lower:]' | awk '{print $3}') 6 | eval kubectl patch node $h -p \'{\"spec\":{\"providerID\":\"vsphere://${uuid}\"}}\' | grep 'no change' >/dev/null 7 | if [[ $? 
-gt 0 ]]; then 8 | kubectl delete node $h # As per vmware support suggetion: delete node and restart kubelet (see code: https://github.com/kubernetes/kubernetes/blob/v1.14.1/pkg/cloudprovider/providers/vsphere/vsphere.go#L278 ) 9 | NeedRebootList="$NeedRebootList $h" 10 | fi 11 | done 12 | if [[ -n $NeedRebootList ]]; then 13 | echo "$NeedRebootList" | tr ' ' '\n' | tail -n +2 14 | fi 15 | ### NeedRebootList holds the list of machines where there was a change and requrie reboot (or maybe at least kubelet restart) 16 | 17 | -------------------------------------------------------------------------------- /roles/common/handlers/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Restart docker 3 | systemd: name=docker state=restarted enabled=yes daemon_reload=yes 4 | 5 | - name: Restart containerd 6 | systemd: name=containerd state=restarted enabled=yes daemon_reload=yes 7 | 8 | - name: Restart crio 9 | systemd: name=crio state=restarted enabled=yes daemon_reload=yes 10 | 11 | - name: Restart docker-storage-setup 12 | systemd: name=docker-storage-setup state=restarted 13 | 14 | - name: Reload systemd 15 | command: systemctl daemon-reload 16 | 17 | - name: Restart kubelet 18 | systemd: name=kubelet state=restarted enabled=yes daemon_reload=yes 19 | 20 | #Debian is ntp, RedHat ntpd 21 | #- name: Restart ntpd 22 | # systemd: name=ntpd state=restarted enabled=yes 23 | 24 | - name: Restart iptables 25 | systemd: name=iptables state=restarted enabled=yes 26 | 27 | - name: Reboot 28 | shell: sleep 2 && /sbin/shutdown -r now 29 | async: 1 30 | poll: 0 31 | ignore_errors: true 32 | when: allow_restart | default ( false ) 33 | -------------------------------------------------------------------------------- /roles/storage/tasks/nfs_reset.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # - hosts: master 3 | # gather_facts: False 4 | # become: yes 5 | # become_method: sudo 6 | # tags: 7 | # - reset 8 | # - nfs_storage 9 | # tasks: 10 | 11 | - set_fact: 12 | env_kc: '{{ proxy_env |default({}) | combine ({"PATH" : "/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin" }) | combine ({"KUBECONFIG" :"/etc/kubernetes/admin.conf"}) }}' 13 | tags: 14 | - reset 15 | - nfs_storage 16 | 17 | - name: delete old nfs provisioner 18 | environment: 19 | KUBECONFIG: /etc/kubernetes/admin.conf 20 | shell: "kubectl delete --namespace kube-system -f /tmp/nfs.yml" 21 | when: nfs_k8s is defined and nfs_k8s.enabled 22 | tags: 23 | - reset 24 | - nfs_storage 25 | ignore_errors: true 26 | 27 | - name: wipe nfs host_path 28 | file: path={{nfs_k8s.host_path}} state=absent 29 | when: nfs_k8s is defined and nfs_k8s.enabled and nfs_k8s.wipe 30 | tags: 31 | - reset 32 | - nfs_storage 33 | ignore_errors: true 34 | 35 | -------------------------------------------------------------------------------- /roles/common/tasks/ntpd.yml: -------------------------------------------------------------------------------- 1 | --- 2 | ### ntpd: 3 | - block: 4 | - name: ntpd and ntpdate | it is mandatory to have the time from all machines in sync 5 | package: state=present name={{ item }} 6 | environment: '{{ proxy_env | default ({}) }}' 7 | with_items: 8 | - ntp 9 | register: ntp_output 10 | # notify: # Debian & RH diff service name... 11 | # - Restart ntpd 12 | 13 | - name: ntpd configuration 14 | copy: content={{ntp_conf}} dest=/etc/ntp.conf backup=yes 15 | when: ntp_conf is defined 16 | # notify: # Debian & RH diff service name... 
17 | # - Restart ntpd 18 | 19 | - name: Restart ntpd - RedHat/CentOS 20 | systemd: name=ntpd state=restarted enabled=yes 21 | when: ansible_os_family == "RedHat" 22 | 23 | - name: Restart ntp - Debian 24 | systemd: name=ntp state=restarted enabled=yes 25 | when: ansible_os_family == "Debian" 26 | 27 | when: 28 | - ntp_setup is defined 29 | - ntp_setup 30 | - ntp_package is defined 31 | - ntp_package == "ntp" 32 | tags: 33 | - ntp 34 | -------------------------------------------------------------------------------- /group_vars/all/JoinConfiguration.yml: -------------------------------------------------------------------------------- 1 | #https://pkg.go.dev/k8s.io/kubernetes/cmd/kubeadm/app/apis/kubeadm/v1beta3?tab=doc#JoinConfiguration 2 | #check latest api ver here: https://pkg.go.dev/k8s.io/kubernetes/cmd/kubeadm/app/apis/kubeadm?tab=subdirectories 3 | JoinConfiguration: 4 | timeouts: 5 | controlPlaneComponentHealthCheck: 8m0s 6 | # controlPlane: # define controlPlane section only in secondary-masters; not required in minions. 7 | # localAPIEndpoint: 8 | # advertiseAddress: 9 | # bindPort: 6443 10 | apiVersion: kubeadm.k8s.io/v1beta4 11 | # caCertPath: /etc/kubernetes/pki/ca.crt 12 | # discovery: 13 | # bootstrapToken: 14 | # apiServerEndpoint: 10.1.2.3:6443 15 | # token: abcdef.0123456789abcdef 16 | # unsafeSkipCAVerification: true 17 | # timeout: 5m0s 18 | # tlsBootstrapToken: abcdef.0123456789abcdef 19 | # file: #either file or tlsBootstrapToken 20 | kind: JoinConfiguration 21 | # nodeRegistration: 22 | # criSocket: /var/run/dockershim.sock 23 | # name: 24 | # skipPhases: 25 | # patches: 26 | -------------------------------------------------------------------------------- /roles/storage/tasks/nfs.yml: -------------------------------------------------------------------------------- 1 | --- 2 | ## NFS 3 | - name: install nfs-utils package - RedHat/CentOS 4 | package: 5 | name: "nfs-utils" 6 | state: present 7 | when: 8 | - nfs_k8s is defined and nfs_k8s.enabled 9 | - ansible_os_family == "RedHat" 10 | tags: 11 | - nfs_storage 12 | 13 | - name: install nfs-common package - Debian 14 | package: 15 | name: "nfs-common" 16 | state: present 17 | when: 18 | - nfs_k8s is defined and nfs_k8s.enabled 19 | - ansible_os_family == "Debian" 20 | tags: 21 | - nfs_storage 22 | 23 | - name: prepare nfs provisioner 24 | template: 25 | src: nfs.j2 26 | dest: /tmp/nfs.yml 27 | #backup: yes 28 | force: yes 29 | when: nfs_k8s is defined and nfs_k8s.enabled 30 | tags: 31 | - nfs_storage 32 | 33 | - name: create nfs provisioner 34 | environment: 35 | KUBECONFIG: /etc/kubernetes/admin.conf 36 | shell: "kubectl create --namespace kube-system -f /tmp/nfs.yml" 37 | when: nfs_k8s is defined and nfs_k8s.enabled 38 | tags: 39 | - nfs_storage 40 | 41 | -------------------------------------------------------------------------------- /docs/Troubleshooting.md: -------------------------------------------------------------------------------- 1 | # Ansible related errors: 2 | ### forks, number of open files 3 | ERROR: "ERROR! A worker was found in a dead state" 4 | REASON: this might appear if you have low limits of number of open files, and your number of hosts in inventory is big. 
5 | Solution(s): 6 | - run this command before starting ansible: `ulimit -Sn $(ulimit -Hn)` to increase the softlimit up to the hard limit (which I suggest 16384 or more) 7 | - increase both soft limit and hardlimits (see links below) 8 | - if `lsof | wc -l` is more than 1/2 of `ulimit -Sn`, you **may** want to reboot the control machine (from where you invoke ansible).(ideally reboot also the target machines if they were not restarted for very long time) 9 | - limit number of forks ansible uses, by using the -f1 parameter on the ansible. 10 | Other related resources: 11 | - https://github.com/ansible/ansible/issues/32554 12 | - https://www.whatan00b.com/posts/debugging-a-segfault-from-ansible/ 13 | - https://stackoverflow.com/questions/21752067/counting-open-files-per-process 14 | - https://www.tecmint.com/increase-set-open-file-limits-in-linux/ 15 | -------------------------------------------------------------------------------- /templates/cert-manager_profile1.j2: -------------------------------------------------------------------------------- 1 | acmesolver: 2 | image: 3 | repository: '{{ images_repo | default ("quay.io") }}/jetstack/cert-manager-acmesolver' 4 | cainjector: 5 | image: 6 | repository: '{{ images_repo | default ("quay.io") }}/jetstack/cert-manager-cainjector' 7 | http_proxy: "{{proxy_env.http_proxy | default ('') }}" 8 | https_proxy: "{{proxy_env.https_proxy | default ('') }}" 9 | image: 10 | repository: '{{ images_repo | default ("quay.io") }}/jetstack/cert-manager-controller' 11 | installCRDs: true 12 | no_proxy: '{{proxy_env.no_proxy | default ("") | replace(",","\\,") }}' 13 | nodeSelector: 14 | node-role.kubernetes.io/infra: "" 15 | prometheus: 16 | servicemonitor: 17 | enabled: true 18 | #namespace: monitoring 19 | startupapicheck: 20 | image: 21 | repository: '{{ images_repo | default ("quay.io") }}/jetstack/cert-manager-ctl' 22 | tolerations: 23 | - effect: NoSchedule 24 | key: node-role.kubernetes.io/infra 25 | - effect: PreferNoSchedule 26 | key: node-role.kubernetes.io/infra 27 | webhook: 28 | image: 29 | repository: '{{ images_repo | default ("quay.io") }}/jetstack/cert-manager-webhook' 30 | -------------------------------------------------------------------------------- /vagrant_known_issues.md: -------------------------------------------------------------------------------- 1 | ## Virtualbox bugs (for those using vagrant solution): 2 | - Issue: After some times it shows on console: 3 | "kernel:NMI watchdog: BUG: soft lockup - CPU#0 stuck for 22s! kworker" 4 | and the vm is no longer responding. It happens on master. 5 | Solution: 6 | a) The project already implemented the code to change hdd controller from IDE to SATA. 7 | Status: SOLVED 8 | 9 | 10 | - Issue: at boot time, it says: 11 | "kernel: piix4_smbus 0000:00:07.0: SMBus base address uninitialized - upgrade BIOS or use force_addr=0xaddr" 12 | 13 | Tried: 14 | - vi /etc/default/grub #in the GRUB_CMDLINE_LINUX line, at the end, add: pci=noacpi acpi=off noapic 15 | #and run: 16 | grub2-mkconfig -o /boot/grub2/grub.cfg 17 | 18 | But did not work. 19 | 20 | - change motherboard chipset frp, piix3 to some other version 21 | But did not work. 22 | 23 | - echo -e "\nblacklist i2c_piix4\n" >> /etc/modprobe.d/blacklist.conf 24 | #echo -e "\nintel_powerclamp\n" >> /etc/modprobe.d/blacklist.conf # did not try 25 | and reboot 26 | But did not help either (actually block machine login via ssh). Maybe try to put blacklist i2c_piix4 also in /etc/dracut.conf.d/nofloppy.conf's omit_drivers list. 
27 | 28 | -------------------------------------------------------------------------------- /roles/storage/templates/rook-cluster.j2: -------------------------------------------------------------------------------- 1 | apiVersion: rook.io/v1alpha1 2 | kind: Cluster 3 | metadata: 4 | name: rook 5 | namespace: rook 6 | spec: 7 | {{ rook.cluster_spec | to_yaml | indent(2) }} 8 | 9 | # See more options here: https://github.com/rook/rook/blob/master/demo/kubernetes/rook-cluster.yaml and https://github.com/rook/rook/blob/master/Documentation/cluster-tpr.md 10 | # Individual nodes and their config can be specified as well, but 'useAllNodes' above must be set to false. Then, only the named 11 | # nodes below will be used as storage resources. Each node's 'name' field should match their 'kubernetes.io/hostname' label. 12 | # nodes: 13 | # - name: "172.17.4.101" 14 | # directories: # specific directores to use for storage can be specified for each node 15 | # - path: "/rook/storage-dir" 16 | # - name: "172.17.4.201" 17 | # devices: # specific devices to use for storage can be specified for each node 18 | # - name: "sdb" 19 | # - name: "sdc" 20 | # storeConfig: # configuration can be specified at the node level which overrides the cluster level config 21 | # storeType: bluestore 22 | # - name: "172.17.4.301" 23 | # deviceFilter: "^sd." 24 | 25 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | This is free and unencumbered software released into the public domain. 2 | 3 | Anyone is free to copy, modify, publish, use, compile, sell, or 4 | distribute this software, either in source code form or as a compiled 5 | binary, for any purpose, commercial or non-commercial, and by any 6 | means. 7 | 8 | In jurisdictions that recognize copyright laws, the author or authors 9 | of this software dedicate any and all copyright interest in the 10 | software to the public domain. We make this dedication for the benefit 11 | of the public at large and to the detriment of our heirs and 12 | successors. We intend this dedication to be an overt act of 13 | relinquishment in perpetuity of all present and future rights to this 14 | software under copyright law. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 23 | 24 | For more information, please refer to 25 | -------------------------------------------------------------------------------- /hosts.example: -------------------------------------------------------------------------------- 1 | 2 | # Use FullyQualifiedDomainNames (FQDN) (e.g. machine1.corp.example.com) 3 | # make sure `hostname -s` returns indeed short name (e.g. master1) 4 | # make sure `hostname -f` returns indeed the desired fqdn name (e.g. 
master1.corp.example.com) 5 | # normally, ansible_fqdn should return the same as `hostname -f` (otherwise one can reach this: https://github.com/ansible/ansible/issues/38777 ) 6 | # try: `ansible localhost -m setup -a "filter=ansible_fqdn"` and make sure it matches: `hostname -f` 7 | [primary-master] 8 | master1.corp.example.com 9 | 10 | [secondary-masters] 11 | # If there is only one master, make this section empty 12 | master[2:3].corp.example.com 13 | 14 | [masters:children] 15 | primary-master 16 | secondary-masters 17 | 18 | [nodes] 19 | # If there is only one machine both master and node, make this section empty 20 | # Best practice is to have few machines allocated for Prometheus/Ingresses/eventual ELK. 21 | # These are usually labeled "infra", and tainted with NoSchedule or at least PreferNoSchedule 22 | # See "taint_for_label" in group_vars/all/global.yaml 23 | node[1:2].corp.example.com label=node-role.kubernetes.io/infra= 24 | # All other nodes are automatically labeled "compute" and without any taint. 25 | node[3:7].corp.example.com # label=node-role.kubernetes.io/compute= 26 | 27 | -------------------------------------------------------------------------------- /group_vars/all/InitConfiguration.yml: -------------------------------------------------------------------------------- 1 | #https://pkg.go.dev/k8s.io/kubernetes/cmd/kubeadm/app/apis/kubeadm/v1beta4?tab=doc#InitConfiguration 2 | #check latest api ver here: https://pkg.go.dev/k8s.io/kubernetes/cmd/kubeadm/app/apis #/kubeadm?tab=subdirectories 3 | InitConfiguration: 4 | kind: InitConfiguration 5 | timeouts: 6 | controlPlaneComponentHealthCheck: 8m0s 7 | localAPIEndpoint: 8 | # advertiseAddress: 1.2.3.4 9 | # bindPort: 6443 10 | apiVersion: kubeadm.k8s.io/v1beta4 11 | # bootstrapTokens: 12 | # - groups: 13 | # - system:bootstrappers:kubeadm:default-node-token 14 | # token: abcdef.0123456789abcdef 15 | # ttl: 24h0m0s 16 | # usages: 17 | # - signing 18 | # - authentication 19 | nodeRegistration: 20 | # criSocket: unix:///var/run/dockershim.sock 21 | # criSocket: unix:///var/run/crio/crio.sock 22 | # criSocket: unix:///run/cri-dockerd.sock # https://github.com/Mirantis/cri-dockerd 23 | criSocket: unix:///run/cri-dockerd.sock 24 | # name: 25 | imagePullSerial: false 26 | kubeletExtraArgs: 27 | - name: cgroup-driver 28 | value: "systemd" 29 | - name: fail-swap-on 30 | value: "false" 31 | taints: 32 | - effect: NoSchedule 33 | key: node-role.kubernetes.io/control-plane 34 | # kubeletExtraArgs: 35 | # cgroupDriver: "cgroupfs" 36 | -------------------------------------------------------------------------------- /templates/nginx-ingress_profile1.j2: -------------------------------------------------------------------------------- 1 | controller: 2 | admissionWebhooks: 3 | enabled: false 4 | patch: 5 | image: 6 | registry: '{{ images_repo | default ("registry.k8s.io") }}' 7 | config: 8 | hide-headers: Server 9 | server-tokens: "false" 10 | hostNetwork: true 11 | image: 12 | registry: '{{ images_repo | default ("registry.k8s.io") }}' 13 | ingressClassResource: 14 | default: true 15 | kind: DaemonSet 16 | metrics: 17 | enabled: true 18 | serviceMonitor: 19 | additionalLabels: 20 | monitoring: prometheusoperator 21 | enabled: false 22 | #namespace: monitoring 23 | nodeSelector: 24 | node-role.kubernetes.io/infra: "" 25 | service: 26 | type: ClusterIP 27 | stats: 28 | enabled: true 29 | tolerations: 30 | - effect: NoSchedule 31 | key: node-role.kubernetes.io/infra 32 | - effect: PreferNoSchedule 33 | key: node-role.kubernetes.io/infra 34 
| watchIngressWithoutClass: true 35 | defaultBackend: 36 | image: 37 | image: 'defaultbackend-{{ HOST_ARCH | default ("amd64") }}' 38 | rbac: 39 | create: true 40 | serviceAccount: 41 | create: true 42 | 43 | #https://github.com/kubernetes/ingress-nginx/blob/master/charts/ingress-nginx/Chart.yaml#L5 44 | # PARAMS explained: https://kubernetes.github.io/ingress-nginx/deploy/baremetal/ and https://github.com/kubernetes/ingress-nginx/blob/main/charts/ingress-nginx/values.yaml -------------------------------------------------------------------------------- /roles/post_deploy/tasks/taints.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: tainting as per taint_for_label mapping 3 | environment: 4 | KUBECONFIG: /etc/kubernetes/admin.conf 5 | shell: 'kubectl taint nodes --selector {{ item.label }} {{ item.label }}:{{ item.taint }} --overwrite' 6 | with_items: "{{ taint_for_label }}" 7 | when: 8 | - groups['all'] | length > 1 9 | tags: 10 | - taint 11 | 12 | - block: 13 | - name: get name of primary_master when single node cluster 14 | environment: 15 | KUBECONFIG: /etc/kubernetes/admin.conf 16 | shell: 'kubectl get no -o=jsonpath="{.items[0].metadata.name}"' 17 | register: result_primary_master_name 18 | 19 | - name: when cluster is one machine only, remove NoSchedule taint from master 20 | ## TODO: Use InitConfiguration to remove the taint on master, with the same condition. 21 | environment: 22 | KUBECONFIG: /etc/kubernetes/admin.conf 23 | shell: 'kubectl taint nodes {{ result_primary_master_name.stdout_lines[0] }} {{ item }} --overwrite' 24 | with_items: #'{{ taints_master }}' 25 | - 'node-role.kubernetes.io/control-plane:NoSchedule-' 26 | - 'node-role.kubernetes.io/control-plane=:PreferNoSchedule' 27 | - 'node-role.kubernetes.io/infra=:PreferNoSchedule' 28 | ignore_errors: true 29 | tags: 30 | - taints 31 | when: 32 | - groups['all'] | length == 1 33 | tags: 34 | - taints 35 | 36 | -------------------------------------------------------------------------------- /roles/storage/tasks/remove_pvs.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # - hosts: master 3 | # become: yes 4 | # become_method: sudo 5 | # environment: 6 | # KUBECONFIG: /etc/kubernetes/admin.conf 7 | # tags: 8 | # - reset 9 | # - node 10 | # tasks: 11 | 12 | - block: 13 | - name: get all pvcs 14 | shell: kubectl get --all-namespaces pvc --no-headers -- | awk '{print "--namespace " $1 " pvc/" $2}' 15 | register: command_results 16 | ignore_errors: true 17 | changed_when: false 18 | 19 | - name: delete all pvcs 20 | environment: 21 | KUBECONFIG: /etc/kubernetes/admin.conf 22 | command: kubectl delete {{ item }} 23 | ignore_errors: true 24 | with_items: "{{command_results.stdout_lines}}" 25 | 26 | - name: wait till all pvcs are removed/cleaned 27 | shell: "kubectl get pvc --no-headers -- || true" 28 | register: command_result 29 | until: command_result.stdout == "" 30 | retries: 10 31 | delay: 3 32 | ignore_errors: true 33 | changed_when: false 34 | 35 | - name: wait till all pvs are removed/cleaned 36 | shell: "kubectl get pv --no-headers -- || true" 37 | register: command_result 38 | until: command_result.stdout == "" 39 | retries: "{{ RETRIES | default(40) }}" 40 | delay: 3 41 | ignore_errors: true 42 | changed_when: false 43 | 44 | when: storage.delete_pvs is defined and storage.delete_pvs 45 | environment: 46 | KUBECONFIG: /etc/kubernetes/admin.conf 47 | tags: 48 | - reset 49 | 50 | 
-------------------------------------------------------------------------------- /roles/keepalived/tasks/main.yaml: -------------------------------------------------------------------------------- 1 | # 2 | # Configure keepalived for the masters. 3 | # 4 | --- 5 | - name: "keepalived from package" 6 | block: 7 | - name: Install keepalived via package manager 8 | package: name=keepalived-{{ KEEPALIVED_VERSION }} state=present 9 | 10 | - name: Enable and start keepalived service 11 | service: name=keepalived enabled=yes state=restarted 12 | when: custom.networking.masterha_vip_keepalived_deploy_type == 'package' 13 | 14 | - name: Copy check script 15 | template: src=check_apiserver.sh.j2 dest=/etc/keepalived/check_apiserver.sh owner=root group=root mode=0755 16 | 17 | - name: Generate configuraton file 18 | template: src=keepalived.conf.j2 dest=/etc/keepalived/keepalived.conf 19 | 20 | - name: keepalived with docker image 21 | block: 22 | - name: cleanup previous keepalived 23 | shell: docker rm -f keepalived 24 | ignore_errors: true 25 | 26 | - name: Use keepalived from docker image 27 | shell: docker run --restart Always --name keepalived --env KEEPALIVED_INTERFACE=$(ip route | grep default | head -1 | cut -d' ' -f5) --env KEEPALIVED_PASSWORD='d0cker' --cap-add=NET_ADMIN --net=host --volume /etc/keepalived/keepalived.conf:/container/service/keepalived/assets/keepalived.conf --volume /etc/keepalived/check_apiserver.sh:/etc/keepalived/check_apiserver.sh --detach {{ masterha_vip_keepalived_docker_image | default ("osixia/keepalived:2.0.17") }} --copy-service # --loglevel debug 28 | register: docker_result 29 | 30 | when: custom.networking.masterha_vip_keepalived_deploy_type == 'docker' 31 | 32 | -------------------------------------------------------------------------------- /other_tools/k8s_cli_tools.sh: -------------------------------------------------------------------------------- 1 | 2 | ######################## 3 | ######################## 4 | echo "kubectx" 5 | 6 | sudo git clone -b master --single-branch https://github.com/ahmetb/kubectx.git /opt/kubectx 7 | sudo ln -s /opt/kubectx/kubectx /usr/local/bin/kubectx 8 | sudo ln -s /opt/kubectx/kubens /usr/local/bin/kubens 9 | 10 | # Bash completions 11 | COMPDIR=$(pkg-config --variable=completionsdir bash-completion) 12 | sudo ln -sf /opt/kubectx/completion/kubens.bash $COMPDIR/kubens 13 | sudo ln -sf /opt/kubectx/completion/kubectx.bash $COMPDIR/kubectx 14 | 15 | # Zsh completions 16 | mkdir -p ~/.oh-my-zsh/completions 17 | chmod -R 755 ~/.oh-my-zsh/completions 18 | ln -s /opt/kubectx/completion/kubectx.zsh ~/.oh-my-zsh/completions/_kubectx.zsh 19 | ln -s /opt/kubectx/completion/kubens.zsh ~/.oh-my-zsh/completions/_kubens.zsh 20 | 21 | ######################## 22 | ######################## 23 | echo "krew (kubectl krew package manager)" 24 | tmpdir="$(mktemp -d)" 25 | cd $tmpdir 26 | curl -fsSLO "https://github.com/kubernetes-sigs/krew/releases/download/v0.3.2/krew.{tar.gz,yaml}" 27 | tar zxvf krew.tar.gz 28 | ./krew-"$(uname | tr '[:upper:]' '[:lower:]')_amd64" install \ 29 | --manifest=krew.yaml --archive=krew.tar.gz 30 | cd - 31 | rm -rf $tmpdir 32 | sudo cp ~/.krew/bin/kubectl-krew /usr/local/bin 33 | 34 | ######################## 35 | ######################## 36 | echo "kubeval" 37 | curl -sSL https://github.com/instrumenta/kubeval/releases/download/0.14.0/kubeval-linux-amd64.tar.gz | sudo tar -xzf - -C /usr/local/bin/ 38 | sudo chmod +x /usr/local/bin/kubeval 39 | 40 | 41 | 42 | 
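########################
########################
# Illustrative quick checks after installation (not part of the original script):
# kubectx                       # list available kube contexts
# kubens kube-system            # switch the current namespace
# kubectl krew search           # browse installable krew plugins
# kubeval demo/demo-pod.yml     # validate a manifest against the Kubernetes schemas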
-------------------------------------------------------------------------------- /docs/add-remove-nodes.md: -------------------------------------------------------------------------------- 1 | # Adding nodes (either secondary-masters or infra or compute) 2 | It's important to understand that secondary-masters and nodes are added the same way. 3 | E.g. adding secondary-masters to an existing cluster runs the same flow that is used when they are defined from the first run: the primary-master is handled first, and then all the machines listed under [secondary-masters] are joined. 4 | The same steps apply whether you add additional masters or additional compute nodes. 5 | 6 | Here are the steps to add nodes after the initial install: 7 | 1. prepare the hosts file and make sure: 8 | a. it has the [primary-master] defined 9 | b. in the other groups it has **ONLY** the machines you want to add (either masters under [secondary-masters] or nodes under [nodes]) 10 | 2. run: `ansible-playbook -i hosts site.yml --tags node` 11 | 12 | # Removing nodes: 13 | To remove a node, do similarly: 14 | 1. In the inventory (hosts file), put under the [nodes] group only the machines you wish to reset (remove), and populate [primary-master] with the proper primary-master machine. 15 | 2. `ansible-playbook -i hosts site.yml --tags node` 16 | 17 | Note: the primary-master won't be touched, but it's required in order to properly drain the nodes before the reset. 18 | 19 | # Removing secondary-masters: 20 | For safety reasons, only nodes can currently be removed; any master (be it the primary-master or a secondary-master) won't be removed automatically. 21 | If you want to remove a machine that is a secondary-master, you have to **move** it under the [nodes] group (and remove it from the [secondary-masters] group), then follow the "Removing nodes" steps above. 22 | -------------------------------------------------------------------------------- /roles/common/tasks/decide_master_name.yml: -------------------------------------------------------------------------------- 1 | ## Decide how to approach the master: inventory or force fqdn (for non MasterHA cases); via masterha_ip,MasterHA (for MasterHA cases) 2 | ## TODO: 3 | # if inventory_hostname is already fqdn, do not use ansible_fqdn, as ansible_fqdn is problematic: 4 | # https://github.com/ReSearchITEng/kubeadm-playbook/issues/81 ( https://github.com/ansible/ansible/issues/38777 ) 5 | - block: 6 | - name: by default set master name to inventory definition (no MasterHA case) 7 | set_fact: master_name={{ groups['primary-master'][0] }} 8 | when: 9 | - groups['masters'] | length == 1 10 | 11 | - name: force use fqdn for master name (no MasterHA case) if inventory was not defined fqdn and we have to discover... 12 | set_fact: master_name={{ hostvars[groups['primary-master'][0]]['ansible_fqdn'] }} 13 | when: 14 | - custom.networking.fqdn.always or custom.networking.fqdn.master 15 | - groups['masters'] | length == 1 16 | - '"." 
not in groups["primary-master"][0]' # meaning it was not defined with fqdn, but we would like to force fqdn (per above custom.networking.fqdn condition) 17 | 18 | - name: force use fqdn for master name (MasterHA case) 19 | set_fact: master_name={{ custom.networking.masterha_fqdn }} 20 | when: 21 | - custom.networking.fqdn.always or custom.networking.fqdn.master 22 | - groups['masters'] | length > 1 23 | 24 | - name: force use ip for master name (MasterHA case) 25 | set_fact: master_name={{ custom.networking.masterha_ip }} 26 | when: 27 | - not custom.networking.fqdn.always 28 | - not custom.networking.fqdn.master 29 | - groups['masters'] | length > 1 30 | 31 | tags: 32 | - always 33 | -------------------------------------------------------------------------------- /group_vars/all/KubeProxyConfiguration.yml: -------------------------------------------------------------------------------- 1 | #https://pkg.go.dev/k8s.io/kube-proxy/config/v1alpha1?tab=doc#KubeProxyConfiguration 2 | #check latest api ver here: https://pkg.go.dev/k8s.io/kube-proxy/config/ 3 | KubeProxyConfiguration: 4 | apiVersion: kubeproxy.config.k8s.io/v1alpha1 5 | # bindAddress: 0.0.0.0 6 | # clientConnection: 7 | # acceptContentTypes: "" 8 | # burst: 10 9 | # contentType: application/vnd.kubernetes.protobuf 10 | # kubeconfig: /var/lib/kube-proxy/kubeconfig.conf 11 | # qps: 5 12 | # clusterCIDR: "" 13 | clusterCIDR: "{{ POD_NETWORK_CIDR }}" 14 | ##podSubnet -> Calico is now able to autodetect. If calico is used, this can be commented out. 15 | # Not required: if here is empty it will read from the ClusterConfiguration. 16 | # configSyncPeriod: 15m0s 17 | # conntrack: 18 | # max: null 19 | # maxPerCore: 32768 20 | # min: 131072 21 | # tcpCloseWaitTimeout: 1h0m0s 22 | # tcpEstablishedTimeout: 24h0m0s 23 | # enableProfiling: false 24 | # healthzBindAddress: 0.0.0.0:10256 25 | # hostnameOverride: "" 26 | # iptables: 27 | # masqueradeAll: false 28 | # masqueradeBit: 14 29 | # minSyncPeriod: 0s 30 | # syncPeriod: 30s 31 | ipvs: 32 | strictARP: true 33 | # excludeCIDRs: null 34 | # minSyncPeriod: 0s 35 | # scheduler: "" 36 | # syncPeriod: 30s 37 | kind: KubeProxyConfiguration 38 | # metricsBindAddress: 127.0.0.1:10249 39 | # mode: "" 40 | mode: "ipvs" 41 | ## Leave mode undefined or "" for the default, which usually is the old iptables method 42 | # nodePortAddresses: null 43 | # oomScoreAdj: -999 44 | # portRange: "" 45 | # resourceContainer: /kube-proxy 46 | # udpIdleTimeout: 250ms 47 | -------------------------------------------------------------------------------- /pre_sanity.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - hosts: all 3 | become: yes 4 | become_method: sudo 5 | tags: 6 | - pre_sanity 7 | - master 8 | - node 9 | tasks: 10 | 11 | - name: check 127.0.1.1 /etc/hosts 12 | # For a system with a permanent IP address, that permanent IP address should be used here instead of 127.0.1.1 13 | # https://www.debian.org/doc/manuals/debian-reference/ch05.en.html#_the_hostname_resolution 14 | # This is a good rule, while not mandatory. 
If you want, you can skip this task with --skip-tags check_etc_hosts 15 | command: grep '^\s*127.0.1.1' /etc/hosts 16 | register: command_result 17 | failed_when: command_result.stdout != "" 18 | tags: 19 | - check_etc_hosts 20 | changed_when: false 21 | 22 | - name: check docker is running 23 | command: 'docker info' 24 | tags: 25 | - check_docker 26 | when: docker_setup is defined and docker_setup=false 27 | changed_when: false 28 | 29 | - name: check docker hello world 30 | shell: docker run --rm -i hello-world | awk '/Hello/ {print $1}' 31 | register: command_result 32 | failed_when: command_result.stdout != "Hello" 33 | tags: 34 | - check_docker 35 | when: docker_setup is defined and docker_setup=false 36 | changed_when: false 37 | 38 | # Ideally, to ensure there is no x509 certificate error like: 39 | #' docker pull gcr.io/google_containers/kube-apiserver-amd64:v1.7.2 40 | #Trying to pull repository gcr.io/google_containers/kube-apiserver-amd64 ... 41 | #Get https://gcr.io/v1/_ping: x509: certificate signed by unknown authority ' 42 | # yum check-update ca-certificates; (($?==100)) && yum update ca-certificates || yum reinstall ca-certificates 43 | # update-ca-trust extract 44 | 45 | # Check ports: https://kubernetes.io/docs/setup/independent/install-kubeadm/ 46 | -------------------------------------------------------------------------------- /roles/common/tasks/selinux.yml: -------------------------------------------------------------------------------- 1 | --- 2 | ### SELINUX 3 | 4 | - block: 5 | - name: Disable selinux 6 | selinux: 7 | policy: "{{ selinux_policy | default ('targeted') }}" 8 | state: "{{ selinux_state | default ('permissive') }}" 9 | register: command_result 10 | # changed_when: command_result.reboot_required # works in Ansible 2.4+. To support 2.3, we keep current solution which reboots when there is any time of change... 11 | # changed_when: "'state change will take effect next reboot' in command_result.stdout" 12 | # notify: 13 | # - Reboot 14 | 15 | - name: trigger Reboot when required due to selinux changes 16 | command: /bin/true 17 | notify: 18 | - Reboot 19 | #when: command_result.reboot_required 20 | when: command_result.changed 21 | 22 | - name: flush_handlers (trigger reboot when required) 23 | meta: flush_handlers 24 | changed_when: command_result.changed 25 | #changed_when: command_result.reboot_required 26 | 27 | - name: Pause till machine is up again 28 | pause: 29 | seconds: 30 30 | when: 31 | - command_result.reboot_required | default( false ) 32 | - allow_restart | default ( false ) # Is ansible able to keep this state after reboot? 33 | 34 | - name: Wait for server come back from restart (if selinux required it) 35 | local_action: wait_for 36 | args: 37 | host: "{{ inventory_hostname }}" 38 | port: 22 39 | state: started 40 | delay: 15 41 | timeout: 180 42 | 43 | #Starting Ansible 2.3 one can do: 44 | #- name: Wait for system to become reachable # Ansible 2.3+ 45 | # wait_for_connection: 46 | # timeout: 200 47 | 48 | - name: Gather facts for first time after restart 49 | setup: 50 | 51 | tags: 52 | - selinux 53 | when: 54 | - ansible_os_family == "RedHat" # Is ansible able to keep this state after reboot? 
55 | - selinux_state is defined 56 | -------------------------------------------------------------------------------- /roles/tools/tasks/cluster_sanity.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - block: 3 | ## Decide how to approach the master: inventory or force fqdn (for non MasterHA cases); via masterha_ip,MasterHA (for MasterHA cases) 4 | - name: decide_master_name 5 | include_role: 6 | name: common 7 | tasks_from: decide_master_name 8 | 9 | 10 | - name: "Wait 300 seconds for master at {{ master_name }}:{{ InitConfiguration.localAPIEndpoint.bindPort | default (6443) }} to become open (MasterHA)" 11 | wait_for: 12 | port: "{{ InitConfiguration.localAPIEndpoint.bindPort | default (6443) }}" 13 | host: "{{ master_name }}" 14 | delay: 1 15 | timeout: 300 16 | ### END CODE DUPLICATION 17 | 18 | - name: kubectl get nodes 19 | shell: "kubectl get nodes --no-headers | grep -v -w 'Ready' || true " 20 | register: command_result 21 | tags: 22 | - node_sanity 23 | until: command_result.stdout == "" 24 | retries: "{{ RETRIES | default(40) }}" 25 | delay: 3 26 | changed_when: false 27 | 28 | - name: kubectl get pods 29 | shell: "kubectl get --namespace kube-system pods --no-headers | grep -v -w 'Running' || true " 30 | register: command_result 31 | tags: 32 | - pod_sanity 33 | until: command_result.stdout == "" 34 | retries: "{{ RETRIES | default(40) }}" 35 | delay: 3 36 | changed_when: false 37 | 38 | # >= and not == because we may use this role to only to add nodes also. 39 | - name: Check all nodes were registered 40 | shell: "/usr/bin/test $(kubectl get nodes | grep -ow Ready | wc -l) -ge {{ groups['all'] | length }}" 41 | register: command_result 42 | retries: 30 43 | delay: 3 44 | until: command_result is success 45 | changed_when: false 46 | tags: 47 | - cluster_info 48 | - cluster_status 49 | - node_sanity 50 | 51 | environment: 52 | KUBECONFIG: /etc/kubernetes/admin.conf 53 | tags: 54 | - sanity 55 | - cluster_sanity 56 | 57 | -------------------------------------------------------------------------------- /roles/common/tasks/kube_config.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Make sure folder /etc/kubernetes/ exists 3 | file: path=/etc/kubernetes state=directory mode=0755 4 | 5 | ### prepare cloud-config 6 | - block: 7 | - name: prepare cloud-config file for cloudProvider {{ClusterConfiguration.cloudProvider}} 8 | set_fact: fact_cloud_provider='--cloud-provider={{ ClusterConfiguration.cloudProvider }} --cloud-config=/etc/kubernetes/cloud-config' 9 | 10 | - name: prepare cloud-config file 11 | template: 12 | src: cloud-config.j2 13 | dest: /etc/kubernetes/cloud-config 14 | force: yes 15 | tags: 16 | - init 17 | # notify: 18 | # - Restart kubelet 19 | 20 | # BLOCK ENDS with its condition: 21 | when: 22 | - ClusterConfiguration is defined 23 | - ClusterConfiguration.cloudProvider is defined 24 | - inventory_hostname in groups['masters'] 25 | tags: 26 | - kubelet 27 | 28 | ### kubeadm settings: 29 | - name: Replace {{ClusterConfiguration.networking.dnsDomain}} under cluster-domain in kubelet.service.d/10-kubeadm.conf 30 | replace: 31 | dest: /etc/systemd/system/kubelet.service.d/10-kubeadm.conf 32 | regexp: '(--cluster-domain=)[A-Za-z0-9\-\.]+(.*)' 33 | replace: '\1{{ ClusterConfiguration.networking.dnsDomain | default("cluster.local") }}\2' 34 | #backup: yes 35 | when: ClusterConfiguration is defined and ClusterConfiguration.networking is defined and 
ClusterConfiguration.networking.dnsDomain is defined 36 | notify: 37 | - Reload systemd # already by other handler 38 | - Restart kubelet 39 | tags: 40 | - kubeadm 41 | - kubelet 42 | 43 | - name: Create kubeadm sysctl file 44 | copy: src=90-kubeadm.conf dest=/etc/sysctl.d/90-kubeadm.conf 45 | tags: 46 | - kubeadm 47 | - kubelet 48 | 49 | - name: Set sysctl settings 50 | command: sysctl --system 51 | ignore_errors: true # ignore errors which appear in vm simulated with docker 52 | tags: 53 | - kubeadm 54 | - kubelet 55 | 56 | -------------------------------------------------------------------------------- /roles/tools/tasks/reset_drain.yml: -------------------------------------------------------------------------------- 1 | --- 2 | #- hosts: master 3 | # gather_facts: False 4 | # become: yes 5 | # become_method: sudo 6 | 7 | ## TODO: 8 | # 1. We should make sure master is tainted so pods will not move there either. 9 | # 2. k get no empty: Now works only if we removed all nodes (which might not be the case; should be limited to machines in groups.nodes ) 10 | 11 | - block: 12 | #- set_fact: 13 | # env_kc: '{{ proxy_env |default({}) | combine ({"PATH" : "/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/bin/" }) | combine ({"KUBECONFIG" :"/etc/kubernetes/admin.conf"}) }}' 14 | # tags: 15 | # - always 16 | 17 | - name: get nodes 18 | shell: "kubectl get nodes --no-headers -- | cut -f 1 -d ' '" 19 | register: command_results 20 | changed_when: false 21 | 22 | - name: drain nodes 23 | command: kubectl drain {{ item }} --delete-local-data --force --ignore-daemonsets --grace-period=5 --timeout=60s 24 | #with_items: "{{command_results.stdout_lines}}" 25 | with_items: "{{ groups['nodes'] }}" 26 | ignore_errors: true 27 | 28 | - name: delete nodes 29 | command: kubectl delete node {{ item }} 30 | #with_items: "{{command_results.stdout_lines}}" 31 | with_items: "{{ groups['nodes'] }}" 32 | ignore_errors: true 33 | 34 | #shell: "kubectl get nodes -o jsonpath='{.items[*].metadata.name}'" 35 | #with_items: "{{ groups['nodes'] }}" 36 | 37 | - name: kubectl get nodes must be empty by now (if target was full cluster and not partial update) 38 | shell: "kubectl get nodes --no-headers | grep -v 'node-role.kubernetes.io/control-plane' | grep -v -w 'Ready' || true" 39 | register: command_result 40 | until: command_result.stdout == "" 41 | retries: 10 42 | delay: 3 43 | ignore_errors: true 44 | changed_when: false 45 | 46 | environment: 47 | KUBECONFIG: /etc/kubernetes/admin.conf 48 | when: reset_gracefully is defined and reset_gracefully 49 | tags: 50 | - reset 51 | - drain 52 | 53 | -------------------------------------------------------------------------------- /other_tools/dockerize.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | set -e 3 | # Ubuntu 4 | #sudo apt-get update 5 | #sudo apt-get install -y git docker #ansible 6 | 7 | # CentOS/RHEL 8 | #sudo yum install -y git docker ansible curl tar zip unzip 9 | #ssh-copy-id 10 | sudo yum install -y docker iptables-services 11 | sudo sh -c 'echo EXTRA_STORAGE_OPTIONS=\"--storage-opt overlay2.override_kernel_check=true\">/etc/sysconfig/docker-storage-setup' 12 | sudo sh -c 'echo STORAGE_DRIVER=\"overlay2\" >>/etc/sysconfig/docker-storage-setup' 13 | sudo rm -f /etc/sysconfig/docker-storage || true 14 | 15 | # Firewalld (and selinux) do not play well with k8s (and especially with kubeadm). 
16 | # NOTE: A machine reboot may be required if SELinux was enforced previously 17 | systemctl stop firewalld || true 18 | systemctl disable firewalld || true 19 | systemctl mask firewalld || true 20 | systemctl start iptables 21 | systemctl enable iptables 22 | systemctl unmask iptables 23 | 24 | sudo systemctl stop docker 25 | sudo systemctl start docker-storage-setup 26 | sudo systemctl restart docker 27 | sudo systemctl enable docker 28 | #sudo chown vagrant /var/run/docker.sock # optional 29 | 30 | # SET Default Policies to ACCEPT 31 | iptables -P FORWARD ACCEPT 32 | iptables -P INPUT ACCEPT 33 | iptables -P OUTPUT ACCEPT 34 | 35 | # Remove the Default REJECT rules, so it will hit the default Policy 36 | iptables -D INPUT -j REJECT --reject-with icmp-host-prohibited 37 | iptables -D FORWARD -j REJECT --reject-with icmp-host-prohibited 38 | 39 | # If someone wants to enable only some ports (there will be many, and most of them dynamic), here is a start: 6443 (k8s api), 10250, etc. (maybe both tcp and udp...) 40 | #sudo iptables -I INPUT -p tcp --dport 6443 -m conntrack --ctstate NEW,ESTABLISHED -j ACCEPT 41 | #sudo iptables -I OUTPUT -p tcp --sport 6443 -m conntrack --ctstate ESTABLISHED -j ACCEPT 42 | 43 | # DEBUG LIVE WITH: 44 | # watch -n1 iptables -vnL 45 | -------------------------------------------------------------------------------- /roles/post_deploy/tasks/network.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # - hosts: master 3 | # become: yes 4 | # become_method: sudo 5 | # tasks: 6 | - set_fact: 7 | env_kc: '{{ proxy_env |default({}) | combine ({"KUBECONFIG" :"/etc/kubernetes/admin.conf"}) }}' 8 | tags: 9 | - always 10 | 11 | # Not clear if mandatory for weave: 12 | # https://www.weave.works/docs/net/latest/kubernetes/kube-addon/ 13 | - name: Copy weave net script 14 | environment: '{{ proxy_env | default ({}) }}' 15 | get_url: url=https://raw.githubusercontent.com/weaveworks/weave/master/weave dest=/usr/local/bin/weave mode=u+rxw force=yes 16 | ignore_errors: true # Currently there is no way to check if the user is using weave 17 | when: 18 | - podNetwork is defined 19 | - podNetwork == "weave" 20 | tags: 21 | - weave 22 | - network 23 | 24 | - name: Create cluster network (when is via manifest instead of helm chart) 25 | command: /usr/bin/kubectl apply -f {{ item }} 26 | with_items: "{{ k8s_network_addons_urls }}" 27 | #with_items: "{{ k8s_network_addons_urls | default ([]) }}" 28 | #with_items: '{{ k8s_network_addons_urls | default ("https://docs.projectcalico.org/manifests/calico.yaml") }}' 29 | environment: '{{env_kc}}' 30 | when: 31 | - k8s_network_addons_urls is defined 32 | # - k8s_network_addons_urls | length > 0 33 | - ( k8s_network_addons_urls | default([]) ) | length > 0 34 | tags: 35 | - k8s_network_addons 36 | - network 37 | 38 | - name: sanity - wait for alls pod to be running (besides kube-dns for now, as it might be forced to a node if master did not allow it due to tains) 39 | environment: 40 | KUBECONFIG: /etc/kubernetes/admin.conf 41 | shell: "kubectl get --namespace kube-system pods --no-headers | grep -v -w 'Running' | grep -v 'kube-dns' | grep -v 'coredns' || true " 42 | register: command_result 43 | tags: 44 | - k8s_network_addons 45 | - sanity 46 | - network 47 | until: command_result.stdout == "" 48 | retries: "{{ RETRIES | default(40) }}" 49 | delay: 3 50 | changed_when: false 51 | -------------------------------------------------------------------------------- /docs/portable_machine_setup.md: 
-------------------------------------------------------------------------------- 1 | # What is included: 2 | If the inventory has only the [primary-master] section populated, it understands it's a one-machine cluster (at least for now). 3 | The playbook will do most of the settings accordingly. 4 | 5 | # ingress with local binding 6 | For the ingress controller to listen on 127.*, you may want to use option 2 of the ingress controller defined in addons.yml 7 | 8 | # Portable IP address: 9 | Should you have this installation in a VM, and your IP address changes, you may want to make it "portable", so it does not depend on the IP address. 10 | 11 | ``` 12 | echo "make installation agnostic to ip address" 13 | CURRENT_IP=`hostname -I | cut -d" " -f1` 14 | sudo perl -p -i -e "s/${CURRENT_IP}/127.0.0.1/g" ` find /etc/kubernetes/ -type f \( -name \*.yaml -o -name \*.conf \) ` 15 | ``` 16 | 17 | # add ingresses to hosts file 18 | In such cases, most probably you don't have a wildcard dns either, so create similar entries in the /etc/hosts file. 19 | (Of course, these entries have to be in sync with group_vars/all/network.yml and, if you customized them, with any hosts/domains defined in addons.yml.) 20 | # dns entries for ingresses. 21 | ``` 22 | echo "127.0.1.2 dashboard.k8s.local.example.com prometheus.k8s.local.example.com grafana.k8s.corp.example.com" | sudo tee -a /etc/hosts >/dev/null 23 | ``` 24 | 25 | # compress image 26 | In case you want to ship such a portable vm image with k8s inside, you may want to make it as small as possible before shutdown. 27 | 28 | ``` 29 | sudo systemctl stop kubelet || true 30 | sudo systemctl disable kubelet || true 31 | docker rmi -f $(docker images -q) 32 | ``` 33 | 34 | # If you want to temporarily turn off your kubernetes (keep only its configuration), do: 35 | ``` 36 | sudo systemctl stop kubelet; sudo systemctl disable kubelet; docker ps | grep kube | cut -d" " -f1 | xargs docker stop ; docker ps | grep k8s | cut -d" " -f1 | xargs docker stop; docker ps 37 | ``` 38 | 39 | # To save space, you may want to also delete some or even all docker images which are not currently used: 40 | `docker rmi $(docker images -q)` 41 | 42 | # other tips: 43 | you may want to do `sudo fstrim /` 44 | -------------------------------------------------------------------------------- /roles/storage/tasks/vsphere.yml: -------------------------------------------------------------------------------- 1 | --- 2 | ## vsphere/vmware/vcenter 3 | - block: 4 | - set_fact: 5 | env_kc: '{{ proxy_env |default({}) | combine ({"KUBECONFIG" :"/etc/kubernetes/admin.conf"}) }}' 6 | tags: 7 | - always 8 | 9 | - name: Create vsphere storage class 10 | command: /usr/bin/kubectl apply -f {{ item }} 11 | with_items: "{{ vsphere_storageclass_urls | default ([]) }}" 12 | environment: '{{env_kc}}' 13 | when: 14 | - vsphere_storageclass_urls is defined 15 | tags: 16 | - vsphere_storageclass_urls 17 | - vsphere 18 | 19 | - block: 20 | - name: vsphere_bug_fix github.com/vmware/kubernetes/issues/495 21 | copy: src=vsphere_bug_fix.sh dest=/tmp/vsphere_bug_fix.sh mode='0755' 22 | 23 | - name: execute vsphere_bug_fix.sh 24 | environment: '{{env_kc}}' 25 | shell: /tmp/vsphere_bug_fix.sh 26 | register: list 27 | 28 | - name: build machine reboot list due to vsphere_bug 29 | add_host: name={{item}} group=mustrebootlist 30 | with_items: 31 | '{{list.stdout_lines}}' 32 | 33 | # NOW it's done via .sh, but in future maybe do: 34 | # - name: "vsphere bug (No VM found) => so we need to delete master(s) (and restart 
kubelet)" 35 | # environment: '{{env_kc}}' 36 | # command: kubectl delete node {{ inventory_hostname_short }} 37 | # ignore_errors: true 38 | # notify: # kubelet restart is needed for 1.14+, for cloud=vsphere, otherwise we get: "Unable to find VM by UUID. VM UUID:" or Error "No VM found" node info for node 39 | # - Restart kubelet 40 | 41 | # # - meta: flush_handlers is not enough, as sometimes delete was not with success, so forcing trigger like this: 42 | # - name: "vpshere bug (No VM found) =>trigger kubelet restart (after master node deleted)" 43 | # debug: msg="vpshere bug =>trigger kubelet restart (after master node deleted)" 44 | # notify: 45 | # - Restart kubelet 46 | # changed_when: true 47 | when: 48 | - vsphere_bug_fix is defined 49 | - vsphere_bug_fix 50 | tags: 51 | - vsphere_bug_fix 52 | 53 | when: 54 | - ClusterConfiguration.cloudProvider is defined 55 | - ClusterConfiguration.cloudProvider == 'vsphere' 56 | tags: 57 | - vsphere 58 | 59 | -------------------------------------------------------------------------------- /group_vars/all/ClusterConfiguration.yml: -------------------------------------------------------------------------------- 1 | #https://pkg.go.dev/k8s.io/kubernetes/cmd/kubeadm/app/apis/kubeadm/v1beta3?tab=doc#ClusterConfiguration 2 | #check latest api ver here: https://pkg.go.dev/k8s.io/kubernetes/cmd/kubeadm/app/apis/kubeadm?tab=subdirectories 3 | ClusterConfiguration: 4 | apiVersion: kubeadm.k8s.io/v1beta4 5 | apiServer: 6 | #extraEnvs: [] 7 | extraArgs: # https://kubernetes.io/docs/admin/kube-apiserver/ 8 | - name: endpoint-reconciler-type 9 | value: "lease" # needs k8s 1.9+ More info: https://kubernetes.io/docs/admin/high-availability/building/#endpoint-reconciler 10 | - name: service-node-port-range 11 | value: '79-32767' #Default 32000-32767 ; Ensure the local ports on all nodes are set accordingly 12 | # auditPolicy: 13 | # logDir: /var/log/kubernetes/audit 14 | # logMaxAge: 2 15 | # path: "" 16 | # certificatesDir: /etc/kubernetes/pki 17 | # clusterName: kubernetes 18 | clusterName: "{{ CLUSTER_NAME }}" 19 | # controlPlaneEndpoint: "" 20 | # etcd: 21 | # local: 22 | # serverCertSANs: 23 | # - "10.33.46.215" 24 | # extraArgs: 25 | # cipher-suites: TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256 26 | # dataDir: /var/lib/etcd 27 | # image: "" 28 | imageRepository: '{{ images_repo | default ("registry.k8s.io") }}' 29 | kind: ClusterConfiguration 30 | kubernetesVersion: "v{{ KUBERNETES_VERSION }}" 31 | # dns: 32 | networking: 33 | # dnsDomain: cluster.local 34 | serviceSubnet: "{{ SERVICE_NETWORK_CIDR }}" 35 | # podSubnet: "" 36 | podSubnet: "{{ POD_NETWORK_CIDR }}" 37 | ##podSubnet -> Calico is now able to autodetect. If calico is used, this can be commented out. 38 | #cloudProvider: 'vsphere' # WE NEED THIS Even after 1.11 (v1alpha2) (due to a bug in ansible on vars with "-"); this is also required: govc vm.change -e="disk.enableUUID=1" -vm= and requires setup of cloud_config below 39 | # controllerManager: 40 | # controllerManagerExtraArgs: # https://kubernetes.io/docs/admin/kube-controller-manager/ 41 | # pod-eviction-timeout: '3m00s' # Default 5m0s #PodEvictionTimeout controls grace peroid for deleting pods on failed nodes. Takes time duration string (e.g. '300ms' or '2m30s'). Valid time units are 'ns', 'us', 'ms', 's', 'm', 'h'. 
42 | # : 43 | 44 | 45 | -------------------------------------------------------------------------------- /roles/storage/tasks/rook_reset.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # - hosts: master 3 | # become: yes 4 | # become_method: sudo 5 | # tags: 6 | # - rook 7 | # - reset 8 | # tasks: 9 | 10 | - block: 11 | - set_fact: 12 | env_kc: '{{ proxy_env |default({}) | combine ({"PATH" : "/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin" }) | combine ({"KUBECONFIG" :"/etc/kubernetes/admin.conf"}) }}' 13 | 14 | - name: deploy rook operator (using helm chart) - prefered method 15 | environment: '{{env_kc}}' 16 | command: 'helm delete --purge rook-operator ' 17 | when: rook.operator_url is not defined 18 | ignore_errors: true 19 | 20 | - name: clean install rook operator 21 | environment: '{{env_kc}}' 22 | command: /usr/bin/kubectl delete {{ item }} 23 | with_items: 24 | - "-n rook po/rook-client" 25 | - "-n rook po/rook-tools" 26 | - "-n rook cm/rook-config-override" 27 | - "-n rook deploy/rook-api" 28 | - "-n rook ds/osd" 29 | - "-n rook cluster rook" 30 | - "-n rook serviceaccount rook-api" 31 | - "clusterrole rook-api" 32 | - "clusterrolebinding rook-api" 33 | - "thirdpartyresources cluster.rook.io pool.rook.io" 34 | - "secret rook-rook-user" 35 | - "namespace rook" 36 | - "sc rook-block" 37 | tags: 38 | - reset 39 | ignore_errors: true 40 | 41 | - name: clean install rook operator 42 | environment: '{{env_kc}}' 43 | command: /usr/bin/kubectl delete {{ item }} 44 | when: rook.operator_url is defined 45 | with_items: 46 | - "-f {{ rook.operator_url }}" 47 | 48 | - name: clean install rook secrets from all rook.allowed_consumer_namespaces 49 | # TODO: scan all namespaces and remove it. Do not rely on the previously defined rook.allowed_consumer_namespaces 50 | environment: '{{env_kc}}' 51 | command: /usr/bin/kubectl delete -n {{ item }} secret rook-admin 52 | when: rook.allowed_consumer_namespaces is defined 53 | with_items: "{{ rook.allowed_consumer_namespaces }}" 54 | ignore_errors: true 55 | 56 | - name: rook post cleanup/reset sanity 57 | environment: 58 | KUBECONFIG: /etc/kubernetes/admin.conf 59 | shell: "kubectl get --all-namespaces pods --no-headers | grep -v -w 'Running' || true " 60 | register: command_result 61 | tags: 62 | - sanity 63 | until: command_result.stdout == "" 64 | retries: 30 65 | delay: 3 66 | changed_when: false 67 | 68 | - name: clean rook ceph persistant storage directories 69 | file: path=/storage/rook/* state=absent force=yes 70 | tags: 71 | - uninstall 72 | when: rook.reset.storage_delete is defined and rook.reset.storage_delete 73 | 74 | when: rook is defined and rook.enabled 75 | tags: 76 | - rook 77 | - reset 78 | -------------------------------------------------------------------------------- /roles/helm/tasks/helm_reset.yml: -------------------------------------------------------------------------------- 1 | --- 2 | #- hosts: master 3 | # become: yes 4 | # become_method: sudo 5 | # tags: 6 | # - helm_reset 7 | # - helm 8 | # - reset 9 | # tasks: 10 | 11 | - set_fact: 12 | env_kc: '{{ proxy_env |default({}) | combine ({"PATH" : "/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/bin" }) | combine ({"KUBECONFIG" :"/etc/kubernetes/admin.conf"}) }}' 13 | tags: 14 | - helm_reset 15 | - reset 16 | - helm_purge 17 | - helm2 18 | 19 | - block: 20 | - name: helm reset - helm list all apps 21 | environment: '{{env_kc}}' 22 | shell: helm list | tail -n +2 | awk '{print $1}' 23 | register: command_results 24 | 
ignore_errors: true 25 | changed_when: false 26 | tags: 27 | - helm_purge 28 | 29 | - name: helm reset - helm delete purge all apps 30 | environment: '{{env_kc}}' 31 | command: helm delete --purge {{ item }} 32 | with_items: "{{ command_results.stdout_lines }}" 33 | ignore_errors: true 34 | tags: 35 | - helm_purge 36 | 37 | - name: helm reset - wait till all helm apps are deleted/purged 38 | environment: '{{env_kc}}' 39 | shell: helm list | tail -n +2 40 | register: command_result 41 | until: command_result.stdout == "" 42 | retries: 20 43 | delay: 3 44 | changed_when: false 45 | ignore_errors: true 46 | tags: 47 | - helm_purge 48 | 49 | - name: helm reset - remove/cleanup 50 | environment: '{{env_kc}}' 51 | shell: ( helm reset --force --remove-helm-home --tiller-connection-timeout 30 || true ) && sleep 2 && ( kubectl --namespace=kube-system delete --cascade=true --now=true --wait=true --ignore-not-found=true deployment tiller-deploy || true ) && ( kubectl delete service --namespace=kube-system tiller-deploy || true ) && ( kubectl delete clusterrolebinding tiller || true ) && ( kubectl delete --namespace=kube-system sa tiller ) 52 | ignore_errors: true 53 | 54 | - name: helm reset - list all k8s resources with tiller-deploy in their name 55 | environment: '{{env_kc}}' 56 | shell: kubectl --namespace=kube-system get all --no-headers | grep tiller-deploy | awk '{print $1}' 57 | register: command_results 58 | ignore_errors: true 59 | changed_when: false 60 | 61 | - name: helm reset - delete all k8s resources with tiller-deploy in their name 62 | environment: '{{env_kc}}' 63 | command: kubectl --namespace=kube-system delete --cascade=true --now=true --wait=true --ignore-not-found=true {{ item }} 64 | with_items: "{{ command_results.stdout_lines }}" 65 | ignore_errors: true 66 | tags: 67 | - helm_purge 68 | when: 69 | - full_helm_reinstall is defined and full_helm_reinstall 70 | tags: 71 | - helm_reset 72 | - reset 73 | - helm2 74 | 75 | -------------------------------------------------------------------------------- /group_vars/all/KubeletConfiguration.yml: -------------------------------------------------------------------------------- 1 | #https://pkg.go.dev/k8s.io/kubelet/config/v1beta1?tab=doc#KubeletConfiguration 2 | #https://pkg.go.dev/k8s.io/kubernetes/pkg/kubelet/apis/config/v1beta1?tab=doc#KubeletConfiguration 3 | #check latest api ver here: https://pkg.go.dev/k8s.io/kubernetes/pkg/kubelet/apis/config?tab=subdirectories 4 | # https://pkg.go.dev/k8s.io/kubelet/config #?tab=subdirectories 5 | KubeletConfiguration: 6 | #nodeRegistration: 7 | # kubeletExtraArgs: 8 | # --cloud-provider={{ kubeadm_master_config.cloudProvider }} --cloud-config=/etc/kubernetes/cloud-config 9 | # address: 0.0.0.0 10 | apiVersion: kubelet.config.k8s.io/v1beta1 11 | # authentication: 12 | # anonymous: 13 | # enabled: false 14 | # webhook: 15 | # cacheTTL: 2m0s 16 | # enabled: true 17 | # x509: 18 | # clientCAFile: /etc/kubernetes/pki/ca.crt 19 | # authorization: 20 | # mode: Webhook 21 | # webhook: 22 | # cacheAuthorizedTTL: 5m0s 23 | # cacheUnauthorizedTTL: 30s 24 | cgroupDriver: systemd #cgroupfs # systemd should be the new default with 1.24+ 25 | # cgroupsPerQOS: true 26 | # clusterDNS: 27 | # - 10.96.0.10 28 | # clusterDomain: cluster.local 29 | # configMapAndSecretChangeDetectionStrategy: Watch 30 | # containerLogMaxFiles: 5 31 | # containerLogMaxSize: 10Mi 32 | # contentType: application/vnd.kubernetes.protobuf 33 | # cpuCFSQuota: true 34 | # cpuCFSQuotaPeriod: 100ms 35 | # cpuManagerPolicy: none 36 | # 
cpuManagerReconcilePeriod: 10s 37 | # enableControllerAttachDetach: true 38 | # enableDebuggingHandlers: true 39 | # enforceNodeAllocatable: 40 | # - pods 41 | # eventBurst: 10 42 | # eventRecordQPS: 5 43 | # evictionHard: 44 | # imagefs.available: 15% 45 | # memory.available: 100Mi 46 | # nodefs.available: 10% 47 | # nodefs.inodesFree: 5% 48 | # evictionPressureTransitionPeriod: 5m0s 49 | # failSwapOn: true 50 | # fileCheckFrequency: 20s 51 | # hairpinMode: promiscuous-bridge 52 | # healthzBindAddress: 127.0.0.1 53 | # healthzPort: 10248 54 | # httpCheckFrequency: 20s 55 | imageGCHighThresholdPercent: 70 56 | imageGCLowThresholdPercent: 60 57 | # imageMinimumGCAge: 2m0s 58 | # iptablesDropBit: 15 59 | # iptablesMasqueradeBit: 14 60 | kind: KubeletConfiguration 61 | # kubeAPIBurst: 10 62 | # kubeAPIQPS: 5 63 | # makeIPTablesUtilChains: true 64 | # maxOpenFiles: 1000000 65 | # maxPods: 110 66 | # nodeLeaseDurationSeconds: 40 67 | # nodeStatusUpdateFrequency: 10s 68 | # oomScoreAdj: -999 69 | # podPidsLimit: -1 70 | # port: 10250 71 | # registryBurst: 10 72 | # registryPullQPS: 5 73 | # resolvConf: /etc/resolv.conf 74 | # rotateCertificates: true 75 | # runtimeRequestTimeout: 2m0s 76 | # serializeImagePulls: true 77 | # staticPodPath: /etc/kubernetes/manifests 78 | # streamingConnectionIdleTimeout: 4h0m0s 79 | # syncFrequency: 1m0s 80 | # volumeStatsAggPeriod: 1m0s 81 | -------------------------------------------------------------------------------- /roles/tools/tasks/labels.yml: -------------------------------------------------------------------------------- 1 | --- 2 | #- hosts: nodes 3 | # tasks: 4 | ## The below should be 4 times, (to cover nodes fqdn, nodes nofqdn, master fqdn, master nofqdn) based on: 5 | # - not custom.networking.fqdn.always 6 | # - not custom.networking.fqdn.node 7 | # - inventory_hostname not in groups['masters'] 8 | # As it's overcomplicating and sometimes k8s still has nodes with short (even if fqdn is true), we default to: 9 | # Due to ansible in probelmatic netw setups (https://github.com/ReSearchITEng/kubeadm-playbook/issues/81, https://github.com/ansible/ansible/issues/38777 ) 10 | # we have to add the 3rd option as well... 
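# (Illustrative note, not part of the role): after these labeling tasks run, the applied labels can be verified manually from the primary master, e.g.: KUBECONFIG=/etc/kubernetes/admin.conf kubectl get nodes --show-labels , or filtered by the default label used below: KUBECONFIG=/etc/kubernetes/admin.conf kubectl get nodes -l node-role.kubernetes.io/compute=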
11 | 12 | - block: 13 | - name: labeling using inventory_hostname_short - {{ inventory_hostname_short }} - (this or below 2 will end with error) 14 | delegate_to: "{{groups['primary-master'][0]}}" 15 | environment: 16 | KUBECONFIG: /etc/kubernetes/admin.conf 17 | shell: kubectl label nodes {{ inventory_hostname_short }} {{label | default ("node-role.kubernetes.io/compute=") }} --overwrite 18 | register: command_result 19 | changed_when: '"not labeled" not in command_result.stdout' 20 | ignore_errors: true 21 | 22 | - name: labeling using ansible_fqdn - {{ inventory_hostname }} - (this or above or below will end with error) 23 | delegate_to: "{{groups['primary-master'][0]}}" 24 | environment: 25 | KUBECONFIG: /etc/kubernetes/admin.conf 26 | shell: kubectl label nodes {{ inventory_hostname }} {{label | default ("node-role.kubernetes.io/compute=") }} --overwrite 27 | register: command_result 28 | changed_when: '"not labeled" not in command_result.stdout' 29 | ignore_errors: true 30 | 31 | - name: labeling using ansible_fqdn - {{ ansible_fqdn }} - (this or one of the above 2 will end with error) 32 | delegate_to: "{{groups['primary-master'][0]}}" 33 | environment: 34 | KUBECONFIG: /etc/kubernetes/admin.conf 35 | shell: kubectl label nodes {{ ansible_fqdn }} {{label | default ("node-role.kubernetes.io/compute=") }} --overwrite 36 | register: command_result 37 | changed_when: '"not labeled" not in command_result.stdout' 38 | ignore_errors: true 39 | when: 40 | - inventory_hostname not in groups['masters'] 41 | tags: 42 | - all 43 | 44 | 45 | - block: 46 | # - name: get name of primary_master when single node cluster 47 | # environment: 48 | # KUBECONFIG: /etc/kubernetes/admin.conf 49 | # shell: 'kubectl get no -o=jsonpath="{.items[0].metadata.name}"' 50 | # register: result_primary_master_name 51 | 52 | - name: when cluster is one machine only, labeling it also as infra node 53 | environment: 54 | KUBECONFIG: /etc/kubernetes/admin.conf 55 | shell: 'kubectl label nodes -l="node-role.kubernetes.io/control-plane=" "node-role.kubernetes.io/infra=" --overwrite' 56 | register: command_result 57 | changed_when: '"not labeled" not in command_result.stdout' 58 | ignore_errors: true 59 | when: 60 | - groups['all'] | length == 1 61 | tags: 62 | - all 63 | 64 | -------------------------------------------------------------------------------- /batch_deploy_serial_non_parallel.yml: -------------------------------------------------------------------------------- 1 | --- 2 | #- hosts: master 3 | # become: yes 4 | # become_method: sudo 5 | # tags: 6 | # - helm 7 | # tasks: 8 | 9 | - set_fact: 10 | env_kc: '{{ proxy_env |default({}) | combine ({"PATH" : "/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/bin:/home/linuxbrew/.linuxbrew/bin" }) | combine ({"KUBECONFIG" :"/etc/kubernetes/admin.conf"}) }}' 11 | tags: 12 | - always 13 | 14 | - name: helm related crds,webhooks,rbacs,operators via manifests 15 | block: 16 | - name: applying pre_helm_manifests 17 | command: kubectl apply -f {{ item }} 18 | with_items: '{{ pre_helm_manifests }}' 19 | environment: '{{env_kc}}' 20 | when: 21 | - pre_helm_manifests is defined 22 | - pre_helm_manifests | length > 0 23 | register: command_result 24 | changed_when: '" changed" in command_result.stdout or " created" in command_result.stdout' 25 | # there might be mutiplce artifacts in the same yaml, so we cannot simply rely on "unchanged" 26 | tags: 27 | - pre_helm_manifests 28 | - charts_deploy 29 | 30 | - block: 31 | - name: Group helm charts by batch 32 | set_fact: 33 | 
charts_by_batch: "{{ charts_by_batch | default({}) | combine({ (item.batch | default(50)) | string: (charts_by_batch[item.batch | default(50) | string] | default([])) + [item] }) }}" 34 | with_items: "{{ helm.packages_list }}" 35 | when: 36 | - helm is defined 37 | - helm.packages_list is defined 38 | 39 | - name: Get sorted batch numbers 40 | set_fact: 41 | sorted_batches: "{{ charts_by_batch.keys() | map('int') | sort | map('string') }}" 42 | when: 43 | - charts_by_batch is defined 44 | 45 | - name: Deploy helm charts batch by batch 46 | include_tasks: deploy_batch.yml 47 | vars: 48 | batch_number: "{{ batch_item }}" 49 | charts_in_batch: "{{ charts_by_batch[batch_item] }}" 50 | loop: "{{ sorted_batches }}" 51 | loop_control: 52 | loop_var: batch_item 53 | when: 54 | - sorted_batches is defined 55 | - charts_by_batch is defined 56 | tags: 57 | - helm 58 | - charts_deploy 59 | 60 | - name: "helm full sanity - wait for all installed charts to become running after all batches deployed" 61 | block: 62 | - name: Wait 3 seconds before helm sanity check 63 | pause: 64 | seconds: 3 65 | changed_when: false 66 | 67 | - name: helm full sanity - wait for all pods to be running (excluding some that may not be ready immediately) 68 | shell: "kubectl get pods --all-namespaces --no-headers | grep -v -w 'Running' | grep -v 'kube-dns' | grep -v 'coredns' | grep -v 'tiller-deploy' || true" 69 | environment: '{{ env_kc }}' 70 | register: command_result 71 | until: command_result.stdout == "" 72 | retries: "{{ RETRIES | default(40) }}" 73 | delay: 3 74 | changed_when: false 75 | tags: 76 | - sanity 77 | - helm_sanity 78 | 79 | - name: Display cluster status after all helm charts deployed 80 | shell: "kubectl get pods --all-namespaces" 81 | environment: '{{ env_kc }}' 82 | register: final_status 83 | changed_when: false 84 | 85 | - debug: 86 | msg: "All helm charts deployed successfully. 
Final cluster status:" 87 | - debug: 88 | var: final_status.stdout_lines 89 | 90 | 91 | -------------------------------------------------------------------------------- /roles/helm/tasks/charts_deploy.yml: -------------------------------------------------------------------------------- 1 | --- 2 | #- hosts: master 3 | # become: yes 4 | # become_method: sudo 5 | # tags: 6 | # - helm 7 | # tasks: 8 | 9 | - set_fact: 10 | env_kc: '{{ proxy_env |default({}) | combine ({"PATH" : "/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/bin:/home/linuxbrew/.linuxbrew/bin" }) | combine ({"KUBECONFIG" :"/etc/kubernetes/admin.conf"}) }}' 11 | tags: 12 | - always 13 | 14 | - name: helm related crds,webhooks,rbacs,operators via manifests 15 | block: 16 | - name: applying pre_helm_manifests 17 | command: kubectl apply -f {{ item }} 18 | with_items: '{{ pre_helm_manifests }}' 19 | environment: '{{env_kc}}' 20 | when: 21 | - pre_helm_manifests is defined 22 | - pre_helm_manifests | length > 0 23 | register: command_result 24 | changed_when: '" changed" in command_result.stdout or " created" in command_result.stdout' 25 | # there might be mutiplce artifacts in the same yaml, so we cannot simply rely on "unchanged" 26 | tags: 27 | - pre_helm_manifests 28 | - charts_deploy 29 | 30 | - block: 31 | - name: Group helm charts by batch 32 | set_fact: 33 | charts_by_batch: "{{ charts_by_batch | default({}) | combine({ (item.batch | default(50)) | string: (charts_by_batch[item.batch | default(50) | string] | default([])) + [item] }) }}" 34 | with_items: "{{ helm.packages_list }}" 35 | when: 36 | - helm is defined 37 | - helm.packages_list is defined 38 | 39 | - name: Get sorted batch numbers 40 | set_fact: 41 | sorted_batches: "{{ charts_by_batch.keys() | map('int') | sort | map('string') }}" 42 | when: 43 | - charts_by_batch is defined 44 | 45 | - name: Deploy helm charts batch by batch 46 | include_tasks: deploy_batch.yml 47 | vars: 48 | batch_number: "{{ batch_item }}" 49 | charts_in_batch: "{{ charts_by_batch[batch_item] }}" 50 | loop: "{{ sorted_batches }}" 51 | loop_control: 52 | loop_var: batch_item 53 | when: 54 | - sorted_batches is defined 55 | - charts_by_batch is defined 56 | tags: 57 | - helm 58 | - charts_deploy 59 | 60 | - name: "helm full sanity - wait for all installed charts to become running after all batches deployed" 61 | tags: 62 | - helm 63 | - charts_deploy 64 | block: 65 | - name: Wait 3 seconds before helm sanity check 66 | pause: 67 | seconds: 3 68 | changed_when: false 69 | 70 | - name: helm full sanity - wait for all installed charts to become running 71 | environment: 72 | KUBECONFIG: /etc/kubernetes/admin.conf 73 | shell: "kubectl get --all-namespaces pods --no-headers | grep -v -w 'Running' || true " 74 | register: command_result 75 | tags: 76 | - sanity_helm 77 | - sanity 78 | - charts_deploy_sanity 79 | until: command_result.stdout == "" 80 | retries: 60 81 | delay: 3 82 | changed_when: false 83 | when: wait_charts_deploy_sanity | default('false') | bool # it's not mandatory to wait/make sure absolutelly all deployments are fine. 84 | 85 | - name: Display cluster status after all helm charts deployed 86 | shell: "kubectl get pods --all-namespaces" 87 | environment: '{{ env_kc }}' 88 | register: final_status 89 | changed_when: false 90 | 91 | - debug: 92 | msg: "All helm charts deployed successfully. 
Final cluster status:" 93 | - debug: 94 | var: final_status.stdout_lines 95 | 96 | -------------------------------------------------------------------------------- /roles/common/tasks/kernel_modules.yml: -------------------------------------------------------------------------------- 1 | --- 2 | ## ip_vs is optional for k8s (but ideal), and it's mandatory for keepalived especially when in docker 3 | - name: kernel_modules - load ip_vs group 4 | modprobe: name={{ item }} state=present 5 | with_items: 6 | - ip_vs_wrr 7 | - ip_vs_rr 8 | - ip_vs_sh 9 | - ip_vs 10 | # - nf_conntrack_ipv4 # removed as it fails in newer kernels 11 | when: 12 | - KubeProxyConfiguration.mode is defined 13 | - KubeProxyConfiguration.mode == "ipvs" 14 | 15 | - name: kernel_modules - persist ip_vs kernel modules 16 | copy: 17 | dest: /etc/modules-load.d/k8s_ip_vs.conf 18 | content: | 19 | ip_vs_wrr 20 | ip_vs_rr 21 | ip_vs_sh 22 | ip_vs 23 | # nf_conntrack_ipv4 # removed as it fails in newer kernels 24 | when: 25 | - KubeProxyConfiguration.mode is defined 26 | - KubeProxyConfiguration.mode == "ipvs" 27 | 28 | - name: load bridge kernel module ( for /proc/sys/net/bridge/bridge-nf-call-iptables ) 29 | modprobe: name={{item}} state=present 30 | with_items: 31 | - bridge 32 | ignore_errors: true 33 | 34 | - name: every reboot load bridge kernel modules 35 | copy: 36 | dest: "/etc/modules-load.d/k8s_bridge.conf" 37 | content: | 38 | bridge 39 | 40 | - name: load br_netfilter kernel module (for /proc/sys/net/bridge/bridge-nf-call-iptables in newer kernels) 41 | modprobe: name={{item}} state=present 42 | with_items: 43 | - br_netfilter 44 | register: br_netfilter_load_result 45 | ignore_errors: true # because in some old kernels br_netfilter does not exist and bridge is enough 46 | 47 | - name: every reboot load bridge and br_netfilter kernel modules (for k8s) 48 | # Note: br_netfilter is available only in the newer kernel versions 49 | copy: 50 | dest: "/etc/modules-load.d/k8s_br_netfilter.conf" 51 | content: | 52 | br_netfilter 53 | when: 54 | - br_netfilter_load_result is not failed 55 | 56 | - name: setup kernel parameters for k8s - reboot might be required, but we will not trigger 57 | #here RH asks for reboot: https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/7/html/load_balancer_administration/s1-initial-setup-forwarding-vsa 58 | sysctl: name={{item.name}} value={{item.value}} state=present 59 | with_items: 60 | - {name: "net.bridge.bridge-nf-call-iptables", value: "1" } 61 | - {name: "net.bridge.bridge-nf-call-ip6tables", value: "1" } 62 | - {name: "net.ipv4.ip_forward", value: "1" } 63 | - {name: "net.ipv4.ip_nonlocal_bind", value: "1" } 64 | 65 | - name: setup kernel parameters for network net.netfilter.nf_conntrack_maxi (optional) 66 | # https://docs.projectcalico.org/maintenance/troubleshoot/troubleshooting 67 | sysctl: name={{item.name}} value={{item.value}} state=present 68 | with_items: 69 | - {name: "net.netfilter.nf_conntrack_max", value: "1000000" } 70 | when: 71 | - ( KubeProxyConfiguration.mode is not defined ) or ( KubeProxyConfiguration.mode == "iptables" ) 72 | 73 | - name: setup kernel parameters for eventual elasticsearch - reboot might be required, but we will not trigger 74 | #here RH asks for reboot: https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/7/html/load_balancer_administration/s1-initial-setup-forwarding-vsa 75 | sysctl: name={{item.name}} value={{item.value}} state=present 76 | with_items: 77 | - {name: "vm.max_map_count", value: "262144" } 78 | 
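# (Illustrative note, not part of the role): to verify on a host that the modules and sysctls above took effect, one can run, e.g.: lsmod | grep -E 'ip_vs|bridge|br_netfilter' ; sysctl net.bridge.bridge-nf-call-iptables net.ipv4.ip_forward net.ipv4.ip_nonlocal_bind vm.max_map_count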
79 | -------------------------------------------------------------------------------- /docs/PRODUCTION_TIPS.md: -------------------------------------------------------------------------------- 1 | # When to use this playbook 2 | Run this playbook only for: 3 | - generating the cluster the first time 4 | - Adding a new node, using these steps: 5 | 1. create a new hosts file and populate values only for **primary-master** (which won't be touched) and the sections for the new nodes that should join the cluster 6 | (either compute under **[nodes]** or masters (control plane) under **[secondary-masters]**; all non-relevant groups should be empty) 7 | 2. run the `ansible-playbook -i hosts site.yml --tags node` (note the **--tags node** ) 8 | 9 | # Use conventions 10 | Besides the "master" role, it's suggested to also use the "infra" role (by specifying `label=node-role.kubernetes.io/infra=` in the hosts file). 11 | Machines marked as infra usually hold Prometheus, nginx ingress controllers, grafana, EFK, etc... 12 | Usually there should be a minimum of 3 master nodes and 3 infra nodes; compute (aka worker) nodes -> as many as required by the actual workload of the cluster. 13 | 14 | # Secure Dashboard 15 | - from addons.yaml, remove "--set enableInsecureLogin=True --set enableSkipLogin=True" 16 | - also you may want to review the dashboard service account permissions you desire 17 | 18 | # Heads-up 19 | When you have master-ha, the cluster can function properly only while at least 1/2 + 1 of the masters are up (so quorum is maintained). If you have 3 masters, you must have at least 2 masters up for the cluster to function. 20 | FYI: the good part is that the workload of a k8s cluster will continue to be served even without any master running, BUT if any pod crashes, or there is any activity that needs the masters up, it won't be handled till the masters are up again. 21 | 22 | # Certificates: 23 | - certs will expire 1 year after installation. The good part is that on every kubeadm upgrade the certs are regenerated. 24 | So, if you upgrade the cluster at least once a year (which you should, to keep up with security fixes at least), then you don't need to be concerned. 25 | 26 | # Check security settings: 27 | - https://www.stackrox.com/post/2019/09/12-kubernetes-configuration-best-practices/ (PRs based on this are welcome) 28 | - https://kubernetes.io/docs/tasks/administer-cluster/securing-a-cluster/ 29 | - secure using: https://github.com/nirmata/kyverno/blob/master/samples/README.md 30 | - test using: https://github.com/aquasecurity/kube-bench 31 | 32 | # Security improvements already done: 33 | - inhibited nginx version info in headers: server-token=False, hide-headers=Server ; More params on: https://github.com/kubernetes/ingress-nginx/blob/master/docs/user-guide/nginx-configuration/configmap.md 34 | 35 | # Known pending improvements of k8s/kubeadm: 36 | - metrics-server cannot validate kubelet certs; Proper fix for "--kubelet-insecure-tls" will be in k8s 1.19 https://github.com/kubernetes/kubeadm/issues/1602 ; An alternative is manually generating and approving certs for each node: serverTLSBootstrap: https://github.com/kubernetes-sigs/metrics-server/issues/146#issuecomment-472655656 37 | 38 | 39 | # Other useful charts: 40 | - https://github.com/planetlabs/draino/tree/master/helm/draino -> when a node is not healthy, it's automatically cordoned and containers drained (Kubernetes Node Problem Detector and Cluster Autoscaler). 
41 | - Use Public IP Address from a cloud vendor, simulating a LoadBalancer: https://github.com/inlets/inlets-operator 42 | 43 | # Debian - package hold 44 | make sure k8s tools are not upgraded by mistake (do it post ansible) 45 | ``` 46 | sudo apt-mark hold kubectl kubelet kubeadm kubernetes-cni cri-tools 47 | ``` 48 | allow k8s tools to be upgraded (do it when upgrade is desired) 49 | ``` 50 | sudo apt-mark unhold kubectl kubelet kubeadm kubernetes-cni cri-tools 51 | ``` 52 | 53 | -------------------------------------------------------------------------------- /roles/helm/tasks/helm.yml: -------------------------------------------------------------------------------- 1 | --- 2 | #- hosts: master 3 | # become: yes 4 | # become_method: sudo 5 | # tags: 6 | # - helm 7 | # tasks: 8 | 9 | - set_fact: 10 | env_kc: '{{ proxy_env |default({}) | combine ({"PATH" : "/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/bin:/home/linuxbrew/.linuxbrew/bin" }) | combine ({"KUBECONFIG" :"/etc/kubernetes/admin.conf"}) }}' 11 | tags: 12 | - always 13 | 14 | - block: 15 | - name: check helm version (if exists) 16 | environment: '{{env_kc}}' 17 | shell: helm version --template="{{ '{{.Version}}' }}" 18 | #local_action: shell helm version --template="{{ '{{.Version}}' }}" 19 | register: helm_version_result 20 | 21 | - name: set_fact helm_install_not_required when existing version is identical 22 | set_fact: helm_install_not_required="not" 23 | when: 24 | - helm_version_result is defined 25 | - helm_version_result.stdout | length > 0 26 | - helm_version_result.stdout_lines[0] == helm.helm_version 27 | 28 | - debug: 29 | msg: 'helm versions compare. Existing (if any): requested: {{helm.helm_version}} and existings (if any): {{helm_version_result.stdout_lines[0]}}' 30 | when: 31 | - helm_version_result is defined 32 | - helm_version_result.stdout | length > 0 33 | #when: helm_version_result.stdout_lines[0] == helm.helm_version 34 | 35 | ignore_errors: true 36 | when: 37 | - helm.helm_version is defined 38 | tags: 39 | - helm 40 | 41 | - block: 42 | - name: 'Download helm binary archive {{ helm.archive_url | default ("https://get.helm.sh") }}/helm-{{ helm.helm_version }}-linux-{{ HOST_ARCH }}.tar.gz' 43 | environment: '{{env_kc}}' 44 | get_url: 45 | url: "{{ helm.archive_url | default ('https://get.helm.sh') }}/helm-{{ helm.helm_version }}-linux-{{ HOST_ARCH }}.tar.gz" 46 | dest: /tmp/helm-{{ helm.helm_version }}-linux-{{ HOST_ARCH }}.tar.gz 47 | force: no 48 | mode: "0755" 49 | retries: 3 50 | delay: 3 51 | register: result 52 | until: result is not failed 53 | 54 | - name: unarchive 55 | unarchive: 56 | dest: /tmp/ 57 | src: /tmp/helm-{{ helm.helm_version }}-linux-{{ HOST_ARCH }}.tar.gz 58 | mode: "0755" 59 | remote_src: yes 60 | 61 | - name: helm - place binary in destination folder 62 | copy: 63 | src: /tmp/linux-{{ HOST_ARCH }}/helm 64 | dest: /usr/local/bin/ 65 | force: yes 66 | mode: "0755" 67 | remote_src: yes 68 | become: true 69 | become_user: root 70 | when: 71 | - helm_install_not_required is not defined 72 | - helm.helm_version is defined 73 | tags: 74 | - helm 75 | 76 | - block: 77 | 78 | # - name: helm repo remove (before add) - deprecated by --force-update flag 79 | # environment: '{{env_kc}}' 80 | # command: helm repo remove {{ item.name }} 81 | # with_items: 82 | # - '{{ helm.repos | default("") }}' 83 | # when: helm is defined and helm.repos is defined 84 | # ignore_errors: true 85 | 86 | - name: helm repo add 87 | environment: '{{env_kc}}' 88 | command: helm repo add --force-update {{ 
item.name }} {{ item.url }} 89 | with_items: 90 | - '{{ helm.repos | default("") }}' 91 | when: helm is defined and helm.repos is defined 92 | retries: 7 93 | delay: 3 94 | register: result 95 | until: result is not failed 96 | 97 | - name: helm repo update #Sometimes initial repo add corrupts the repo and update fixes it. 98 | environment: '{{env_kc}}' 99 | command: helm repo update 100 | when: helm is defined 101 | retries: 7 102 | delay: 3 103 | register: result 104 | until: result is not failed 105 | 106 | tags: 107 | - helm 108 | 109 | -------------------------------------------------------------------------------- /roles/tools/tasks/postinstall_messages.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - set_fact: 3 | env_kc: '{{ {} | combine ({"PATH" : "/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/bin/" }) | combine ({"KUBECONFIG" :"/etc/kubernetes/admin.conf"}) }}' 4 | tags: 5 | - always 6 | 7 | ### Post install messages 8 | - block: 9 | 10 | # Too much clutter, so disabling get events 11 | # - name: Get events 12 | # command: kubectl get events --all-namespaces 13 | # register: command_result 14 | # changed_when: false 15 | # tags: 16 | # - cluster_info 17 | # - cluster_status 18 | 19 | # - name: Display events 20 | # debug: 21 | # var: command_result.stdout_lines 22 | # changed_when: false 23 | # tags: 24 | # - cluster_info 25 | # - cluster_status 26 | 27 | - name: Get deployed helm charts 28 | shell: "helm list --all-namespaces --all --superseded --pending" 29 | changed_when: false 30 | register: command_result 31 | tags: 32 | - cluster_status 33 | - helm 34 | - post_deploy 35 | 36 | - name: Print all helm charts 37 | debug: 38 | var: command_result.stdout_lines 39 | changed_when: false 40 | 41 | - name: Get pods 42 | command: "kubectl get pods -o wide --all-namespaces --show-labels=true --show-kind=true" 43 | register: command_result 44 | changed_when: false 45 | tags: 46 | - cluster_status 47 | 48 | - name: Display pods 49 | debug: 50 | var: command_result.stdout_lines 51 | changed_when: false 52 | tags: 53 | - cluster_status 54 | 55 | - name: Get pods not yet in Running status 56 | shell: "kubectl get --all-namespaces pods --no-headers | grep -v -w 'Running' || true " 57 | register: command_result 58 | changed_when: false 59 | 60 | - name: Print pods not yet in Running status 61 | debug: 62 | var: command_result.stdout_lines 63 | 64 | - name: Get nodes 65 | command: kubectl get nodes 66 | register: command_result 67 | changed_when: false 68 | 69 | - name: Display nodes 70 | debug: 71 | var: command_result.stdout_lines 72 | changed_when: false 73 | 74 | - name: Get cluster-info 75 | command: kubectl cluster-info 76 | register: command_result 77 | changed_when: false 78 | 79 | - name: cluster-info 80 | debug: 81 | #msg: "{{ command_result.stdout_lines | from_yaml }}" 82 | var: command_result.stdout_lines 83 | changed_when: false 84 | 85 | - name: Print cluster information and other useful commands. 86 | vars: 87 | msg: | 88 | Your cluster should be up and running ! 
89 | Now you may: 90 | - type: alias to see the predefined aliases 91 | - for a command line "dashboard", on master type: 92 | - wp # watch pods in all namespaces (optionally --show-labels ) 93 | - kg po # kg='kubectl get --all-namespaces -o wide' 94 | - ks # ks='kubectl -n kube-system ' get po 95 | - k top node / pod # More tips: https://kubernetes.io/docs/user-guide/kubectl-cheatsheet/ 96 | - to check pods which are not yet in Running status, run: "kubectl get --all-namespaces pods --no-headers | grep -v -w 'Running' " 97 | 98 | - browse your master (using fqdn) to see the dashboard: 99 | http://{{groups["masters"][0]}} 100 | or, ideally (depending what was defined in config) 101 | http://dashboard.{{custom.networking.dnsDomain}} (when var custom.networking.dnsDomain properly defined and set in your dns ) 102 | debug: var=msg 103 | # msg: "{{ msg.split('\n') }}" 104 | register: command_result 105 | changed_when: false 106 | #http://dashboard.{{ custom.networking.dnsDomain | default ("you may want to define custom.networking.dnsDomain") }} 107 | 108 | # - name: Print cluster information and other useful commands. 109 | # debug: vars=command_result 110 | # changed_when: false 111 | environment: '{{env_kc}}' 112 | tags: 113 | - cluster_info 114 | - postinstall_messages 115 | run_once: true 116 | -------------------------------------------------------------------------------- /roles/helm/tasks/deploy_batch.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: "Deploy batch {{ batch_number }} helm charts" 3 | debug: 4 | msg: "Deploying batch {{ batch_number }} with {{ charts_in_batch | length }} charts in parallel" 5 | 6 | - name: "Deploy helm charts in batch {{ batch_number }} (without namespace) - async" 7 | command: > 8 | helm upgrade --install {{ item.name }} {{ item.repo }} 9 | {{ item.options | default('') }} 10 | --create-namespace 11 | with_items: "{{ charts_in_batch }}" 12 | async: 600 # 10 minutes timeout for each chart 13 | poll: 0 # Don't wait, start all in parallel 14 | environment: '{{ env_kc }}' 15 | when: 16 | - item.namespace is not defined or item.namespace == "" 17 | register: helm_jobs_no_ns 18 | 19 | - name: "Deploy helm charts in batch {{ batch_number }} (with namespace) - async" 20 | command: > 21 | helm upgrade --install {{ item.name }} {{ item.repo }} 22 | --namespace {{ item.namespace }} 23 | {{ item.options | default('') }} 24 | --create-namespace 25 | with_items: "{{ charts_in_batch }}" 26 | async: 600 # 10 minutes timeout for each chart 27 | poll: 0 # Don't wait, start all in parallel 28 | environment: '{{ env_kc }}' 29 | when: 30 | - item.namespace is defined 31 | - item.namespace != "" 32 | register: helm_jobs_with_ns 33 | 34 | - name: "Wait for all helm charts in batch {{ batch_number }} to complete" 35 | async_status: 36 | jid: "{{ item.ansible_job_id }}" 37 | register: helm_result_no_ns 38 | until: helm_result_no_ns.finished 39 | retries: 120 # 10 minutes total (120 * 5 seconds) 40 | delay: 5 41 | with_items: "{{ helm_jobs_no_ns.results | default([]) }}" 42 | when: 43 | - helm_jobs_no_ns is defined 44 | - helm_jobs_no_ns.results is defined 45 | - item.ansible_job_id is defined 46 | changed_when: '"deployed" in helm_result_no_ns.stdout' 47 | 48 | - name: "Wait for all helm charts with namespace in batch {{ batch_number }} to complete" 49 | async_status: 50 | jid: "{{ item.ansible_job_id }}" 51 | register: helm_result_with_ns 52 | until: helm_result_with_ns.finished 53 | retries: 120 # 10 minutes total (120 * 
5 seconds) 54 | delay: 5 55 | with_items: "{{ helm_jobs_with_ns.results | default([]) }}" 56 | when: 57 | - helm_jobs_with_ns is defined 58 | - helm_jobs_with_ns.results is defined 59 | - item.ansible_job_id is defined 60 | changed_when: '"deployed" in helm_result_with_ns.stdout' 61 | 62 | - name: "Display completion status for batch {{ batch_number }}" 63 | debug: 64 | msg: "All {{ charts_in_batch | length }} charts in batch {{ batch_number }} have been deployed" 65 | 66 | - name: "CALICO BLOCK - after batch {{ batch_number }} when tigera-operator in charts_in_batch" 67 | when: '"tigera-operator" in (charts_in_batch | map(attribute="namespace") | list)' 68 | block: 69 | 70 | - name: Calico - Wait few seconds for deployments to start - wait to make sure calico-node is getting started - required for containerd... 71 | pause: seconds=10 72 | changed_when: false 73 | 74 | - name: Wait for calico-node daemonset to be ready 75 | shell: kubectl -n calico-system get daemonset calico-node -o jsonpath='{.status.numberReady}/{.status.desiredNumberScheduled}' 76 | environment: '{{ env_kc }}' 77 | register: calico_node_ready 78 | until: calico_node_ready.stdout.split('/')[0] == calico_node_ready.stdout.split('/')[1] and calico_node_ready.stdout.split('/')[0] | int > 0 79 | retries: 60 80 | delay: 10 81 | ignore_errors: true 82 | 83 | - name: Calico - Restart containerd due to containerd cni bugs are still there in containerd 1.6.6 84 | systemd: name=containerd state=restarted enabled=yes daemon_reload=yes 85 | 86 | - name: Calico - Wait few seconds for containerd to restart 87 | pause: seconds=10 88 | changed_when: false 89 | 90 | - name: Wait for CoreDNS deployment to be ready 91 | shell: kubectl -n kube-system get deployment coredns -o jsonpath='{.status.readyReplicas}' 92 | environment: '{{ env_kc }}' 93 | register: coredns_ready 94 | until: coredns_ready.stdout | int > 0 95 | retries: 20 96 | delay: 10 97 | ignore_errors: false 98 | changed_when: false 99 | 100 | - debug: 101 | msg: "Calico networking components are ready, proceeding to next batch" 102 | -------------------------------------------------------------------------------- /all_reset.yml: -------------------------------------------------------------------------------- 1 | --- 2 | ## Preparations 3 | ## Making sure python exists on all nodes, so Ansible will be able to run: 4 | - hosts: all 5 | gather_facts: no 6 | become: yes 7 | become_method: sudo 8 | pre_tasks: 9 | ## It would be best to have ansible already installed on all machines. 10 | ## But if it is not, we'll try to do it: 11 | - name: when no python2, install python2 for Ansible<2.8 (usually required on ubuntu, which defaults to python3) # Alternativelly, for Ubuntu machines, define var: ansible_python_interpreter=/usr/bin/python3 12 | raw: test -e /usr/bin/python || (apt -y update && apt install -y python-minimal) || (yum install -y python2 python-simplejson) 13 | register: output 14 | changed_when: output.stdout != "" 15 | tags: always 16 | when: 17 | - ansible_version.full is version_compare('2.8', '<') 18 | - ( ansible_python_interpreter is not defined or ansible_python_interpreter == "/usr/bin/python" ) 19 | # ansible_os_family conds. cannot be used as this is before gathering facts (where ansible is required) 20 | ignore_errors: true 21 | ## reason for ignore_errors: true 22 | ## "version_compare" was replaced with "version" starting ansible 2.5; 23 | ## CentOS/RHEL 7.x use ansible 2.4, so not able to grasp what version_compare is. 
24 | ## Ansible 2.9 removes the version_compare and does not recognize it any longer. 25 | ## As our need is to add python2 only on versions before 2.8, if this fails 26 | ## (due to missing version_compare command), we are fine. 27 | ## We do not cover cases where it fails due to other reasons, but that is a reasonable risk, 28 | ## and that issue will be captured later in the flow. 29 | 30 | - name: when no python(2/3), install python3(Debian) python2(RedHat) for Ansible>=2.8 # Alternativelly, for Ubuntu machines, define var: ansible_python_interpreter=/usr/bin/python3 31 | raw: test -e /usr/bin/python || (apt -y update && apt install -y python3-minimal) || (yum install -y python2 python-simplejson) 32 | register: output 33 | changed_when: output.stdout != "" 34 | tags: always 35 | when: 36 | - ansible_version.full is version('2.8', '>=') or ( ansible_python_interpreter is defined and ansible_python_interpreter == "/usr/bin/python3" ) 37 | # ansible_os_family conds. cannot be used as this is before gathering facts (where ansible is required) 38 | ignore_errors: true 39 | ## reason for ignore_errors: true 40 | ## is similar to the one explained above (complements it) 41 | 42 | - setup: # aka gather_facts 43 | tags: always # required for tags, see ansible issue: #14228 44 | 45 | - name: test min. vars (group_vars/all) are set (ClusterConfiguration and k8s_network_addons_urls) 46 | debug: msg='Make sure min. vars are set in group_vars/all/ (e.g. ClusterConfiguration and k8s_network_addons_urls)' 47 | when: 48 | - ClusterConfiguration is not defined 49 | - JoinConfiguration is not defined 50 | failed_when: 51 | - ClusterConfiguration is not defined 52 | - JoinConfiguration is not defined 53 | tags: always # always check if we have vars in place 54 | 55 | ## proper reset of any previous cluster (if any) 56 | - hosts: primary-master 57 | become: yes 58 | become_method: sudo 59 | tags: 60 | - reset 61 | - master 62 | roles: 63 | #- { role: helm, task: helm_reset, tags: [ 'reset', 'helm_reset' ] } # in helm3 is no longer required 64 | - { role: storage, task: remove_pvs, tags: [ 'reset', 'storage_reset', 'pvs_reset' ] } 65 | - { role: storage, task: nfs_reset, tags: [ 'reset', 'storage_reset', 'nfs_reset' ] } 66 | - { role: storage, task: rook_reset, tags: [ 'reset', 'storage_reset', 'rook_reset' ] } 67 | - { role: tools, task: reset_drain, tags: [ 'reset', 'node_reset', 'drain', 'node_drain' ] } #done on master, affecting nodes 68 | 69 | ## nodes -> reset and install common part (for all nodes) 70 | - hosts: nodes 71 | become: yes 72 | become_method: sudo 73 | tags: 74 | - node 75 | roles: 76 | - { role: tools, task: reset, tags: [ 'reset', 'node_reset' ], when: "inventory_hostname not in groups['masters']" } 77 | - { role: tools, task: weave_reset, tags: [ 'reset', 'node_reset', 'network_reset', 'weave_reset', 'weave' ], when: "inventory_hostname not in groups['masters']" } 78 | 79 | - hosts: masters 80 | become: yes 81 | become_method: sudo 82 | tags: 83 | - master 84 | roles: 85 | - { role: tools, task: reset, tags: [ 'reset', 'master_reset' ] } 86 | - { role: tools, task: weave_reset, tags: [ 'reset', 'master_reset', 'network_reset', 'weave', 'weave_reset' ] } 87 | 88 | -------------------------------------------------------------------------------- /roles/common/tasks/iptables.yml: -------------------------------------------------------------------------------- 1 | --- 2 | ### iptables: 3 | - block: 4 | # Currently it will disable the REJECT rules and change policy to allow all. 
5 | # For making pin-pointed rules, one may look at: 6 | # https://github.com/kubernetes/contrib/blob/master/ansible/roles/node/tasks/iptables.yml 7 | # For weave netw plugin, open also: TCP 6783 and UDP 6783/6784 8 | 9 | - name: Disable firewalld (CentOS/RHEL) 10 | systemd: name=firewalld state=stopped enabled=no 11 | when: ansible_os_family == "RedHat" 12 | ignore_errors: true # in case it is not even installed 13 | # For developing firewalld friendly solution, check: 14 | # https://github.com/kubernetes/contrib/tree/master/ansible/roles/ 15 | 16 | - name: Install iptables-services (if does not exist) - RedHat/CentOS 17 | package: state=present name={{ item }} #-{{version}} 18 | #environment: '{{ proxy_env | default ({}) }}' 19 | when: ansible_os_family == "RedHat" 20 | with_items: 21 | - iptables-services 22 | notify: 23 | - Restart iptables 24 | 25 | - name: Install netfilter-persistent required for saving iptables rule - Debian 26 | package: state=present name={{ item }} #-{{version}} 27 | #environment: '{{ proxy_env | default ({}) }}' 28 | when: ansible_os_family == "Debian" 29 | with_items: 30 | - netfilter-persistent 31 | 32 | - name: iptables default policies need to be ACCEPT on all chains 33 | iptables: 34 | chain: '{{item}}' 35 | policy: ACCEPT 36 | with_items: 37 | - INPUT 38 | - FORWARD 39 | - OUTPUT 40 | 41 | - name: remove the REJECT rules on all chains 42 | iptables: 43 | chain: '{{item}}' 44 | state: absent 45 | reject_with: 'icmp-host-prohibited' 46 | with_items: 47 | - INPUT 48 | - FORWARD 49 | - OUTPUT 50 | 51 | - name: remove the REJECT rules on all chains from the /etc/sysconfig/iptables (persisting the change) on RH/CentOS 52 | lineinfile: 53 | name: /etc/sysconfig/iptables 54 | state: absent 55 | line: "{{ item }}" 56 | with_items: 57 | - '-A INPUT -j REJECT --reject-with icmp-host-prohibited' 58 | - '-A FORWARD -j REJECT --reject-with icmp-host-prohibited' 59 | when: ansible_os_family == "RedHat" 60 | 61 | # alternative option to previous persistence solution with /etc/sysconfig/iptables 62 | #- name: Save iptables rules - sol2 63 | # command: service iptables save 64 | # when: ansible_os_family == "Redhat" 65 | 66 | - name: save iptables rules (Debian) 67 | shell: netfilter-persistent save 68 | when: ansible_os_family == "Debian" 69 | 70 | when: iptables_setup is defined and iptables_setup 71 | tags: 72 | - iptables 73 | - firewall 74 | 75 | # Debug iptables with: 76 | # watch -n1 iptables -vnL 77 | # Check ports: https://kubernetes.io/docs/setup/independent/install-kubeadm/ 78 | 79 | ### TODO: test min. 
ports to be allowed 80 | # sudo iptables -I INPUT -p tcp --dport 6443 -m conntrack --ctstate NEW,ESTABLISHED -j ACCEPT 81 | # ### sudo iptables -I FORWARD -p tcp --dport 6443 -m conntrack --ctstate NEW,ESTABLISHED -j ACCEPT 82 | # sudo iptables -I OUTPUT -p tcp --sport 6443 -m conntrack --ctstate ESTABLISHED -j ACCEPT 83 | 84 | 85 | # sudo iptables -I INPUT -p tcp --dport 10250 -m conntrack --ctstate NEW,ESTABLISHED -j ACCEPT 86 | # ### sudo iptables -D FORWARD -p tcp --dport 10250 -m conntrack --ctstate NEW,ESTABLISHED -j ACCEPT 87 | # sudo iptables -I OUTPUT -p tcp --sport 10250 -m conntrack --ctstate ESTABLISHED -j ACCEPT 88 | 89 | # ### sudo iptables -I INPUT -p tcp --dport 9898 -m conntrack --ctstate NEW,ESTABLISHED -j ACCEPT 90 | # sudo iptables -D FORWARD -p tcp --dport 9898 -m conntrack --ctstate NEW,ESTABLISHED -j ACCEPT 91 | # ### sudo iptables -I OUTPUT -p tcp --sport 9898 -m conntrack --ctstate ESTABLISHED -j ACCEPT 92 | 93 | # sudo iptables -D INPUT -p udp --dport 53 -m conntrack --ctstate NEW,ESTABLISHED -j ACCEPT 94 | # sudo iptables -I FORWARD -p udp --dport 53 -m conntrack --ctstate NEW,ESTABLISHED -j ACCEPT 95 | # sudo iptables -D OUTPUT -p udp --sport 53 -m conntrack --ctstate ESTABLISHED -j ACCEPT 96 | 97 | # sudo iptables -I FORWARD -p udp --sport 53 -m conntrack --ctstate NEW,ESTABLISHED -j ACCEPT 98 | # sudo iptables -I FORWARD -p tcp --dport 9100 -m conntrack --ctstate NEW,ESTABLISHED -j ACCEPT 99 | # sudo iptables -I FORWARD -p tcp --sport 9100 -m conntrack --ctstate NEW,ESTABLISHED -j ACCEPT 100 | 101 | 102 | # http://www.slsmk.com/how-to-log-iptables-dropped-packets-to-syslog/ and monitor with journalctl -kf 103 | # iptables -N LOGGINGA 104 | # iptables -I FORWARD 9 -j LOGGINGA 105 | # # iptables -D FORWARD -j LOGGINGA 106 | # iptables -A LOGGINGA -m limit --limit 10/min -j LOG --log-prefix "IPTables-A: " --log-level 4 107 | # iptables -A LOGGINGA -j DROP 108 | 109 | -------------------------------------------------------------------------------- /roles/common/tasks/aliases_completion.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: aliases and shell completion 3 | block: 4 | ### BASHRC / ZSHRC file set 5 | - name: aliases - choose where to put aliases - default local 6 | set_fact: 7 | BASHRC: '~/.bashrc' 8 | ZSHRC: '~/.zshrc' 9 | 10 | - name: aliases - choose where to put aliases - when global, on Debian family 11 | set_fact: 12 | BASHRC: '/etc/bash.bashrc' 13 | ZSHRC: '/etc/zshrc' 14 | when: 15 | - aliases.rc is defined 16 | - aliases.rc == "global" 17 | - ansible_os_family == "Debian" 18 | 19 | - name: aliases - choose where to put aliases - when global, on RedHat family 20 | set_fact: 21 | BASHRC: '/etc/bashrc' 22 | ZSHRC: '/etc/zshrc' 23 | when: 24 | - aliases.rc is defined 25 | - aliases.rc == "global" 26 | - ansible_os_family == "RedHat" 27 | 28 | - name: aliases - choose where to put aliases - when custom 29 | set_fact: 30 | BASHRC: "{{ aliases.rc_bash_custom | default ('~/.bashrc') }}" 31 | ZSHRC: "{{ aliases.rc_zsh_custom | default ('~/.zshrc') }}" 32 | when: 33 | - aliases.rc is defined 34 | - aliases.rc == "custom" 35 | 36 | ### BASH aliases 37 | - name: aliases-bash - kubectl and helm aliases to "{{ BASHRC | default ('~/.bashrc') }}" (if exists) 38 | lineinfile: 39 | dest: "{{ BASHRC | default ('~/.bashrc') }}" 40 | line: "{{ item }}" 41 | state: present 42 | create: "{{ aliases.file_create_if_missing | default ('no') }}" 43 | with_items: "{{ aliases.list | default ([]) }}" 44 | 45 | ### ZSH 
aliases 46 | - name: aliases-zsh - kubectl aliases to "{{ ZSHRC | default ('~/.zshrc') }}" (if exists) 47 | lineinfile: 48 | dest: "{{ ZSHRC | default ('~/.zshrc') }}" 49 | line: "{{ item }}" 50 | state: present 51 | create: "{{ aliases.file_create_if_missing | default ('no') }}" 52 | with_items: "{{ aliases.list | default ([]) }}" 53 | 54 | ### BASH Completion 55 | - name: aliases-bash-completion - Install optional packages like bash-completion 56 | package: name={{ item }} state={{ package_state | default ('present') }} 57 | with_items: 58 | - bash-completion 59 | 60 | - name: "create /usr/share/bash-completion/completions/[kubeadm, kubectl, helm]" 61 | shell: "{{ item }} completion bash | sudo tee /usr/share/bash-completion/completions/{{ item }} >/dev/null" 62 | with_items: 63 | - kubeadm 64 | - kubectl 65 | - helm 66 | 67 | - name: aliases-bash-completion to kubectl aliases in "{{ BASHRC | default ('~/.bashrc') }}" (if exists) 68 | lineinfile: 69 | dest: "{{ BASHRC | default ('~/.bashrc') }}" 70 | line: "[[ -n $PS1 ]] && complete -F __start_kubectl {{ item }}" 71 | state: present 72 | create: "{{ aliases.file_create_if_missing | default ('no') }}" 73 | with_items: "{{ aliases.kubectl_complete_also_aliases | default ([]) }}" 74 | when: 75 | - aliases.kubectl_complete_also_aliases is defined 76 | 77 | - name: aliases-bash-completion to helm aliases in "{{ BASHRC | default ('~/.bashrc') }}" (if exists) 78 | lineinfile: 79 | dest: "{{ BASHRC | default ('~/.bashrc') }}" 80 | line: "[[ -n $PS1 ]] && complete -F __start_helm {{ item }}" 81 | state: present 82 | create: "{{ aliases.file_create_if_missing | default ('no') }}" 83 | with_items: "{{ aliases.helm_complete_also_aliases | default ([]) }}" 84 | when: 85 | - aliases.helm_complete_also_aliases is defined 86 | 87 | ### ZSH Completion 88 | - name: "create /usr/local/share/zsh/site-functions/_[kubeadm, kubectl, helm]" 89 | shell: "{{ item }} completion zsh | sudo tee /usr/local/share/zsh/site-functions/_{{ item }} >/dev/null" 90 | with_items: 91 | - kubeadm 92 | - kubectl 93 | - helm 94 | 95 | - name: aliases-zsh-completion to kubectl aliases in "{{ ZSHRC | default ('~/.zshrc') }}" (if exists) 96 | lineinfile: 97 | dest: "{{ ZSHRC | default ('~/.zshrc') }}" 98 | line: "[[ -n $PS1 ]] && compdef __start_kubectl {{ item }}" 99 | state: present 100 | create: "{{ aliases.file_create_if_missing | default ('no') }}" 101 | with_items: "{{ aliases.kubectl_complete_also_aliases | default ([]) }}" 102 | when: 103 | - aliases.kubectl_complete_also_aliases is defined 104 | 105 | - name: aliases-zsh-completion to helm aliases in "{{ ZSHRC | default ('~/.zshrc') }}" (if exists) 106 | lineinfile: 107 | dest: "{{ ZSHRC | default ('~/.zshrc') }}" 108 | line: "[[ -n $PS1 ]] && compdef __start_helm {{ item }}" 109 | state: present 110 | create: "{{ aliases.file_create_if_missing | default ('no') }}" 111 | ignore_errors: true 112 | with_items: "{{ aliases.helm_complete_also_aliases | default ([]) }}" 113 | when: 114 | - aliases.helm_complete_also_aliases is defined 115 | 116 | - name: "remove .zcompdump before recreate" 117 | ansible.builtin.file: 118 | path: ~/.zcompdump 119 | state: absent 120 | 121 | - name: run compinit 122 | ansible.builtin.shell: "zsh -c 'compinit -C'" 123 | args: 124 | executable: /bin/zsh 125 | 126 | tags: 127 | - aliases 128 | when: 129 | - aliases is defined 130 | ignore_errors: true 131 | -------------------------------------------------------------------------------- /roles/storage/templates/nfs.j2: 
-------------------------------------------------------------------------------- 1 | #https://raw.githubusercontent.com/kubernetes-incubator/external-storage/master/nfs/deploy/kubernetes/rbac.yaml 2 | #https://raw.githubusercontent.com/kubernetes-incubator/external-storage/master/nfs/deploy/kubernetes/deployment.yaml 3 | #https://raw.githubusercontent.com/kubernetes-incubator/external-storage/master/nfs/deploy/kubernetes/class.yaml 4 | #https://github.com/kubernetes-incubator/external-storage/tree/master/nfs 5 | 6 | kind: ClusterRole 7 | apiVersion: rbac.authorization.k8s.io/v1 8 | metadata: 9 | name: nfs-provisioner-runner 10 | rules: 11 | - apiGroups: [""] 12 | resources: ["persistentvolumes"] 13 | verbs: ["get", "list", "watch", "create", "delete"] 14 | - apiGroups: [""] 15 | resources: ["persistentvolumeclaims"] 16 | verbs: ["get", "list", "watch", "update"] 17 | - apiGroups: ["storage.k8s.io"] 18 | resources: ["storageclasses"] 19 | verbs: ["get", "list", "watch"] 20 | - apiGroups: [""] 21 | resources: ["events"] 22 | verbs: ["list", "watch", "create", "update", "patch"] 23 | - apiGroups: [""] 24 | resources: ["services", "endpoints"] 25 | verbs: ["get"] 26 | - apiGroups: ["extensions"] 27 | resources: ["podsecuritypolicies"] 28 | resourceNames: ["nfs-provisioner"] 29 | verbs: ["use"] 30 | --- 31 | apiVersion: v1 32 | kind: ServiceAccount 33 | metadata: 34 | name: nfs-provisioner 35 | --- 36 | kind: ClusterRoleBinding 37 | apiVersion: rbac.authorization.k8s.io/v1 38 | metadata: 39 | name: run-nfs-provisioner 40 | subjects: 41 | - kind: ServiceAccount 42 | name: nfs-provisioner 43 | # replace with namespace where provisioner is deployed 44 | namespace: kube-system 45 | roleRef: 46 | kind: ClusterRole 47 | name: nfs-provisioner-runner 48 | apiGroup: rbac.authorization.k8s.io 49 | --- 50 | kind: Role 51 | apiVersion: rbac.authorization.k8s.io/v1 52 | metadata: 53 | name: leader-locking-nfs-provisioner 54 | rules: 55 | - apiGroups: [""] 56 | resources: ["endpoints"] 57 | verbs: ["get", "list", "watch", "create", "update", "patch"] 58 | --- 59 | kind: RoleBinding 60 | apiVersion: rbac.authorization.k8s.io/v1 61 | metadata: 62 | name: leader-locking-nfs-provisioner 63 | subjects: 64 | - kind: ServiceAccount 65 | name: nfs-provisioner 66 | # replace with namespace where provisioner is deployed 67 | namespace: kube-system 68 | roleRef: 69 | kind: Role 70 | name: leader-locking-nfs-provisioner 71 | apiGroup: rbac.authorization.k8s.io 72 | --- 73 | kind: Service 74 | apiVersion: v1 75 | metadata: 76 | name: nfs-provisioner 77 | labels: 78 | app: nfs-provisioner 79 | spec: 80 | ports: 81 | - name: nfs 82 | port: 2049 83 | - name: mountd 84 | port: 20048 85 | - name: rpcbind 86 | port: 111 87 | - name: rpcbind-udp 88 | port: 111 89 | protocol: UDP 90 | selector: 91 | app: nfs-provisioner 92 | --- 93 | kind: Deployment 94 | apiVersion: apps/v1 95 | metadata: 96 | name: nfs-provisioner 97 | spec: 98 | selector: 99 | matchLabels: 100 | app: nfs-provisioner 101 | replicas: 1 102 | strategy: 103 | type: Recreate 104 | template: 105 | metadata: 106 | labels: 107 | app: nfs-provisioner 108 | spec: 109 | serviceAccount: nfs-provisioner 110 | nodeSelector: 111 | node-role.kubernetes.io/control-plane: "" 112 | tolerations: 113 | - key: "node-role.kubernetes.io/control-plane" 114 | effect: NoSchedule 115 | containers: 116 | - name: nfs-provisioner 117 | image: quay.io/kubernetes_incubator/nfs-provisioner:v2.2.1-k8s1.12 118 | ports: 119 | - name: nfs 120 | containerPort: 2049 121 | - name: mountd 122 | 
containerPort: 20048 123 | - name: rpcbind 124 | containerPort: 111 125 | - name: rpcbind-udp 126 | containerPort: 111 127 | protocol: UDP 128 | securityContext: 129 | capabilities: 130 | add: 131 | - DAC_READ_SEARCH 132 | - SYS_RESOURCE 133 | args: 134 | - "-provisioner={{ nfs_k8s.provisioner }}" 135 | env: 136 | - name: POD_IP 137 | valueFrom: 138 | fieldRef: 139 | fieldPath: status.podIP 140 | - name: SERVICE_NAME 141 | value: nfs-provisioner 142 | - name: POD_NAMESPACE 143 | valueFrom: 144 | fieldRef: 145 | fieldPath: metadata.namespace 146 | imagePullPolicy: "IfNotPresent" 147 | volumeMounts: 148 | - name: export-volume 149 | mountPath: /export 150 | volumes: 151 | - name: export-volume 152 | hostPath: 153 | path: {{ nfs_k8s.host_path }} 154 | --- 155 | kind: StorageClass 156 | apiVersion: storage.k8s.io/v1 157 | metadata: 158 | name: "{{nfs_k8s.provisioner }}" 159 | annotations: 160 | storageclass.beta.kubernetes.io/is-default-class: "{{ nfs_k8s.is_default_class | default('true') }}" 161 | labels: 162 | kubernetes.io/cluster-service: "true" 163 | provisioner: "{{nfs_k8s.provisioner }}" 164 | mountOptions: 165 | - vers=4.1 166 | -------------------------------------------------------------------------------- /only_nodes_only_install.yml: -------------------------------------------------------------------------------- 1 | --- 2 | ## Preparations 3 | ## Making sure python exists on all nodes, so Ansible will be able to run: 4 | - hosts: nodes 5 | gather_facts: no 6 | become: yes 7 | become_method: sudo 8 | pre_tasks: 9 | ## It would be best to have ansible already installed on all machines. 10 | ## But if it is not, we'll try to do it: 11 | - name: when no python2, install python2 for Ansible<2.8 (usually required on ubuntu, which defaults to python3) # Alternativelly, for Ubuntu machines, define var: ansible_python_interpreter=/usr/bin/python3 12 | raw: test -e /usr/bin/python || (apt -y update && apt install -y python-minimal) || (yum install -y python2 python-simplejson) 13 | register: output 14 | changed_when: output.stdout != "" 15 | tags: always 16 | when: 17 | - ansible_version.full is version_compare('2.8', '<') 18 | - ( ansible_python_interpreter is not defined or ansible_python_interpreter == "/usr/bin/python" ) 19 | # ansible_os_family conds. cannot be used as this is before gathering facts (where ansible is required) 20 | ignore_errors: true 21 | ## reason for ignore_errors: true 22 | ## "version_compare" was replaced with "version" starting ansible 2.5; 23 | ## CentOS/RHEL 7.x use ansible 2.4, so not able to grasp what version_compare is. 24 | ## Ansible 2.9 removes the version_compare and does not recognize it any longer. 25 | ## As our need is to add python2 only on versions before 2.8, if this fails 26 | ## (due to missing version_compare command), we are fine. 27 | ## We do not cover cases where it fails due to other reasons, but that is a reasonable risk, 28 | ## and that issue will be captured later in the flow. 
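  ## Hedged alternative (not part of the original playbook): rather than relying on the
  ## raw-module bootstrap above, the Python interpreter can also be pinned per group in
  ## the inventory, e.g. in hosts(.example):
  ##   [nodes:vars]
  ##   ansible_python_interpreter=/usr/bin/python3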
29 | 30 | - name: when no python(2/3), install python3(Debian) python2(RedHat) for Ansible>=2.8 # Alternativelly, for Ubuntu machines, define var: ansible_python_interpreter=/usr/bin/python3 31 | raw: test -e /usr/bin/python || (apt -y update && apt install -y python3-minimal) || (yum install -y python2 python-simplejson) 32 | register: output 33 | changed_when: output.stdout != "" 34 | tags: always 35 | when: 36 | - ansible_version.full is version('2.8', '>=') or ( ansible_python_interpreter is defined and ansible_python_interpreter == "/usr/bin/python3" ) 37 | # ansible_os_family conds. cannot be used as this is before gathering facts (where ansible is required) 38 | ignore_errors: true 39 | ## reason for ignore_errors: true 40 | ## is similar to the one explained above (complements it) 41 | 42 | - setup: # aka gather_facts 43 | tags: always # required for tags, see ansible issue: #14228 44 | 45 | - name: test min. vars (group_vars/all) are set (ClusterConfiguration and k8s_network_addons_urls) 46 | debug: msg='Make sure min. vars are set in group_vars/all/ (e.g. ClusterConfiguration and k8s_network_addons_urls)' 47 | when: 48 | - ClusterConfiguration is not defined 49 | - JoinConfiguration is not defined 50 | failed_when: 51 | - ClusterConfiguration is not defined 52 | - JoinConfiguration is not defined 53 | tags: always # always check if we have vars in place 54 | 55 | - hosts: nodes 56 | become: yes 57 | become_method: sudo 58 | tags: 59 | - node 60 | roles: 61 | - { role: common, task: all, tags: [ 'common', 'install', 'common_install', 'node_install', 'node' ], when: "inventory_hostname not in groups['masters']" } 62 | 63 | ## node -> install nodes (kubeadm join, etc) 64 | - hosts: nodes 65 | become: yes 66 | become_method: sudo 67 | any_errors_fatal: yes 68 | tags: 69 | - node 70 | - install 71 | - node_install 72 | roles: 73 | - { role: non-primary-master, tags: [ 'node', 'install', 'node_install'], when: "inventory_hostname not in groups['masters']" } 74 | 75 | ## node -> label nodes (even when master is also a node) 76 | - hosts: nodes 77 | become: yes 78 | become_method: sudo 79 | any_errors_fatal: yes 80 | tags: 81 | - node 82 | - install 83 | - node_install 84 | - label 85 | roles: 86 | - { role: tools, task: labels, tags: [ 'label'] } 87 | 88 | ### For fixes like vsphere's bug, we have to reboot after some more fixes... 89 | #https://github.com/vmware/kubernetes/issues/495 90 | - hosts: mustrebootlist 91 | gather_facts: no 92 | become: yes 93 | become_method: sudo 94 | tags: 95 | - mustrebootlist 96 | - vsphere_bug_fix 97 | - vsphere 98 | roles: 99 | - { role: tools, task: reboot, tags: [ 'reboot_minimal' ], when: "ClusterConfiguration.cloudProvider is defined and ClusterConfiguration.cloudProvider == 'vsphere' and allow_restart | default(False) and vsphere_bug_fix is defined and vsphere_bug_fix" } 100 | 101 | ## Generic Sanity 102 | - hosts: masters 103 | become: yes 104 | become_method: sudo 105 | tags: 106 | - master 107 | pre_tasks: 108 | - name: remove temporary mustreboot temporary group 109 | group: 110 | name: mustrebootlist 111 | state: absent 112 | roles: 113 | - { role: tools, task: cluster_sanity, tags: [ 'cluster_sanity', 'sanity' ] } 114 | - { role: tools, task: postinstall_messages, tags: [ 'cluster_sanity', 'sanity' ] } 115 | 116 | ## to reset/add only some (more) nodes: 117 | ## 1. keep in hosts only: 118 | ## - the master 119 | ## - the affected node (all other nodes should not be there) 120 | ## 2. Have the token defined in the group_vars/all 121 | ## 3. 
Run using only this/these tag(s): 122 | ## ansible-playbook -i hosts -v site.yml --tags "node" # same with: ansible-playbook -i hosts -v site.yml --tags "node_reset,node_install,cluster_sanity,cluster_info" 123 | 124 | ## To get cluster info/sanity: 125 | ## ansible-playbook -i hosts -v site.yml --tags "cluster_sanity,cluster_info" 126 | -------------------------------------------------------------------------------- /only_secondaryMasters_only_install.yml: -------------------------------------------------------------------------------- 1 | --- 2 | ## Preparations 3 | ## Making sure python exists on all nodes, so Ansible will be able to run: 4 | - hosts: secondary-masters 5 | gather_facts: no 6 | become: yes 7 | become_method: sudo 8 | pre_tasks: 9 | ## It would be best to have ansible already installed on all machines. 10 | ## But if it is not, we'll try to do it: 11 | - name: when no python2, install python2 for Ansible<2.8 (usually required on ubuntu, which defaults to python3) # Alternativelly, for Ubuntu machines, define var: ansible_python_interpreter=/usr/bin/python3 12 | raw: test -e /usr/bin/python || (apt -y update && apt install -y python-minimal) || (yum install -y python2 python-simplejson) 13 | register: output 14 | changed_when: output.stdout != "" 15 | tags: always 16 | when: 17 | - ansible_version.full is version_compare('2.8', '<') 18 | - ( ansible_python_interpreter is not defined or ansible_python_interpreter == "/usr/bin/python" ) 19 | # ansible_os_family conds. cannot be used as this is before gathering facts (where ansible is required) 20 | ignore_errors: true 21 | ## reason for ignore_errors: true 22 | ## "version_compare" was replaced with "version" starting ansible 2.5; 23 | ## CentOS/RHEL 7.x use ansible 2.4, so not able to grasp what version_compare is. 24 | ## Ansible 2.9 removes the version_compare and does not recognize it any longer. 25 | ## As our need is to add python2 only on versions before 2.8, if this fails 26 | ## (due to missing version_compare command), we are fine. 27 | ## We do not cover cases where it fails due to other reasons, but that is a reasonable risk, 28 | ## and that issue will be captured later in the flow. 29 | 30 | - name: when no python(2/3), install python3(Debian) python2(RedHat) for Ansible>=2.8 # Alternativelly, for Ubuntu machines, define var: ansible_python_interpreter=/usr/bin/python3 31 | raw: test -e /usr/bin/python || (apt -y update && apt install -y python3-minimal) || (yum install -y python2 python-simplejson) 32 | register: output 33 | changed_when: output.stdout != "" 34 | tags: always 35 | when: 36 | - ansible_version.full is version('2.8', '>=') or ( ansible_python_interpreter is defined and ansible_python_interpreter == "/usr/bin/python3" ) 37 | # ansible_os_family conds. cannot be used as this is before gathering facts (where ansible is required) 38 | ignore_errors: true 39 | ## reason for ignore_errors: true 40 | ## is similar to the one explained above (complements it) 41 | 42 | - setup: # aka gather_facts 43 | tags: always # required for tags, see ansible issue: #14228 44 | 45 | - name: test min. vars (group_vars/all) are set (ClusterConfiguration and k8s_network_addons_urls) 46 | debug: msg='Make sure min. vars are set in group_vars/all/ (e.g. 
ClusterConfiguration and k8s_network_addons_urls)' 47 | when: 48 | - ClusterConfiguration is not defined 49 | - JoinConfiguration is not defined 50 | failed_when: 51 | - ClusterConfiguration is not defined 52 | - JoinConfiguration is not defined 53 | tags: always # always check if we have vars in place 54 | 55 | - hosts: secondary-masters 56 | become: yes 57 | become_method: sudo 58 | tags: 59 | - master 60 | - secondary_masters 61 | roles: 62 | - { role: tools, task: reset, tags: [ 'reset', 'master_reset' ] } 63 | - { role: tools, task: weave_reset, tags: [ 'reset', 'master_reset', 'network_reset', 'weave', 'weave_reset' ] } 64 | - { role: common, task: all, tags: [ 'common', 'install', 'common_install', 'master_install'] } 65 | 66 | ## master -> install keepalived on masters (relevat if HA) 67 | - hosts: secondary-masters 68 | become: yes 69 | become_method: sudo 70 | any_errors_fatal: yes 71 | tags: 72 | - master 73 | - install 74 | - ha 75 | - master_install 76 | - secondary_masters 77 | roles: 78 | - role: keepalived 79 | tags: [ 'master', 'install', 'master_install', 'ha', 'keepalived'] 80 | when: 81 | - ( groups['masters'] | length ) > 1 82 | - ( custom.networking.masterha_type | default('vip') ) == 'vip' 83 | 84 | - hosts: secondary-masters 85 | become: yes 86 | become_method: sudo 87 | any_errors_fatal: yes 88 | tags: 89 | - master 90 | - install 91 | - ha 92 | - master_install 93 | - secondary_masters 94 | roles: 95 | - { role: non-primary-master, tags: [ 'secondary-masters', 'master', 'install', 'master_install', 'secondary_masters'] } 96 | 97 | ### For fixes like vsphere's bug, we have to reboot after some more fixes... 98 | #https://github.com/vmware/kubernetes/issues/495 99 | - hosts: mustrebootlist 100 | gather_facts: no 101 | become: yes 102 | become_method: sudo 103 | tags: 104 | - mustrebootlist 105 | - vsphere_bug_fix 106 | - vsphere 107 | roles: 108 | - { role: tools, task: reboot, tags: [ 'reboot_minimal' ], when: "ClusterConfiguration.cloudProvider is defined and ClusterConfiguration.cloudProvider == 'vsphere' and allow_restart | default(False) and vsphere_bug_fix is defined and vsphere_bug_fix" } 109 | 110 | ## Generic Sanity 111 | - hosts: secondary-masters 112 | become: yes 113 | become_method: sudo 114 | tags: 115 | - master 116 | - secondary_masters 117 | pre_tasks: 118 | - name: remove temporary mustreboot temporary group 119 | group: 120 | name: mustrebootlist 121 | state: absent 122 | roles: 123 | - { role: tools, task: cluster_sanity, tags: [ 'cluster_sanity', 'sanity' ] } 124 | - { role: tools, task: postinstall_messages, tags: [ 'cluster_sanity', 'sanity' ] } 125 | 126 | ## to reset/add only some (more) nodes: 127 | ## 1. keep in hosts only: 128 | ## - the master 129 | ## - the affected node (all other nodes should not be there) 130 | ## 2. Have the token defined in the group_vars/all 131 | ## 3. 
Run using only this/these tag(s): 132 | ## ansible-playbook -i hosts -v site.yml --tags "node" # same with: ansible-playbook -i hosts -v site.yml --tags "node_reset,node_install,cluster_sanity,cluster_info" 133 | 134 | ## To get cluster info/sanity: 135 | ## ansible-playbook -i hosts -v site.yml --tags "cluster_sanity,cluster_info" 136 | -------------------------------------------------------------------------------- /roles/tools/tasks/reset.yml: -------------------------------------------------------------------------------- 1 | --- 2 | #- hosts: all 3 | # gather_facts: False 4 | # become: yes 5 | # become_method: sudo 6 | # tags: 7 | # - reset 8 | # tasks: 9 | 10 | - block: 11 | 12 | - name: stop keepalived for cleanup activities 13 | systemd: name={{ item }} state=stopped 14 | with_items: 15 | - keepalived 16 | tags: 17 | - kubelet 18 | - uninstall 19 | ignore_errors: true 20 | when: 21 | - groups['masters'] | length > 1 22 | - ( custom.networking.masterha_type | default('vip') ) == 'vip' 23 | 24 | # We had to remove it, as it blocks the flow. It also fetches docker.io images and in some setups there is no access to or fails due to limits on docker hub... 25 | # - name: Reset weave network # if it was used 26 | # shell: /usr/local/bin/weave reset --force 27 | # ignore_errors: true 28 | 29 | - name: remove pods NFS mount leftovers; Note you have to collect them from the remote storage (e.g. vsphere datastore) also 30 | shell: umount -f $(mount | grep '/kubelet/pods/' | grep '/volumes/kubernetes.io~nfs' | awk '{print $3}') 31 | tags: 32 | - umount 33 | - nfs_reset 34 | ignore_errors: true 35 | 36 | - name: Reset cluster (kubeadm reset --force --ignore-preflight-errors=all ) 37 | command: /usr/bin/kubeadm reset --force --ignore-preflight-errors=all 38 | ignore_errors: true 39 | # TODO: if cluster is installed, but kubedm is no longer available on the machine, we will not have a reset of cluster... 
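  # Usage sketch (an assumption, mirroring the tag layout used in all_reset.yml): to reset a
  # single worker without touching the rest of the cluster, something like
  #   ansible-playbook -i hosts all_reset.yml --tags node_reset --limit <node_fqdn>
  # should exercise only this role on that host (<node_fqdn> is a placeholder).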
40 | 41 | - name: Reset cluster (kubeadm reset --force --ignore-preflight-errors=all using --cri-socket loop ) 42 | # command: /usr/bin/kubeadm reset --force --ignore-preflight-errors=all --cri-socket={{ item }} 43 | command: /usr/bin/kubeadm reset --force --ignore-preflight-errors=all --cri-socket={{ InitConfiguration.nodeRegistration.criSocket }} 44 | ignore_errors: true 45 | # with_items: 46 | # - /var/run/dockershim.sock 47 | # - /var/run/crio/crio.sock 48 | # - /var/run/containerd/containerd.sock 49 | # - /var/run/cri-dockerd.sock 50 | 51 | ### Cleaning full /etc/kubernetes/ ; Starting k8s 1.12 behaves better, at some point we will remove this step: 52 | - name: ensure old kubeadm config files were removed 53 | file: state=absent path={{ item }} 54 | with_items: 55 | - /etc/kubernetes/ 56 | #- /etc/kubernetes/kubeadm.conf 57 | #- /etc/kubernetes/kubeadm-master.config 58 | #- /etc/kubernetes/kubeadm-master.conf 59 | #- /etc/kubernetes/cloud-config 60 | 61 | # - name: ensure old /etc/kubernetes/ is removed when full_kube_reinstall is true 62 | # file: state=absent path={{ item }} 63 | # with_items: 64 | # - /etc/kubernetes/ 65 | # #- /var/lib/etcd # there might be cases 66 | # when: full_kube_reinstall is defined and full_kube_reinstall 67 | 68 | - name: ensure old /var/lib/etcd/member is removed 69 | file: state=absent path={{ item }} 70 | with_items: 71 | - /var/lib/etcd/member 72 | when: etcd_clean | default(false) 73 | 74 | - name: systemctl stop kube*.*.slice 75 | shell: 'for i in $(systemctl list-unit-files --no-legend --no-pager -l | grep --color=never -o kube.*\.slice );do echo $i; systemctl stop $i ; done' 76 | tags: 77 | - umount 78 | 79 | - name: Reset cluster (kubeadm reset --force) # starting 1.14 80 | command: /usr/bin/kubeadm reset --force --ignore-preflight-errors=all 81 | ignore_errors: true 82 | # TODO: if cluster is installed, but kubedm is no longer available on the machine, we will not have a reset of cluster... 83 | 84 | - name: stop kubelet and etcd for cleanup activities 85 | systemd: name={{ item }} state=stopped 86 | with_items: 87 | - kubelet 88 | - etcd 89 | tags: 90 | - kubelet 91 | - uninstall 92 | ignore_errors: true 93 | 94 | - name: unhold before reinstall packages 95 | shell: apt-mark unhold {{ item }} 96 | ignore_errors: true 97 | with_items: 98 | - kubeadm 99 | - kubelet 100 | - kubectl 101 | - kubernetes-cni 102 | - cri-tools 103 | when: 104 | - full_kube_reinstall | default (False) 105 | - full_kube_apt_unhold | default (False) 106 | - ansible_os_family == "Debian" 107 | tags: 108 | - kubelet 109 | - uninstall 110 | 111 | - name: Remove before reinstall packages 112 | package: name={{ item }} state=absent 113 | with_items: 114 | - kubeadm 115 | - kubelet 116 | - kubectl 117 | - kubernetes-cni 118 | when: full_kube_reinstall | default (False) #is defined and full_kube_reinstall 119 | tags: 120 | - kubelet 121 | - uninstall 122 | 123 | - name: remove plugins mount leftovers; Note you have to collect them from the remote storage (e.g. vsphere datastore) also 124 | #shell: 'umount $(mount | grep " on /var/lib/kubelet/plugins/kubernetes.io/" | cut -f1 -d" ")' 125 | shell: umount -f $(mount | grep '/kubelet/plugins/kubernetes.io/' | awk '{print $3}') 126 | #shell: 'umount $(mount | grep "/kubelet/plugins/kubernetes.io/" | cut -f1 -d" ")' 127 | tags: 128 | - kubelet 129 | - uninstall 130 | ignore_errors: true 131 | 132 | - name: remove pods mount leftovers; Note you have to collect them from the remote storage (e.g. 
vsphere datastore) also 133 | shell: umount -f $(mount | grep '/kubelet/pods/' | grep '/volumes/kubernetes.io~' | awk '{print $3}') 134 | tags: 135 | - kubelet 136 | - uninstall 137 | ignore_errors: true 138 | 139 | - name: docker network prune -f 140 | shell: 'docker network prune -f' 141 | 142 | #https://github.com/kubernetes/kubernetes/issues/39557 143 | - name: cni0/cbr0 IP alloction issue 144 | shell: 'rm -rf /var/lib/cni/ /var/lib/kubelet/* /etc/cni/ ; ip link delete cni0; ip link delete cbr0 ; ip link delete flannel.1; ip link delete weave' 145 | ignore_errors: true 146 | tags: 147 | - uninstall 148 | 149 | - name: ipvsadm clear 150 | shell: 'ipvsadm --clear' 151 | ignore_errors: true 152 | tags: 153 | - uninstall 154 | 155 | - name: Reset iptables rules # THIS TASK SHOULD BE REMOVED, is not maintained 156 | shell: iptables-save | awk '/^[*]/ { print $1 } /^:[A-Z]+ [^-]/ { print $1 " ACCEPT" ; } /COMMIT/ { print $0; }' | iptables-restore 157 | when: iptables_reset is defined and iptables_reset 158 | ignore_errors: true 159 | tags: 160 | - uninstall 161 | 162 | #- name: restart kubelet for cleanup activities 163 | # systemd: name={{ item }} state=restarted 164 | # with_items: 165 | # - kubelet 166 | # when: ! (full_kube_reinstall is defined and full_kube_reinstall ) 167 | # tags: 168 | # - kubelet 169 | # - uninstall 170 | # ignore_errors: true 171 | 172 | - name: Remove /etc/systemd/system/kubelet.service.d/20-etcd-service-manager.conf if present from HA etcd setup time (in MasterHA) 173 | file: 174 | path: /etc/systemd/system/kubelet.service.d/20-etcd-service-manager.conf 175 | state: absent 176 | 177 | - name: Remove /etc/sysconfig/kubelet if present 178 | file: 179 | path: /etc/sysconfig/kubelet 180 | state: absent 181 | 182 | tags: 183 | - reset 184 | -------------------------------------------------------------------------------- /docs/popular_helm_charts_cli_deploy.md: -------------------------------------------------------------------------------- 1 | Examples of popular helm charts with their relevant params. 2 | Tested in k8s 10, helm 2.8.2, with persistent volumes and proxy. 
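All examples below assume the Helm 2 CLI with Tiller already initialised and the `stable/` repository configured. A minimal setup sketch (the repo URL points at the archived location of the former stable charts and is an assumption, not part of the original test environment):
```
# Assumed prerequisites (hypothetical setup, adjust the URL to your own mirror):
helm repo add stable https://charts.helm.sh/stable   # archived location of the former stable repo
helm repo update
helm version   # client and server should both report ~v2.8.x for the flags used below (--name, delete --purge)
```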
3 | 4 | # Test k8s deployment with: 5 | ## Wordpress 6 | ``` 7 | export K8SMASTER=$(hostname -s) 8 | helm delete --purge wordpress || true 9 | helm install --name wordpress --namespace default \ 10 | --set wordpressUsername=admin,wordpressPassword=password \ 11 | --set persistence.size=200Mi \ 12 | --set mariadb.mariadbRootPassword=secretpassword,mariadb.persistence.size=400Mi \ 13 | --set ingress.enabled=true,ingress.hosts[0].name="wordpress.${K8SMASTER}.k8singress.example.com" \ 14 | stable/wordpress 15 | ``` 16 | 17 | # DBs 18 | ## mysql 19 | ``` 20 | export K8SMASTER=$(hostname -s) 21 | helm delete --purge mysql || true 22 | helm install --namespace default --name mysql \ 23 | --set mysqlRootPassword=secretpassword,mysqlUser=my-user,mysqlPassword=my-password,mysqlDatabase=my-database,persistence.size=400Mi \ 24 | stable/mysql 25 | ``` 26 | 27 | ## MongoDB 28 | ``` 29 | export K8SMASTER=$(hostname -s) 30 | helm delete --purge mongodb || true 31 | helm install --name mongodb --namespace mongodb \ 32 | --set mongodbRootPassword=secretpassword,mongodbUsername=my-user,mongodbPassword=my-password,mongodbDatabase=my-database \ 33 | --set persistence.enabled=True,persistence.size=500Mi \ 34 | stable/mongodb 35 | ``` 36 | 37 | ## PostgreSQL 38 | ``` 39 | export K8SMASTER=$(hostname -s) 40 | helm delete --purge postgresql || true 41 | helm install --name postgresql --namespace default \ 42 | --set postgresUser=my-user,postgresPassword=secretpassword,postgresDatabase=my-database \ 43 | --set persistence.enabled=True,persistence.size=300Mi \ 44 | stable/postgresql 45 | ``` 46 | 47 | # Monitoring 48 | ## Prometheus 49 | ``` 50 | export K8SMASTER=$(hostname -s) 51 | helm delete --purge prometheus || true 52 | helm install --name prometheus --namespace infra \ 53 | --set rbac.create=True \ 54 | --set alertmanager.ingress.enabled=True,alertmanager.ingress.hosts[0]=alertmanager.${K8SMASTER}.k8singress.example.com \ 55 | --set alertmanager.persistentVolume.enabled=true,alertmanager.persistentVolume.size=300Mi \ 56 | --set server.ingress.enabled=True,server.ingress.hosts[0]=prometheus.${K8SMASTER}.k8singress.example.com \ 57 | --set server.persistentVolume.enabled=True,server.persistentVolume.size=400Mi \ 58 | --set pushgateway.ingress.enabled=True,pushgateway.ingress.hosts[0]=pushgateway.${K8SMASTER}.k8singress.example.com \ 59 | stable/prometheus 60 | ``` 61 | 62 | ## Grafana (resource intensive/cron jobs) 63 | ``` 64 | export K8SMASTER=$(hostname -s) 65 | helm delete --purge grafana || true 66 | helm install --name grafana --namespace infra \ 67 | --set adminPassword=my-password \ 68 | --set persistence.enabled=True,persistence.size=200Mi,persistence.accessModes[0]=ReadWriteOnce \ 69 | --set ingress.enabled=True,ingress.hosts[0]=grafana.${K8SMASTER}.k8singress.example.com \ 70 | --set datasources.datasources\\.yaml.apiVersion=1 \ 71 | --set datasources.datasources\\.yaml.datasources[0].name=prometheus \ 72 | --set datasources.datasources\\.yaml.datasources[0].type=prometheus \ 73 | --set datasources.datasources\\.yaml.datasources[0].url="http://prometheus-server.infra.svc.cluster.local" \ 74 | --set datasources.datasources\\.yaml.datasources[0].isDefault=true \ 75 | --set datasources.datasources\\.yaml.datasources[0].access=proxy \ 76 | --set datasources.datasources\\.yaml.datasources[1].name=prometheus_direct \ 77 | --set datasources.datasources\\.yaml.datasources[1].type=prometheus \ 78 | --set datasources.datasources\\.yaml.datasources[1].url="http://prometheus.${K8SMASTER}.k8singress.example.com" 
\ 79 | --set datasources.datasources\\.yaml.datasources[1].isDefault=false \ 80 | --set datasources.datasources\\.yaml.datasources[1].access=direct \ 81 | stable/grafana 82 | ``` 83 | 84 | # Others 85 | ## chartmuseum 86 | TBD: The chart does not provide a way to add proxy curently 87 | ``` 88 | export K8SMASTER=$(hostname -s) 89 | helm delete --purge chartmuseum || true 90 | helm install --name chartmuseum --namespace infra \ 91 | --set persistence.enabled=True,persistence.storageClass="",persistence.size=100Mi \ 92 | --set ingress.enabled=True \ 93 | --set ingress.hosts.chartmuseum\\.${K8SMASTER}\\.k8singress\\.example\\.com[0]="/charts" \ 94 | --set ingress.hosts.chartmuseum\\.${K8SMASTER}\\.k8singress\\.example\\.com[1]="/index.yaml" \ 95 | --set ingress.hosts.chartmuseum\\.${K8SMASTER}\\.k8singress\\.example\\.com[2]="/index.yml" \ 96 | stable/chartmuseum 97 | 98 | #### Optionally, install the binary on the unix side to interact with it: 99 | curl -LO https://s3.amazonaws.com/chartmuseum/release/latest/bin/linux/amd64/chartmuseum && chmod +x ./chartmuseum && mv ./chartmuseum /usr/local/bin 100 | ``` 101 | 102 | ## Monocular 103 | TBD: The chart does not provide a way to add proxy curently 104 | ``` 105 | export K8SMASTER=$(hostname -s) 106 | helm delete --purge monocular || true 107 | helm repo add monocular https://kubernetes-helm.github.io/monocular 108 | helm install --name monocular --namespace infra \ 109 | --set ingress.enabled=True,ingress.hosts[0]="monocular.${K8SMASTER}.k8singress.example.com" \ 110 | --set mongodb.persistence.enabled=True,mongodb.persistence.size=400Mi \ 111 | monocular/monocular 112 | ``` 113 | 114 | # CI/CD 115 | ## Jenkins 116 | ``` 117 | export K8SMASTER=$(hostname -s) 118 | helm delete --purge jenkins || true 119 | helm install --name jenkins --namespace infra \ 120 | --set rbac.install=true \ 121 | --set Master.InstallPlugins[0]="kubernetes:1.5.1" \ 122 | --set Master.InstallPlugins[1]="credentials-binding:1.16" \ 123 | --set Master.InstallPlugins[2]="git:3.8.0" \ 124 | --set Master.InstallPlugins[3]="workflow-job:2.18" \ 125 | --set Master.InstallPlugins[4]="workflow-aggregator:2.5" \ 126 | --set Master.InitContainerEnv[0].name=http_proxy,Master.InitContainerEnv[0].value='http://proxy.corp.example.com:8080' \ 127 | --set Master.InitContainerEnv[1].name=https_proxy,Master.InitContainerEnv[1].value='http://proxy.corp.example.com:8080' \ 128 | --set Master.InitContainerEnv[2].name=no_proxy,Master.InitContainerEnv[2].value='localhost\,.svc\,.local\,.example.com' \ 129 | --set Master.ContainerEnv[0].name=http_proxy,Master.ContainerEnv[0].value='http://proxy.corp.example.com:8080' \ 130 | --set Master.ContainerEnv[1].name=https_proxy,Master.ContainerEnv[1].value='http://proxy.corp.example.com:8080' \ 131 | --set Master.ContainerEnv[2].name=no_proxy,Master.ContainerEnv[2].value='localhost\,.svc\,.local\,.example.com' \ 132 | --set Master.JavaOpts="-Dhttp.proxyHost=proxy.corp.example.com -Dhttp.proxyPort=8080 -Dhttps.proxyHost=proxy.corp.example.com -Dhttps.proxyPort=8080 -Dhttp.nonProxyHosts='localhost|*.example.com|*.local|*.svc' -Dhttps.nonProxyHosts='localhost|*.example.com|*.local|*.svc' " \ 133 | --set Master.ServiceType=ClusterIP \ 134 | --set Master.HostName=jenkins.${K8SMASTER}.k8singress.example.com \ 135 | --set Persistence.Enabled=True \ 136 | --set Persistence.Size=1Gi \ 137 | stable/jenkins 138 | echo "Find admin password is:" 139 | printf $(kubectl get secret --namespace infra jenkins -o jsonpath="{.data.jenkins-admin-password}" | base64 
--decode);echo 140 | ``` 141 | 142 | ## Nexus 143 | TBD: The chart does not provide a way to add proxy at deploy time (as of now) 144 | ``` 145 | export K8SMASTER=$(hostname -s) 146 | helm delete --purge nexus || true 147 | helm install --name nexus --namespace infra \ 148 | --set docker.enabled=True,docker.host=myregistry.${K8SMASTER}.k8singress.example.com,docker.port=5000 \ 149 | --set persistence.enabled=True,persistence.size=1Gi \ 150 | --set service.type=ClusterIP \ 151 | --set ingress.enabled=True,ingress.hosts[0]="nexus.${K8SMASTER}.k8singress.example.com" \ 152 | stable/sonatype-nexus 153 | ``` 154 | 155 | # Notes: 156 | All sizes are at min. 157 | If proxy is not required, remove the relevant lines 158 | -------------------------------------------------------------------------------- /all_install.yml: -------------------------------------------------------------------------------- 1 | --- 2 | ## Preparations 3 | ## Making sure python exists on all nodes, so Ansible will be able to run: 4 | - hosts: all 5 | gather_facts: no 6 | become: yes 7 | become_method: sudo 8 | pre_tasks: 9 | ## It would be best to have ansible already installed on all machines. 10 | ## But if it is not, we'll try to do it: 11 | - name: when no python2, install python2 for Ansible<2.8 (usually required on ubuntu, which defaults to python3) # Alternativelly, for Ubuntu machines, define var: ansible_python_interpreter=/usr/bin/python3 12 | raw: test -e /usr/bin/python || (apt -y update && apt install -y python-minimal) || (yum install -y python2 python-simplejson) 13 | register: output 14 | changed_when: output.stdout != "" 15 | tags: always 16 | when: 17 | - ansible_version.full is version_compare('2.8', '<') 18 | - ( ansible_python_interpreter is not defined or ansible_python_interpreter == "/usr/bin/python" ) 19 | # ansible_os_family conds. cannot be used as this is before gathering facts (where ansible is required) 20 | ignore_errors: true 21 | ## reason for ignore_errors: true 22 | ## "version_compare" was replaced with "version" starting ansible 2.5; 23 | ## CentOS/RHEL 7.x use ansible 2.4, so not able to grasp what version_compare is. 24 | ## Ansible 2.9 removes the version_compare and does not recognize it any longer. 25 | ## As our need is to add python2 only on versions before 2.8, if this fails 26 | ## (due to missing version_compare command), we are fine. 27 | ## We do not cover cases where it fails due to other reasons, but that is a reasonable risk, 28 | ## and that issue will be captured later in the flow. 29 | 30 | - name: when no python(2/3), install python3(Debian) python2(RedHat) for Ansible>=2.8 # Alternativelly, for Ubuntu machines, define var: ansible_python_interpreter=/usr/bin/python3 31 | raw: test -e /usr/bin/python || (apt -y update && apt install -y python3-minimal) || (yum install -y python2 python-simplejson) 32 | register: output 33 | changed_when: output.stdout != "" 34 | tags: always 35 | when: 36 | - ansible_version.full is version('2.8', '>=') or ( ansible_python_interpreter is defined and ansible_python_interpreter == "/usr/bin/python3" ) 37 | # ansible_os_family conds. cannot be used as this is before gathering facts (where ansible is required) 38 | ignore_errors: true 39 | ## reason for ignore_errors: true 40 | ## is similar to the one explained above (complements it) 41 | 42 | - setup: # aka gather_facts 43 | tags: always # required for tags, see ansible issue: #14228 44 | 45 | - name: test min. 
vars (group_vars/all) are set (ClusterConfiguration and k8s_network_addons_urls) 46 | debug: msg='Make sure min. vars are set in group_vars/all/ (e.g. ClusterConfiguration and k8s_network_addons_urls)' 47 | when: 48 | - ClusterConfiguration is not defined 49 | - JoinConfiguration is not defined 50 | failed_when: 51 | - ClusterConfiguration is not defined 52 | - JoinConfiguration is not defined 53 | tags: always # always check if we have vars in place 54 | 55 | ## nodes -> install common part (for all nodes) 56 | - hosts: nodes 57 | become: yes 58 | become_method: sudo 59 | tags: 60 | - node 61 | roles: 62 | - { role: common, task: all, tags: [ 'common', 'install', 'common_install', 'node_install', 'node' ], when: "inventory_hostname not in groups['masters']" } 63 | 64 | ## master -> install common part (for all masters - and sometimes etcd when colocated with masters) 65 | - hosts: masters 66 | become: yes 67 | become_method: sudo 68 | tags: 69 | - master 70 | roles: 71 | - { role: common, task: all, tags: [ 'common', 'install', 'common_install', 'master_install'] } 72 | 73 | ## master -> install keepalived on masters (relevat if HA) 74 | - hosts: masters 75 | become: yes 76 | become_method: sudo 77 | any_errors_fatal: yes 78 | tags: 79 | - master 80 | - install 81 | - ha 82 | - master_install 83 | roles: 84 | - role: keepalived 85 | tags: [ 'master', 'install', 'master_install', 'ha', 'keepalived'] 86 | when: 87 | - ( groups['masters'] | length ) > 1 88 | - ( custom.networking.masterha_type | default('vip') ) == 'vip' 89 | 90 | - hosts: primary-master 91 | name: primary-master (or master in general) - it applies to both ha and non-ha 92 | become: yes 93 | become_method: sudo 94 | any_errors_fatal: yes 95 | tags: 96 | - master 97 | - install 98 | - master_install 99 | - ha 100 | roles: 101 | - { role: primary-master, task: primary, tags: [ 'primary-master', 'master', 'install', 'master_install'] } 102 | 103 | - hosts: secondary-masters 104 | become: yes 105 | become_method: sudo 106 | any_errors_fatal: yes 107 | tags: 108 | - master 109 | - install 110 | - ha 111 | - master_install 112 | roles: 113 | - { role: non-primary-master, tags: [ 'secondary-masters', 'master', 'install', 'master_install', 'secondary_masters'] } 114 | 115 | ## node -> install nodes (kubeadm join, etc) 116 | - hosts: nodes 117 | become: yes 118 | become_method: sudo 119 | any_errors_fatal: yes 120 | tags: 121 | - node 122 | - install 123 | - node_install 124 | roles: 125 | - { role: non-primary-master, tags: [ 'node', 'install', 'node_install'], when: "inventory_hostname not in groups['masters']" } 126 | 127 | ## node -> label nodes (even when master is also a node) 128 | - hosts: nodes 129 | become: yes 130 | become_method: sudo 131 | any_errors_fatal: yes 132 | tags: 133 | - node 134 | - install 135 | - node_install 136 | - label 137 | roles: 138 | - { role: tools, task: labels, tags: [ 'label'] } 139 | 140 | ## Post deploy (network, storage, taints, helm installation, helm charts deploy, any other addons) 141 | - hosts: primary-master 142 | become: yes 143 | become_method: sudo 144 | tags: 145 | - post_deploy 146 | roles: 147 | - { role: post_deploy, task: all, tags: [ 'post_deploy_no_helm' ] } 148 | - { role: storage, task: create_all, tags: [ 'storage', 'rook', 'nfs', 'vsphere' ] } 149 | - { role: helm, task: helm, tags: [ 'helm' ] } 150 | - { role: helm, task: charts_deploy, tags: [ 'helm', 'charts_deploy' ] } 151 | 152 | ### For fixes like vsphere's bug, we have to reboot after some more fixes... 
153 | #https://github.com/vmware/kubernetes/issues/495 154 | - hosts: mustrebootlist 155 | gather_facts: no 156 | become: yes 157 | become_method: sudo 158 | tags: 159 | - mustrebootlist 160 | - vsphere_bug_fix 161 | - vsphere 162 | roles: 163 | - { role: tools, task: reboot, tags: [ 'reboot_minimal' ], when: "ClusterConfiguration.cloudProvider is defined and ClusterConfiguration.cloudProvider == 'vsphere' and allow_restart | default(False) and vsphere_bug_fix is defined and vsphere_bug_fix" } 164 | 165 | ## Generic Sanity 166 | - hosts: masters 167 | become: yes 168 | become_method: sudo 169 | tags: 170 | - master 171 | pre_tasks: 172 | - name: remove temporary mustreboot temporary group 173 | group: 174 | name: mustrebootlist 175 | state: absent 176 | roles: 177 | - { role: tools, task: cluster_sanity, tags: [ 'cluster_sanity', 'sanity' ] } 178 | - { role: tools, task: postinstall_messages, tags: [ 'cluster_sanity', 'sanity' ] } 179 | 180 | ## to reset/add only some (more) nodes: 181 | ## 1. keep in hosts only: 182 | ## - the master 183 | ## - the affected node (all other nodes should not be there) 184 | ## 2. Have the token defined in the group_vars/all 185 | ## 3. Run using only this/these tag(s): 186 | ## ansible-playbook -i hosts -v site.yml --tags "node" # same with: ansible-playbook -i hosts -v site.yml --tags "node_reset,node_install,cluster_sanity,cluster_info" 187 | 188 | ## To get cluster info/sanity: 189 | ## ansible-playbook -i hosts -v site.yml --tags "cluster_sanity,cluster_info" 190 | -------------------------------------------------------------------------------- /Vagrantfile: -------------------------------------------------------------------------------- 1 | # -*- mode: ruby -*- 2 | # vi: set ft=ruby : 3 | 4 | # All Vagrant configuration is done below. The "2" in Vagrant.configure 5 | # configures the configuration version (we support older styles for 6 | # backwards compatibility). Please don't change it unless you know what 7 | # you're doing. 8 | 9 | $instance_name_prefix = "k8s" 10 | $num_instances = 1 # Number of nodes, excluding master which is always created. 11 | #$custom_networking_dnsDomain = ".ap" # put same value like custom.networking.dnsDomain in ansible's group_vars/all, BUT this time WITH THE DOT in front! 12 | #E.g. ".demo.k8s.ap", 13 | # https://www.virtualbox.org/manual/ch08.html#vboxmanage-natnetwork 14 | #def nat(config) 15 | ### Cannot be used, as the rest of vagrant commands fail... 16 | # config.vm.provider "virtualbox" do |v| 17 | # v.customize ["modifyvm", :id, "--nic1", "bridged", "--bridgeadapter", "enp3s0", "--nictype1", "virtio", "--macaddress1", "auto" ] #, "--nat-network2", "mybridgeinterface", "--nictype1", "virtio"] # 82540EM 18 | # v.customize ["modifyvm", :id, "--nic2", "nat", "--nictype2", "virtio"] 19 | # end 20 | #end 21 | 22 | Vagrant.configure(2) do |config| 23 | # The most common configuration options are documented and commented below. 24 | # For a complete reference, please see the online documentation at 25 | # https://docs.vagrantup.com. 
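  # Hedged usage sketch (not in the original file): for a three-worker cluster whose node
  # names should line up with custom.networking.dnsDomain from group_vars/all, one could set
  #   $num_instances = 3
  #   $custom_networking_dnsDomain = ".demo.k8s.ap"
  # near the top of this file before running `vagrant up`.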
26 | 27 | #config.vm.box_check_update = "false" # If there is no internet access to get new updates 28 | 29 | #config.vm.network "public_network", type: "dhcp", bridge: "enp3s0" 30 | #config.vm.network "public_network" #, :bridge => "enp3s0" #, mac: "auto" #, :adapter=>1 #, use_dhcp_assigned_default_route: true 31 | #config.ssh.port=22 32 | #config.vm.network "public_network", type: "dhcp", :bridge => "enp3s0" 33 | #config.vm.usable_port_range = (2000..2500) 34 | #config.vm.boot_timeout = 90 35 | #config.ssh.insert_key = false 36 | #config.ssh.username = "your_user" 37 | #config.ssh.password = "your_password" 38 | 39 | config.vm.provider "virtualbox" do |vb| 40 | vb.gui = false # Set to true to view the window in graphical mode 41 | vb.memory = "6144" #"4096" #"3072" # 6144 42 | vb.cpus = 4 43 | #vb.customize ["storagectl", :id, "--name", "IDE Controller", "--remove"] # Make sure it does not use IDE 44 | #vb.customize ["storagectl", :id, "--name", "SATA Controller", "--add", "sata"] # Make it use SATA: faster and less issues 45 | # optionally add: , "--hostiocache", "on", "--bootable", "on"] # like here: https://www.virtualbox.org/manual/ch08.html#vboxmanage-storagectl 46 | end 47 | 48 | #### CHOOSE DESIRED OS: 49 | #config.vm.box = "centos/7" 50 | #config.vm.box = "centos/atomic-host" # NEVER TESTED 51 | config.vm.box = "ubuntu/xenial64" 52 | 53 | # NODES: 54 | (1..$num_instances).each do |i| 55 | config.vm.define vm_name = "%s-%02d%s" % [$instance_name_prefix, i, $custom_networking_dnsDomain] do |node| 56 | #node.vm.synced_folder ".vagrant", "/vagrant", type: "rsync" #, rsync__exclude: ".local_only" #rsync__include: ".vagrant/" 57 | #node.vm.box = "centos/7" 58 | #node.vm.box = "centos/atomic-host" 59 | node.vm.hostname = vm_name 60 | #node.ssh.host = vm_name 61 | #node.vm.provision "shell", inline: "echo hello from %s" % [node.vm.hostname] 62 | #node.vm.provision "shell" do |s| 63 | #s.path= "dockerize.sh" # no longer required, handled by ansible 64 | #s.args= "node" 65 | #end 66 | node.vm.provision "shell", inline: <<-SHELL 67 | sudo cp -rf ~vagrant/.ssh ~root/ || true # This will allow us to ssh into root with existing vagrant key 68 | sudo cp -rf ~ubuntu/.ssh ~root/ || true # This will allow us to ssh into root with existing vagrant key 69 | #chmod 755 /vagrant/dockerize.sh 70 | #/vagrant/dockerize.sh 71 | SHELL 72 | #File.open("ssh_config", "w+") { |file| file.write("boo" ) } 73 | end 74 | end 75 | 76 | # MASTER: 77 | config.vm.define vm_name = "%s-master%s" % [$instance_name_prefix, $custom_networking_dnsDomain] , primary: true do |k8smaster| 78 | #k8smaster.vm.synced_folder ".vagrant", "/vagrant", type: "rsync" #, rsync__exclude: ".local_only" #rsync__include: ".vagrant/" 79 | #k8smaster.vm.hostname = "#{k8smaster}" 80 | #k8smaster.vm.hostname = "%s" % [ k8smaster ] 81 | k8smaster.vm.hostname = vm_name 82 | #k8smaster.ssh.host = vm_name 83 | #k8smaster.vm.network "forwarded_port", guest: 80, host: 2080, auto_correct: true 84 | #k8smaster.vm.network "forwarded_port", guest: 443, host: 2443, auto_correct: true 85 | 86 | #k8smaster.vm.provision :shell, inline: "echo hello from %s" % [k8smaster.vm.hostname] 87 | #k8smaster.vm.provision "shell" do |s| 88 | #s.path= "dockerize.sh" # no longer required, handled by ansible 89 | #s.args= "master" 90 | #end 91 | 92 | k8smaster.vm.provision "shell", inline: <<-SHELL 93 | sudo cp -rf ~vagrant/.ssh ~root/ || true # This will allow us to ssh into root with existing vagrant key 94 | sudo cp -rf ~ubuntu/.ssh ~root/ || true # This will allow us to 
ssh into root with existing vagrant key 95 | #chmod 755 /vagrant/dockerize.sh 96 | #/vagrant/dockerize.sh 97 | # curl -SL https://github.com/ReSearchITEng/kubeadm-playbook/archive/master.tar.gz | tar xvz # already in /vagrant 98 | SHELL 99 | 100 | end 101 | 102 | # Disable automatic box update checking. If you disable this, then 103 | # boxes will only be checked for updates when the user runs 104 | # `vagrant box outdated`. This is not recommended. 105 | # config.vm.box_check_update = false 106 | 107 | # Create a forwarded port mapping which allows access to a specific port 108 | # within the machine from a port on the host machine. In the example below, 109 | # accessing "localhost:8080" will access port 80 on the guest machine. 110 | # config.vm.network "forwarded_port", guest: 80, host: 8080 111 | 112 | # Create a private network, which allows host-only access to the machine 113 | # using a specific IP. 114 | # config.vm.network "private_network", ip: "192.168.33.10" 115 | 116 | # Create a public network, which generally matches the bridged network. 117 | # Bridged networks make the machine appear as another physical device on 118 | # your network. 119 | # config.vm.network "public_network" 120 | 121 | # Share an additional folder to the guest VM. The first argument is 122 | # the path on the host to the actual folder. The second argument is 123 | # the path on the guest to mount the folder. And the optional third 124 | # argument is a set of non-required options. 125 | # config.vm.synced_folder "../data", "/vagrant_data" 126 | 127 | # Provider-specific configuration so you can fine-tune various 128 | # backing providers for Vagrant. These expose provider-specific options. 129 | # Example for VirtualBox: 130 | # 131 | # config.vm.provider "virtualbox" do |vb| 132 | # # Display the VirtualBox GUI when booting the machine 133 | # vb.gui = true 134 | # 135 | # # Customize the amount of memory on the VM: 136 | # vb.memory = "1024" 137 | # end 138 | # 139 | # View the documentation for the provider you are using for more 140 | # information on available options. 141 | 142 | # Define a Vagrant Push strategy for pushing to Atlas. Other push strategies 143 | # such as FTP and Heroku are also available. See the documentation at 144 | # https://docs.vagrantup.com/v2/push/atlas.html for more information. 145 | # config.push.define "atlas" do |push| 146 | # push.app = "YOUR_ATLAS_USERNAME/YOUR_APPLICATION_NAME" 147 | # end 148 | 149 | # Enable provisioning with a shell script. Additional provisioners such as 150 | # Puppet, Chef, Ansible, Salt, and Docker are also available. Please see the 151 | # documentation for more information about their specific syntax and use. 152 | # config.vm.provision "shell", inline: <<-SHELL 153 | # sudo apt-get update 154 | # sudo apt-get install -y apache2 155 | # SHELL 156 | end 157 | -------------------------------------------------------------------------------- /group_vars/all/storage.yml: -------------------------------------------------------------------------------- 1 | ################## STORAGE ################ 2 | ############################################# 3 | 4 | ## General Storage settings 5 | ## When resetting a previous installation, should it first remove the existing PVCs & PVs (default: false)?
6 | storage: 7 | delete_pvs: false 8 | 9 | ##### STORAGE OPTION: VMWARE VSPHERE Storage # 10 | ############################################## 11 | ##### Note: This requires the cloud provider settings below: 12 | # ClusterConfiguration.cloudProvider: 'vsphere' 13 | 14 | vsphere_storageclass_urls: 15 | - https://github.com/kubernetes/kubernetes/raw/master/cluster/addons/storage-class/vsphere/default.yaml 16 | #- https://raw.githubusercontent.com/kubernetes/kubernetes/master/examples/volumes/vsphere/vsphere-volume-sc-fast.yaml 17 | 18 | #vsphere_bug_fix github.com/vmware/kubernetes/issues/495 # For k8s 11.x 19 | vsphere_bug_fix: False 20 | 21 | ##### 22 | cloud_config_vsphere_specific: 23 | server: "vcenter.corp.example.com" 24 | secret_name: "vsphere-credentials" #Any name would do 25 | secret_namespace: "kube-system" #kube-system is the usual namespace for such details 26 | username: "user@corp.example.com" # move these to your vault 27 | password: "PASSWORD" # move these to your vault 28 | 29 | cloud_config: | 30 | [Global] 31 | ## Vsphere: 32 | ## One must ensure: 33 | ## - all vms have this enabled: ./govc vm.change -e="disk.enableUUID=1" -vm= 34 | ## - all vms are in the same VCenter 35 | ## - the user below has the following roles at vcenter level: 36 | ## Datastore > Allocate space 37 | ## Datastore > Low level file Operations 38 | ## Virtual Machine > Configuration > Add existing disk 39 | ## Virtual Machine > Configuration > Add or remove device 40 | ## Virtual Machine > Configuration > Remove disk 41 | ## Virtual machine > Configuration > Add new disk 42 | ## Virtual Machine > Inventory > Create new 43 | ## Network > Assign network 44 | ## Resource > Assign virtual machine to resource pool 45 | ## Profile-driven storage -> Profile-driven storage view 46 | insecure-flag = 1 47 | secret-name = "{{ cloud_config_vsphere_specific.secret_name }}" 48 | secret-namespace = "{{ cloud_config_vsphere_specific.secret_namespace }}" 49 | 50 | [VirtualCenter "{{ cloud_config_vsphere_specific.server }}"] 51 | port = 443 52 | datacenters = DC01 53 | 54 | [Workspace] 55 | server = "{{ cloud_config_vsphere_specific.server }}" 56 | datacenter = DC01 57 | default-datastore = DS01 58 | folder = kubernetes # for VRA usually folder name is: VRM 59 | ## Working dir is necessary when your machines are under a directory (and all have to be under the same one) 60 | ##./govc vm.info -vm.dns=machine01 | grep Path #and remove the machine name (last string) 61 | 62 | ## Setup of per machine vm-uuid is usually not required, and it's determined automatically. 63 | #cat /sys/class/dmi/id/product_serial and format like: "4237558d-2231-78b9-e07e-e9028e7cf4a5" 64 | #or: ./govc vm.info -vm.dns=machine01 | grep UUID #(well formated also) 65 | #machine01: vm-uuid="4215e1de-26df-21ec-c79e-2105fe3f9ad1" 66 | #machine02: vm-uuid="4215f1e4-6abd-cff1-1a4c-71ec169d7b11" 67 | [Disk] 68 | #scsicontrollertype = lsilogic-sas 69 | scsicontrollertype = pvscsi 70 | 71 | ##### 72 | 73 | ##### STORAGE OPTION: Self Created NFS ### 74 | ########################################## 75 | ## Creates a nfs server on the master and exports the below path from the master to all cluster 76 | nfs_k8s: #https://github.com/kubernetes/kubernetes/blob/master/examples/volumes/nfs/provisioner/nfs-server-gce-pv.yaml 77 | #https://github.com/kubernetes-incubator/nfs-provisioner 78 | #enabled: "true" 79 | enabled: False 80 | provisioner: nfs.k8s 81 | # Path on the master node: 82 | host_path: /storage/nfs 83 | is_default_class: 'true' # case sensitive! 
Also: only one class can be default. Note that vsphere thin is also trying to be set as default, choose which one you want as default 84 | wipe: true # When set to true, on every reset the files under host_path will be wiped !!! 85 | 86 | ##### STORAGE OPTION: Rook (ceph) ######## 87 | ########################################## 88 | ## Rook - Ceph Distributed Software Storage 89 | ## As per spec section of: https://github.com/rook/rook/blob/master/demo/kubernetes/rook-cluster.yaml 90 | 91 | ## NOTE: rook/ceph is moved to the chart version instead! BELOW is not up to date! 92 | rook: 93 | enabled: false 94 | os_packages: 95 | - jq 96 | reset: 97 | storage_delete: true 98 | ## OLD Installation type, using url. Now we use the helm chart which wraps it. 99 | #operator_url: 100 | # https://github.com/rook/rook/raw/master/demo/kubernetes/rook-operator.yaml 101 | client_tools_url: 102 | - https://github.com/rook/rook/raw/master/demo/kubernetes/rook-client.yaml 103 | - https://github.com/rook/rook/raw/master/demo/kubernetes/rook-tools.yaml 104 | sharedfs: 105 | enabled: false 106 | fs: 107 | - { name: "sharedfs", replication: 2 } #ceph osd pool set sharedfs-data size 2 && ceph osd pool set sharedfs-metadata size 2 108 | allowed_consumer_namespaces: #E.g.: kubectl get secret rook-admin -n rook -o json | jq '.metadata.namespace = "kube-system"' | kubectl apply -f - # as per: https://github.com/rook/rook/blob/master/Documentation/k8s-filesystem.md 109 | - "kube-system" 110 | - "default" 111 | cluster_spec: # as per: https://github.com/rook/rook/blob/master/demo/kubernetes/rook-cluster.yaml and https://github.com/rook/rook/blob/master/Documentation/cluster-tpr.md 112 | versionTag: master-latest 113 | dataDirHostPath: /storage/rook 114 | storage: # cluster level storage configuration and selection 115 | useAllNodes: true 116 | useAllDevices: false 117 | deviceFilter: 118 | metadataDevice: 119 | location: 120 | storeConfig: 121 | storeType: filestore 122 | databaseSizeMB: 1024 # this value can be removed for environments with normal sized disks (100 GB or larger) 123 | journalSizeMB: 1024 # this value can be removed for environments with normal sized disks (20 GB or larger) 124 | ## Individual nodes and their config can be specified as well, but 'useAllNodes' above must be set to false. Then, only the named 125 | ## nodes below will be used as storage resources. Each node's 'name' field should match their 'kubernetes.io/hostname' label. 126 | # nodes: 127 | # - name: "172.17.4.101" 128 | # directories: # specific directories to use for storage can be specified for each node 129 | # - path: "/rook/storage-dir" 130 | # - name: "172.17.4.201" 131 | # devices: # specific devices to use for storage can be specified for each node 132 | # - name: "sdb" 133 | # - name: "sdc" 134 | # storeConfig: # configuration can be specified at the node level which overrides the cluster level config 135 | # storeType: bluestore 136 | # - name: "172.17.4.301" 137 | # deviceFilter: "^sd." 138 | 139 | ## ADVANCED rook options: 140 | rbd: 141 | enabled: true 142 | pool_spec: # as per: https://github.com/rook/rook/blob/master/demo/kubernetes/rook-storageclass.yaml and https://github.com/rook/rook/blob/master/Documentation/pool-tpr.md 143 | replication: 144 | size: 1 145 | ## For an erasure-coded pool, comment out the replication size above and uncomment the following settings. 146 | ## Make sure you have enough OSDs to support the replica size or erasure code chunks.
147 | #erasureCode: 148 | # codingChunks: 2 149 | # dataChunks: 2 150 | 151 | storageclass_parameters: # as per: https://github.com/rook/rook/blob/master/demo/kubernetes/rook-storageclass.yaml 152 | pool: replicapool 153 | ## Specify the Rook cluster from which to create volumes. If not specified, it will use `rook` as the namespace and name of the cluster. 154 | # clusterName: rook 155 | # clusterNamespace: rook 156 | 157 | ##ceph_conf: as per https://github.com/rook/rook/blob/master/Documentation/advanced-configuration.md 158 | #ceph_conf: | 159 | # [global] 160 | # osd crush update on start = false 161 | # osd pool default size = 2 162 | 163 | monitoring: # as per: https://github.com/rook/rook/blob/master/Documentation/k8s-monitoring.md 164 | enabled: true 165 | 166 | ##### 167 | 168 | -------------------------------------------------------------------------------- /group_vars/all/network.yml: -------------------------------------------------------------------------------- 1 | ## HA 2 | CLUSTER_NAME: demok8s # used only for defining the clusterConfiguration and joinConfiguration k8s config, as well as the below dnsDomain and masterha_fqdn 3 | 4 | ## ensure you have the DNS set for wildcard, and pointing all the traffic to the master or similar setup 5 | custom: 6 | networking: 7 | dnsDomain: "{{ CLUSTER_NAME }}.{{ CORP_DNS_DOMAIN | default ('corp.example.com') }}" # For MasterHA, if you have dns, put the desired cluster domain here. If no DNS change is possible on your side, and you want MasterHA, fix the below 2 values accordingly 8 | 9 | ###### 10 | ## masterha_* params are required when you have MasterHA (meaning when your inventory has a non-empty "secondary-masters" section) 11 | ## Your setup can either use a LoadBalancer (usually a hw one), 12 | ## or use a VIP address which keepalived will manage (move the address from one master to another as needed) 13 | 14 | ## Decide on one of the 2 possible masterha_types: 15 | # "vip" #Choose VIP and you'll have keepalived installed and configured for the masterha_ip below (default) 16 | # "lb" #Choose lb when you have a MasterHA LB which load-balances across all your masters, on the api port (default 6443) 17 | # # Make sure your LB is set up to forward requests to a specific master ONLY when the api port /healthz on that host returns status 200 18 | #masterha_type: "vip" # or "lb" 19 | masterha_ip: "192.0.0.171" #| default('') }}" # Important when you have MasterHA; # IP of either your LB or the VIP to be used. 20 | ## masterha_fqdn is usually the dns name of masterha_ip above. (We cannot get it automatically in ansible...) 21 | ## This value is important in order to set apiServerCertSANs in the certs correctly 22 | masterha_fqdn: "master-{{ CLUSTER_NAME }}.{{ CORP_DNS_DOMAIN | default ('corp.example.com') }}" # Important when you have MasterHA, in order to set apiServerCertSANs correctly 23 | #masterha_fqdn: "{{ lookup('dig', masterha_ip, 'qtype=PTR') }}" # but requires some pip modules on host... 24 | 25 | #masterha_bindPort: 6443 #default is 6443; We recommend to keep it 6443. 26 | ### end of masterha topic 27 | 28 | ## When masterha_type is set to "vip", keepalived is deployed automatically. Options are to deploy it via linux package (rpm/deb) or using a docker image. 29 | ## if you move from one type to another, please make sure you manually remove the previous setup 30 | ## E.g. moving from package to docker, manually do: systemctl stop keepalived; systemctl disable keepalived 31 | ## E.g.
moving from docker to package, manually do: docker rm -f keepalived 32 | ## Can be either 'docker' or 'package' or 'provided' (when already installed outside of this playbook; this playbook will generate the configuration and check script) 33 | masterha_vip_keepalived_deploy_type: docker 34 | masterha_vip_keepalived_docker_image: osixia/keepalived:2.0.17 # 2.0.17+; older versions do not have curl 35 | 36 | ## The right way is to always define machines with FQDN in the inventory file (hosts file) 37 | ## using an http proxy for getting to the internet (outside of the env) works fine just by setting nodes with fqdn in the inventory, without the below settings 38 | ## Use the below fqdn functionality only if really required (strong understanding of risks)! 39 | ## Also do not mix (some fqdn some short name). 40 | fqdn: # decide where to force the use of fqdn for the non-masterha master and for nodes. When set to false, it will use the name as defined in the inventory file 41 | always: false # makes all the below true 42 | master: true # when true, actions like wait/join will be done against the dns name instead of the IP 43 | node: false # when true, the join command will have --node-name set to fqdn. When false, k8s will set it based on how the node machine answers to the hostname command 44 | 45 | ############## THE BELOW SECTION IS NO LONGER RELEVANT, as NETWORK comes via HELM CHARTS (e.g. tigera-operator from calico) 46 | #Define network for K8S services 47 | SERVICE_NETWORK_CIDR: 10.96.0.0/12 48 | 49 | ## Select pod Network. One may add more simply by adding the deployment url and the pod network cidr it needs 50 | ## This section is obsolete. By default calico is installed using its helm charts (see addons.yaml) 51 | #podNetwork: 'calico' 52 | #'flannel' 53 | #'weavenet' 54 | #'calico' 55 | 56 | # flannel: 57 | # - podSubnet: 10.244.0.0/16 58 | # - urls: 59 | # - https://raw.githubusercontent.com/coreos/flannel/master/Documentation/kube-flannel.yml 60 | # 61 | # calico: 62 | # - podSubnet: 192.168.0.0/16 63 | # - urls: 64 | # - https://docs.projectcalico.org/v3.2/getting-started/kubernetes/installation/hosted/etcd.yaml 65 | # - https://docs.projectcalico.org/v3.2/getting-started/kubernetes/installation/rbac.yaml 66 | # - https://docs.projectcalico.org/v3.2/getting-started/kubernetes/installation/hosted/calico.yaml 67 | # 68 | # weavenet: 69 | # - podSubnet: 10.32.0.0/12 70 | # - urls: 71 | # #- "https://cloud.weave.works/k8s/net?k8s-version={{ClusterConfiguration.kubernetesVersion}}&env.IPALLOC_RANGE={{POD_NETWORK_CIDR | default ('10.32.0.0/12') }}" 72 | # - "https://cloud.weave.works/k8s/net?k8s-version={{ClusterConfiguration.kubernetesVersion}}&env.IPALLOC_RANGE='10.32.0.0/12'" 73 | 74 | POD_NETWORK_CIDR: 10.244.0.0/16 # Exactly this one is required when the Flannel network is used. It can also be used for calico, which autodetects the range. 75 | #POD_NETWORK_CIDR: '192.168.0.0/16' # Calico is able to autodetect, this should never be required. 76 | #POD_NETWORK_CIDR: '10.32.0.0/12' # Exactly this one is required when the Weave network is used (with defaults). If you use other network solutions, this entry can be commented out.
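## Illustrative sketch (kept commented out, not consumed by the playbook): how the two CIDRs above
## typically land in the kubeadm config. The exact structure lives in group_vars/all/ClusterConfiguration.yml
## (not shown here), so treat the field layout below as an assumption based on the standard kubeadm
## ClusterConfiguration networking schema:
# ClusterConfiguration:
#   networking:
#     serviceSubnet: "{{ SERVICE_NETWORK_CIDR }}"   # e.g. 10.96.0.0/12
#     podSubnet: "{{ POD_NETWORK_CIDR }}"           # e.g. 10.244.0.0/16 (flannel) or 192.168.0.0/16 (calico)
#     dnsDomain: "{{ custom.networking.dnsDomain }}"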
77 | 78 | ##### 79 | ## NETWORK 80 | ## usually, it's not possible to have more than one network solution (but projects like "Multus" exist) 81 | ## options: https://kubernetes.io/docs/admin/addons/ 82 | ## Usually choices are: flannel, weavenet, calico 83 | 84 | ## We have moved the networking deploy to the helm charts part (addons.yaml) 85 | ## Should you want to use an overlay network that does not have a helm chart, uncomment k8s_network_addons_urls along with one of its options: 86 | 87 | #k8s_network_addons_urls: 88 | 89 | ## CALICO # For Calico one has to also ensure the above setting ClusterConfiguration.networking.podSubnet is set to 192.168.0.0/16 ) 90 | ## new 2020: as per: https://docs.projectcalico.org/getting-started/kubernetes/self-managed-onprem/onpremises#install-calico-with-kubernetes-api-datastore-50-nodes-or-less 91 | ## "If you are using a different pod CIDR with kubeadm, no changes are required - Calico will automatically detect the CIDR based on the running configuration." 92 | #- https://docs.projectcalico.org/manifests/calico.yaml 93 | # - https://docs.projectcalico.org/v3.2/getting-started/kubernetes/installation/hosted/etcd.yaml 94 | # - https://docs.projectcalico.org/v3.2/getting-started/kubernetes/installation/rbac.yaml 95 | # - https://docs.projectcalico.org/v3.2/getting-started/kubernetes/installation/hosted/calico.yaml 96 | ## Other Calico version (newer, reusing the etcd of k8s, but with other limitations, use with care): 97 | # - https://docs.projectcalico.org/v3.2/getting-started/kubernetes/installation/hosted/kubernetes-datastore/calico-networking/1.7/calico.yaml 98 | ## OLDER_CALICO: 99 | # - https://docs.projectcalico.org/v2.6/getting-started/kubernetes/installation/hosted/kubeadm/1.6/calico.yaml # versions are 2.4,2.5,2.6 100 | 101 | 102 | ## OR 103 | 104 | ## Flannel: (for Flannel one has to also ensure the above setting ClusterConfiguration.networking.podSubnet is set to 10.244.0.0/16 ) 105 | ##- https://raw.githubusercontent.com/coreos/flannel/master/Documentation/k8s-manifests/kube-flannel-rbac.yml 106 | #- https://raw.githubusercontent.com/coreos/flannel/v0.9.1/Documentation/kube-flannel.yml # For the latest, replace the version with master 107 | #- https://raw.githubusercontent.com/coreos/flannel/v0.10.0/Documentation/kube-flannel.yml 108 | # flannel for 1.12 (fixes tolerations, and the fix is not in v0.10.0) 109 | #- https://raw.githubusercontent.com/coreos/flannel/master/Documentation/kube-flannel.yml 110 | 111 | # OR 112 | 113 | ## Weave: #https://www.weave.works/docs/net/latest/kubernetes/kube-addon/ 114 | #- https://cloud.weave.works/k8s/net?k8s-version={{ClusterConfiguration.kubernetesVersion}}&{{POD_NETWORK_CIDR | default ('env.IPALLOC_RANGE=10.32.0.0/12') }} 115 | #- "https://cloud.weave.works/k8s/net?k8s-version={{ClusterConfiguration.kubernetesVersion}}&env.IPALLOC_RANGE={{POD_NETWORK_CIDR | default ('10.32.0.0/12') }}" 116 | 117 | # OR 118 | ## kube-router 119 | #- https://raw.githubusercontent.com/cloudnativelabs/kube-router/master/daemonset/kube-router-all-service-daemonset.yaml 120 | ##### 121 | 122 | -------------------------------------------------------------------------------- /roles/storage/tasks/rook.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # - hosts: master 3 | # become: yes 4 | # become_method: sudo 5 | # tags: 6 | # - rook 7 | # tasks: 8 | 9 | ## rook common: 10 | - block: 11 | - set_fact: 12 | env_kc: '{{ proxy_env |default({}) | combine ({"PATH" :
"/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin" }) | combine ({"KUBECONFIG" :"/etc/kubernetes/admin.conf"}) }}' 13 | tags: 14 | - always 15 | 16 | - name: k8s cluster pre-installation sanity - check if all current k8s pods are in Running status 17 | environment: 18 | KUBECONFIG: /etc/kubernetes/admin.conf 19 | shell: "kubectl get --all-namespaces pods --no-headers | grep -v -w 'Running' || true " 20 | register: command_result 21 | tags: 22 | - sanity 23 | until: command_result.stdout == "" 24 | retries: 30 25 | delay: 3 26 | changed_when: false 27 | 28 | - name: ensure jq third party exists on the machine (can come from from epel, pip, etc) 29 | shell: jq --version 30 | changed_when: false 31 | 32 | - name: install rook operator (using rook.operator_url) 33 | environment: '{{env_kc}}' 34 | command: /usr/bin/kubectl apply -f {{ item }} 35 | when: rook.operator_url is defined 36 | with_items: 37 | - "{{ rook.operator_url | default ('') }}" 38 | 39 | - name: git clone rook (till charts.rook.io is created) 40 | environment: '{{env_kc}}' 41 | git: 42 | repo: 'https://github.com/rook/rook.git' 43 | dest: /tmp/rook 44 | force: yes 45 | depth: 1 46 | version: master 47 | recursive: no 48 | when: rook.operator_url is defined 49 | 50 | - name: deploy rook operator (using helm chart) - prefered method 51 | environment: '{{env_kc}}' 52 | #command: 'helm install {{ item.repo }} --namespace {{ item.namespace | default("default") }} --name {{ item.name }} {{ item.options | default ("") }}' 53 | #command: 'helm install rook/rook-operator --namespace rook --name rook-operator --set image.pullPolicy=Always ' 54 | command: 'helm install /tmp/rook/demo/helm/rook-operator/ --namespace rook --name rook-operator --set image.pullPolicy=Always ' 55 | when: rook.operator_url is not defined 56 | 57 | - name: rook operator pod sanity 58 | environment: 59 | KUBECONFIG: /etc/kubernetes/admin.conf 60 | shell: "kubectl get --all-namespaces pods --no-headers | grep -v -w 'Running' || true " 61 | register: command_result 62 | tags: 63 | - sanity 64 | until: command_result.stdout == "" 65 | retries: 30 66 | delay: 3 67 | changed_when: false 68 | 69 | - name: wait for rook.io/, Kind=Cluster to be created 70 | environment: 71 | KUBECONFIG: /etc/kubernetes/admin.conf 72 | shell: kubectl get thirdpartyresources --no-headers | grep rook.io | grep -i cluster 73 | register: command_result 74 | tags: 75 | - sanity 76 | until: command_result.stdout != "" 77 | retries: 30 78 | delay: 3 79 | changed_when: false 80 | 81 | # This is still required... 
82 | - name: Wait few more seconds for rook.io/, Kind=Cluster to be created 83 | pause: seconds=0 84 | changed_when: false 85 | 86 | - name: prepare rook-cluster.yml file 87 | template: 88 | src: rook-cluster.j2 89 | dest: /tmp/rook-cluster.yml 90 | force: yes 91 | tags: 92 | - rook-cluster 93 | 94 | - name: install rook cluster and create rook namespace 95 | environment: '{{env_kc}}' 96 | command: /usr/bin/kubectl apply -f {{ item }} 97 | with_items: 98 | - /tmp/rook-cluster.yml 99 | tags: 100 | - rook-cluster 101 | 102 | - name: rook cluster deploy sanity - wait for all installed pods to become Running 103 | environment: 104 | KUBECONFIG: /etc/kubernetes/admin.conf 105 | shell: "kubectl get --all-namespaces pods --no-headers | grep -v -w 'Running' || true " 106 | register: command_result 107 | tags: 108 | - sanity 109 | - rook 110 | until: command_result.stdout == "" 111 | retries: 30 112 | delay: 3 113 | changed_when: false 114 | 115 | - name: prepare rook-ceph_conf-ConfigMap.yml file 116 | template: 117 | src: rook_ceph_conf.j2 118 | dest: /tmp/rook_ceph_conf.yml 119 | force: yes 120 | when: rook.ceph_conf is defined 121 | 122 | - name: install rook ceph_conf config map - namespace rook should be available 123 | environment: '{{env_kc}}' 124 | command: /usr/bin/kubectl apply -f {{ item }} 125 | when: rook.ceph_conf is defined 126 | with_items: 127 | - /tmp/rook_ceph_conf.yml 128 | 129 | - name: rook-tools allowed_consumer_namespaces secret injection 130 | environment: 131 | KUBECONFIG: /etc/kubernetes/admin.conf 132 | shell: kubectl get secret rook-admin -n rook -o json | jq '.metadata.namespace = "{{ item }}"' | kubectl apply -f - 133 | when: rook.allowed_consumer_namespaces is defined 134 | with_items: "{{ rook.allowed_consumer_namespaces }}" 135 | 136 | - name: install rook client and tools - aka client_tools_url 137 | environment: '{{env_kc}}' 138 | command: /usr/bin/kubectl apply -f {{ item }} 139 | with_items: "{{ rook.client_tools_url | default ('') }}" 140 | 141 | - name: rook client tools deploy sanity - wait for all installed pods to become Running 142 | environment: 143 | KUBECONFIG: /etc/kubernetes/admin.conf 144 | shell: "kubectl get --all-namespaces pods --no-headers | grep -v -w 'Running' || true " 145 | register: command_result 146 | when: rook.client_tools_url is defined 147 | tags: 148 | - sanity 149 | until: command_result.stdout == "" 150 | retries: 30 151 | delay: 3 152 | 153 | - name: rook cluster deploy full sanity via rook client command rook node ls 154 | environment: 155 | KUBECONFIG: /etc/kubernetes/admin.conf 156 | #kubectl exec -it rook-tools -- bash -c "rook node ls >> /tmp/status.txt" 157 | shell: "kubectl -n rook exec rook-tools -it /usr/bin/rook node ls | tail -n +2 | grep -v ' OK ' || true " 158 | register: command_result 159 | tags: 160 | - sanity 161 | until: command_result.stdout == "" 162 | retries: 30 163 | delay: 3 164 | changed_when: false 165 | when: rook is defined and rook.enabled 166 | tags: 167 | - rook 168 | 169 | ## rook sharedfs: 170 | - block: 171 | - name: rook-client create rook sharedfs 172 | environment: 173 | KUBECONFIG: /etc/kubernetes/admin.conf 174 | shell: kubectl -n rook exec rook-tools -- rook filesystem create --name {{ item.name | default("sharedfs") }} 175 | with_items: "{{ rook.sharedfs.fs | default ('') }}" 176 | 177 | - name: rook-tools set replication(redundancy) for sharedfs data 178 | environment: 179 | KUBECONFIG: /etc/kubernetes/admin.conf 180 | shell: kubectl -n rook exec rook-tools -- ceph osd pool set {{ 
item.name | default("sharedfs") }}-data size {{ item.replication | default (2) }} 181 | with_items: '{{ rook.sharedfs.fs | default("sharedfs") }}' 182 | 183 | - name: rook-tools set replication(redundancy) for sharedfs metadata 184 | environment: 185 | KUBECONFIG: /etc/kubernetes/admin.conf 186 | shell: kubectl -n rook exec rook-tools -- ceph osd pool set {{ item.name | default("sharedfs") }}-metadata size {{ item.replication | default (2) }} 187 | with_items: "{{ rook.sharedfs.fs | default('sharedfs') }}" 188 | 189 | - name: wait for rook.io/, Kind=Pool to be created 190 | environment: 191 | KUBECONFIG: /etc/kubernetes/admin.conf 192 | shell: kubectl get thirdpartyresources --no-headers | grep rook.io | grep -i pool 193 | register: command_result 194 | tags: 195 | - sanity 196 | until: command_result.stdout != "" 197 | retries: 30 198 | delay: 3 199 | changed_when: false 200 | when: rook is defined and rook.enabled and rook.sharedfs is defined and rook.sharedfs.enabled 201 | tags: 202 | - rook 203 | - rook_sharedfs 204 | 205 | ## rook rbd: 206 | - block: 207 | - name: prepare rook-pool.yml file 208 | template: 209 | src: rook-pool.j2 210 | dest: /tmp/rook-pool.yml 211 | force: yes 212 | tags: 213 | - rbd 214 | when: rook.rbd is defined and rook.rbd.enabled 215 | 216 | - name: install rook rook-pool.yml file 217 | environment: '{{env_kc}}' 218 | command: /usr/bin/kubectl apply -f {{ item }} 219 | with_items: 220 | - /tmp/rook-pool.yml 221 | tags: 222 | - rbd 223 | when: rook.rbd is defined and rook.rbd.enabled 224 | 225 | - name: prepare rook-storageclass.yml file 226 | template: 227 | src: rook-storageclass.j2 228 | dest: /tmp/rook-storageclass.yml 229 | force: yes 230 | tags: 231 | - rbd 232 | when: rook.rbd is defined and rook.rbd.enabled 233 | 234 | - name: install rook rook-storageclass.yml file 235 | environment: '{{env_kc}}' 236 | command: /usr/bin/kubectl apply -f {{ item }} 237 | when: rook.rbd is defined and rook.rbd.enabled 238 | with_items: 239 | - /tmp/rook-storageclass.yml 240 | tags: 241 | - rbd 242 | 243 | # Block ends. Its condition was: 244 | when: rook is defined and rook.enabled and rook.rbd is defined and rook.rbd.enabled 245 | tags: 246 | - rook 247 | - rook_rbd 248 | 249 | 250 | 251 | 252 | -------------------------------------------------------------------------------- /site.yml: -------------------------------------------------------------------------------- 1 | --- 2 | ## Preparations 3 | ## Making sure python exists on all nodes, so Ansible will be able to run; make sure min vars are defined 4 | - hosts: all 5 | gather_facts: true 6 | become: true 7 | become_method: sudo 8 | pre_tasks: 9 | ## It would be best to have ansible already installed on all machines. 10 | ## But if it is not, we'll try to do it: 11 | - name: when no python2, install python2 for Ansible<2.8 (usually required on ubuntu, which defaults to python3) # Alternativelly, for Ubuntu machines, define var: ansible_python_interpreter=/usr/bin/python3 12 | ansible.builtin.raw: test -e /usr/bin/python || (apt -y update && apt install -y python-minimal) || (yum install -y python2 python-simplejson) 13 | register: output 14 | changed_when: output.stdout != "" 15 | tags: always 16 | when: 17 | - ansible_version.full is version_compare('2.8', '<') 18 | - ( ansible_python_interpreter is not defined or ansible_python_interpreter == "/usr/bin/python" ) 19 | # ansible_os_family conds. 
cannot be used as this is before gathering facts (where ansible is required) 20 | ignore_errors: true 21 | ## reason for ignore_errors: true 22 | ## "version_compare" was replaced with "version" starting ansible 2.5; 23 | ## CentOS/RHEL 7.x use ansible 2.4, so not able to grasp what version_compare is. 24 | ## Ansible 2.9 removes the version_compare and does not recognize it any longer. 25 | ## As our need is to add python2 only on versions before 2.8, if this fails 26 | ## (due to missing version_compare command), we are fine. 27 | ## We do not cover cases where it fails due to other reasons, but that is a reasonable risk, 28 | ## and that issue will be captured later in the flow. 29 | 30 | - name: when no python(2/3), install python3(Debian) python2(RedHat) for Ansible>=2.8 # Alternativelly, for Ubuntu machines, define var: ansible_python_interpreter=/usr/bin/python3 31 | ansible.builtin.raw: test -e /usr/bin/python3 || (apt -y update && apt install -y python3-minimal) || (yum install -y python3 python-simplejson) 32 | register: output 33 | changed_when: output.stdout != "" 34 | tags: always 35 | when: 36 | - ansible_version.full is version('2.8', '>=') or ( ansible_python_interpreter is defined and ansible_python_interpreter == "/usr/bin/python3" ) 37 | # ansible_os_family conds. cannot be used as this is before gathering facts (where ansible is required) 38 | ignore_errors: true 39 | ## reason for ignore_errors: true 40 | ## is similar to the one explained above (complements it) 41 | 42 | - name: Make sure proxy_env map is defined, even if not required; must be a map, e.g. empty map 43 | ansible.builtin.set_fact: 44 | proxy_env: '{{ proxy_env |default({}) }}' 45 | cacheable: yes 46 | tags: always 47 | when: proxy_env is not defined 48 | 49 | - ansible.builtin.setup: # aka gather_facts 50 | tags: always # required for tags, see ansible issue: #14228 51 | 52 | - name: test min. vars (group_vars/all) are set, like ClusterConfiguration (and k8s_network_addons_urls if netw is not via helm chart) 53 | ansible.builtin.debug: msg='Make sure min. 
vars (group_vars/all) are set, like ClusterConfiguration (and k8s_network_addons_urls if netw is not via helm chart)' 54 | when: 55 | - ClusterConfiguration is not defined 56 | - JoinConfiguration is not defined 57 | failed_when: 58 | - ClusterConfiguration is not defined 59 | - JoinConfiguration is not defined 60 | tags: always # always check if we have vars in place 61 | 62 | ## proper reset of any previous cluster (if any) 63 | - hosts: primary-master 64 | become: true 65 | become_method: sudo 66 | tags: 67 | - reset 68 | - master 69 | roles: 70 | #- { role: helm, task: helm_reset, tags: [ 'reset', 'helm_reset' ] } # in helm3 is no longer required 71 | - { role: storage, task: remove_pvs, tags: [ 'reset', 'storage_reset', 'pvs_reset' ] } 72 | - { role: storage, task: nfs_reset, tags: [ 'reset', 'storage_reset', 'nfs_reset' ] } 73 | - { role: storage, task: rook_reset, tags: [ 'reset', 'storage_reset', 'rook_reset' ] } 74 | - { role: tools, task: reset_drain, tags: [ 'reset', 'node_reset', 'drain', 'node_drain' ] } #done on master, affecting nodes 75 | 76 | ## nodes -> reset and install common part (for all nodes) 77 | - hosts: nodes 78 | become: true 79 | become_method: sudo 80 | tags: 81 | - node 82 | roles: 83 | - { role: tools, task: reset, tags: [ 'reset', 'node_reset' ], when: "inventory_hostname not in groups['masters']" } 84 | - { role: tools, task: weave_reset, tags: [ 'reset', 'node_reset', 'network_reset', 'weave_reset', 'weave' ], when: "inventory_hostname not in groups['masters']" } 85 | - { role: common, task: all, tags: [ 'common', 'install', 'common_install', 'node_install', 'node' ], when: "inventory_hostname not in groups['masters']" } 86 | 87 | ## master -> reset and install common part (for all masters - and sometimes etcd when colocated with masters) 88 | - hosts: masters 89 | become: true 90 | become_method: sudo 91 | tags: 92 | - master 93 | roles: 94 | - { role: tools, task: reset, tags: [ 'reset', 'master_reset' ] } 95 | - { role: tools, task: weave_reset, tags: [ 'reset', 'master_reset', 'network_reset', 'weave', 'weave_reset' ] } 96 | - { role: common, task: all, tags: [ 'common', 'install', 'common_install', 'master_install'] } 97 | 98 | ## master -> install keepalived on masters (relevat if HA) 99 | - hosts: masters 100 | become: true 101 | become_method: sudo 102 | any_errors_fatal: true 103 | tags: 104 | - master 105 | - install 106 | - ha 107 | - master_install 108 | roles: 109 | - role: keepalived 110 | tags: [ 'master', 'install', 'master_install', 'ha', 'keepalived'] 111 | when: 112 | - ( groups['masters'] | length ) > 1 113 | - ( custom.networking.masterha_type | default('vip') ) == 'vip' 114 | 115 | - hosts: primary-master 116 | name: primary-master (or master in general) - it applies to both ha and non-ha 117 | become: true 118 | become_method: sudo 119 | any_errors_fatal: true 120 | tags: 121 | - master 122 | - install 123 | - master_install 124 | - ha 125 | roles: 126 | - { role: primary-master, task: primary, tags: [ 'primary-master', 'master', 'install', 'master_install'] } 127 | 128 | ## secondary-masters -> install secondary masters 129 | - hosts: secondary-masters 130 | become: true 131 | become_method: sudo 132 | any_errors_fatal: true 133 | tags: 134 | - master 135 | - install 136 | - ha 137 | - master_install 138 | roles: 139 | - { role: non-primary-master, tags: [ 'secondary-masters', 'master', 'install', 'master_install', 'secondary_masters'] } 140 | 141 | ## node -> install nodes (kubeadm join, etc) 142 | - hosts: nodes 143 | become: 
true 144 | become_method: sudo 145 | any_errors_fatal: true 146 | tags: 147 | - node 148 | - install 149 | - node_install 150 | roles: 151 | - { role: non-primary-master, tags: [ 'node', 'install', 'node_install'], when: "inventory_hostname not in groups['masters']" } 152 | 153 | ## node -> label nodes (even when master is also a node) 154 | - hosts: nodes 155 | become: true 156 | become_method: sudo 157 | any_errors_fatal: true 158 | tags: 159 | - node 160 | - install 161 | - node_install 162 | - label 163 | roles: 164 | - { role: tools, task: labels, tags: [ 'label'] } 165 | 166 | ## Post deploy (network, storage, taints, helm installation, helm charts deploy, any other addons) 167 | - hosts: primary-master 168 | become: true 169 | become_method: sudo 170 | tags: 171 | - post_deploy 172 | roles: 173 | - { role: tools, task: labels, tags: [ 'label'] } 174 | - { role: post_deploy, task: all, tags: [ 'post_deploy_no_helm' ] } 175 | - { role: storage, task: create_all, tags: [ 'storage', 'rook', 'nfs', 'vsphere' ] } 176 | - { role: helm, task: helm, tags: [ 'helm' ] } 177 | - { role: helm, task: charts_deploy, tags: [ 'helm', 'charts_deploy' ] } 178 | 179 | ### For fixes like vsphere's bug, we have to reboot after some more fixes... 180 | #https://github.com/vmware/kubernetes/issues/495 181 | - hosts: mustrebootlist 182 | gather_facts: false 183 | become: true 184 | become_method: sudo 185 | tags: 186 | - mustrebootlist 187 | - vsphere_bug_fix 188 | - vsphere 189 | roles: 190 | - { role: tools, task: reboot, tags: [ 'reboot_minimal' ], when: "ClusterConfiguration.cloudProvider is defined and ClusterConfiguration.cloudProvider == 'vsphere' and allow_restart | default(False) and vsphere_bug_fix is defined and vsphere_bug_fix" } 191 | 192 | ## Generic Sanity 193 | - hosts: masters 194 | become: true 195 | become_method: sudo 196 | tags: 197 | - master 198 | pre_tasks: 199 | - name: remove temporary mustreboot temporary group 200 | group: 201 | name: mustrebootlist 202 | state: absent 203 | roles: 204 | - { role: tools, task: cluster_sanity, tags: [ 'cluster_sanity', 'sanity' ] } 205 | - { role: tools, task: postinstall_messages, tags: [ 'cluster_sanity', 'sanity' ] } 206 | 207 | ## to reset/add only some (more) nodes: 208 | ## 1. keep in hosts only: 209 | ## - the master 210 | ## - the affected node (all other nodes should not be there) 211 | ## 2. Have the token defined in the group_vars/all 212 | ## 3. 
Run using only this/these tag(s): 213 | ## ansible-playbook -i hosts -v site.yml --tags "node" # same with: ansible-playbook -i hosts -v site.yml --tags "node_reset,node_install,cluster_sanity,cluster_info" 214 | 215 | ## To get cluster info/sanity: 216 | ## ansible-playbook -i hosts -v site.yml --tags "cluster_sanity,cluster_info" 217 | -------------------------------------------------------------------------------- /roles/common/tasks/install_k8s_packages.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: find requested k8s major minor vers 3 | set_fact: 4 | k8s_major: "{{ KUBERNETES_VERSION.split('.').0 }}" 5 | k8s_minor: "{{ KUBERNETES_VERSION.split('.').1 }}" 6 | ### - "KUBERNETES_VERSION is version_compare ('1.27', 'lt')" 7 | # Debian based (includes Ubuntu) prerequisites for using apt-get from ansible: 8 | - name: Install apt-transport-https 9 | package: name={{ item }} state={{ package_state | default ('present') }} 10 | environment: '{{ proxy_env | default ({}) }}' 11 | with_items: 12 | - apt-transport-https 13 | when: ansible_os_family == "Debian" 14 | 15 | # Create deb/yum repos for kubernetes packages (kube*, cni, etc.) 16 | - block: 17 | # Debian based (includes Ubuntu) 18 | - apt_key: 19 | url: "{{ PKGS_K8S_IO_CORE }}:/stable:/v{{k8s_major}}.{{k8s_minor}}/deb/Release.key" 20 | state: present 21 | environment: '{{ proxy_env | default ({}) }}' 22 | when: ansible_os_family == "Debian" 23 | 24 | - apt_repository: 25 | repo: "deb {{ PKGS_K8S_IO_CORE }}:/stable:/v{{k8s_major}}.{{k8s_minor}}/deb/ /" 26 | state: present 27 | #filename: 'kubernetes.list' 28 | #if filename not defined, looks filename generated like: pkgs_k8s_io_core_stable_v1_29_deb.list 29 | update_cache: yes 30 | environment: '{{ proxy_env | default ({}) }}' 31 | when: ansible_os_family == "Debian" 32 | 33 | # RedHat based (includes CentOS, RHEL, Fedora, Oracle, etc.) 
34 | - name: Create kubernetes yum repository 35 | yum_repository: 36 | name: kubernetes 37 | description: Kubernetes 38 | baseurl: "{{ PKGS_K8S_IO_CORE }}:/stable:/v{{k8s_major}}.{{k8s_minor}}/rpm/" 39 | #http://yum.kubernetes.io/repos/kubernetes-el7-x86_64 40 | gpgcheck: 0 # to allow internal repos also 41 | when: ansible_os_family == "RedHat" # and HOST_ARCH == "amd64" 42 | 43 | - name: add proxy for the repo 44 | ini_file: 45 | dest: /etc/yum.repos.d/kubernetes.repo 46 | section: "{{item}}" 47 | option: proxy 48 | value: "{{proxy_env.https_proxy | default ('') }}" 49 | with_items: [ 'kubernetes' ] 50 | when: 51 | - proxy_env is defined 52 | - proxy_env.https_proxy is defined 53 | #- proxy_env.https_proxy | length > 0 54 | - ansible_os_family == "RedHat" 55 | when: kubernetes_repo_create | default('true') 56 | 57 | - name: Clean yum metadata 58 | command: yum clean all 59 | args: 60 | warn: no 61 | when: ansible_os_family == "RedHat" and package_state is defined and package_state == "latest" 62 | 63 | - name: apt-get clean metadata 64 | command: apt-get clean ; apt-file purge 65 | args: 66 | warn: no 67 | when: ansible_os_family == "Debian" and package_state is defined and package_state == "latest" 68 | 69 | # End OS dependent repo setup 70 | 71 | ### socat 72 | - name: Ansible check /usr/bin/socat exists 73 | stat: 74 | path: /usr/bin/socat 75 | register: statsocat 76 | 77 | - name: Install socat from centos/rhel/ubuntu repo 78 | package: name={{ item }} state={{ package_state | default ('present') }} 79 | #environment: '{{ proxy_env | default ({}) }}' 80 | with_items: 81 | - socat 82 | when: statsocat.stat.exists is not defined or statsocat.stat.exists == False 83 | 84 | ### tc (iptables-tc) 85 | - name: Ansible check /usr/sbin/tc exists 86 | stat: 87 | path: /usr/sbin/tc 88 | register: stattc 89 | 90 | - name: Install tc/iproute-tc from centos/rhel/ubuntu repo 91 | package: name={{ item }} state={{ package_state | default ('present') }} 92 | #environment: '{{ proxy_env | default ({}) }}' 93 | with_items: 94 | - iproute-tc 95 | when: 96 | - stattc.stat.exists is not defined or stattc.stat.exists == False 97 | - ansible_os_family == "RedHat" 98 | 99 | - name: Install tc/iproute2 Ubuntu/Debian 100 | package: name={{ item }} state={{ package_state | default ('present') }} 101 | #environment: '{{ proxy_env | default ({}) }}' 102 | with_items: 103 | - iproute2 104 | when: 105 | - stattc.stat.exists is not defined or stattc.stat.exists == False 106 | - ansible_os_family == "Debian" 107 | 108 | ### ipset 109 | - name: Ansible check /usr/sbin/ipset exists 110 | stat: 111 | # ubuntu 18.04 /sbin/ipset, but we skip checking it there to simplify code. 
Keeping only ubuntu 20.04 and rhel based 112 | path: /usr/sbin/ipset 113 | register: statipset 114 | 115 | - name: Install ipset from centos/rhel/ubuntu repo 116 | package: name={{ item }} state={{ package_state | default ('present') }} 117 | #environment: '{{ proxy_env | default ({}) }}' 118 | with_items: 119 | - ipset 120 | when: statipset.stat.exists is not defined or statipset.stat.exists == False 121 | 122 | ### unhold (debian) 123 | - name: unhold before upgrade/install packages (when on debian) 124 | shell: apt-mark unhold {{ item }} 125 | ignore_errors: true 126 | with_items: 127 | - kubeadm 128 | - kubelet 129 | - kubectl 130 | - kubernetes-cni 131 | - cri-tools 132 | - containernetworking-plugins 133 | when: 134 | - full_kube_apt_unhold | default (False) 135 | - ansible_os_family == "Debian" 136 | - kubelet_version is defined and kubelet_version!='present' 137 | - kubectl_version is defined and kubectl_version!='present' 138 | - kubeadm_version is defined and kubeadm_version!='present' 139 | 140 | - name: make sure there is no package containernetworking-plugins as it conflicts with kubernetes-cni 141 | package: name={{ item }} state=absent 142 | with_items: 143 | - containernetworking-plugins 144 | 145 | ### kubelet 146 | - name: Install kubelet when kubelet_version is not defined 147 | package: name={{ item }} state={{ package_state | default ('present') }} 148 | #environment: '{{ proxy_env | default ({}) }}' 149 | with_items: 150 | - kubelet 151 | when: kubelet_version is not defined 152 | 153 | - name: Install kubelet when Debian and kubelet_version is defined 154 | package: name="{{ item }}={{kubelet_version | regex_replace('v')}}*" state=present force=yes 155 | #environment: '{{ proxy_env | default ({}) }}' 156 | with_items: 157 | - kubelet 158 | when: kubelet_version is defined and ( kubelet_version!='present' or kubelet_version!='latest' ) and ansible_os_family == "Debian" 159 | 160 | - name: Install kubelet when RedHat and kubelet_version is defined 161 | package: name="{{ item }}-{{kubelet_version | regex_replace('v')}}" state=present allow_downgrade=yes 162 | #environment: '{{ proxy_env | default ({}) }}' 163 | with_items: 164 | - kubelet 165 | when: kubelet_version is defined and ( kubelet_version!='present' or kubelet_version!='latest' ) and ansible_os_family == "RedHat" 166 | 167 | - name: Install kubelet when kubelet_version is latest 168 | package: name={{ item }} state={{kubelet_version}} 169 | #environment: '{{ proxy_env | default ({}) }}' 170 | with_items: 171 | - kubelet 172 | when: kubelet_version is defined and ( kubelet_version=='present' or kubelet_version=='latest' ) 173 | 174 | ############## 175 | ### kubectl 176 | - name: Install kubectl when kubectl_version not defined 177 | package: name={{ item }} state={{ package_state | default ('present') }} 178 | #environment: '{{ proxy_env | default ({}) }}' 179 | with_items: 180 | - kubectl 181 | when: kubectl_version is not defined 182 | 183 | - name: Install kubectl when Debian and when kubectl_version is defined # ansible bug 29705 184 | package: name="{{ item }}={{kubectl_version | regex_replace('v')}}*" state=present force=yes 185 | #environment: '{{ proxy_env | default ({}) }}' 186 | with_items: 187 | - kubectl 188 | when: kubectl_version is defined and ( kubectl_version!='present' or kubectl_version!='latest' ) and ansible_os_family == "Debian" 189 | 190 | - name: Install kubectl when RedHat and when kubectl_version is defined 191 | package: name="{{ item }}-{{kubectl_version | regex_replace('v')}}" 
state=present allow_downgrade=yes 192 | #environment: '{{ proxy_env | default ({}) }}' 193 | with_items: 194 | - kubectl 195 | when: kubectl_version is defined and ( kubectl_version!='present' or kubectl_version!='latest' ) and ansible_os_family == "RedHat" 196 | 197 | - name: Install kubectl when kubectl_version is latest 198 | package: name={{ item }} state={{kubectl_version}} 199 | #environment: '{{ proxy_env | default ({}) }}' 200 | with_items: 201 | - kubectl 202 | when: kubectl_version is defined and ( kubectl_version=='present' or kubectl_version=='latest' ) 203 | 204 | ############# 205 | ### kubeadm 206 | - name: Install kubeadm when kubeadm_version is not defined 207 | package: name={{ item }} state={{ package_state | default ('present') }} 208 | #environment: '{{ proxy_env | default ({}) }}' 209 | with_items: 210 | - kubeadm 211 | when: kubeadm_version is not defined 212 | 213 | - name: Install kubeadm when Debian and kubeadm_version is defined # ansible bug 29705 214 | package: name="{{ item }}={{kubeadm_version | regex_replace('v')}}*" state=present force=yes 215 | #environment: '{{ proxy_env | default ({}) }}' 216 | with_items: 217 | - kubeadm 218 | when: kubeadm_version is defined and ( kubeadm_version!='present' or kubeadm_version!='latest' ) and ansible_os_family == "Debian" 219 | 220 | - name: Install kubeadm when RedHat and kubeadm_version is defined 221 | package: name="{{ item }}-{{kubeadm_version | regex_replace('v')}}" state=present allow_downgrade=yes 222 | #environment: '{{ proxy_env | default ({}) }}' 223 | with_items: 224 | - kubeadm 225 | when: kubeadm_version is defined and ( kubeadm_version!='present' or kubeadm_version!='latest' ) and ansible_os_family == "RedHat" 226 | 227 | - name: Install kubeadm when kubeadm_version is latest 228 | package: name={{ item }} state={{kubeadm_version}} 229 | #environment: '{{ proxy_env | default ({}) }}' 230 | with_items: 231 | - kubeadm 232 | when: kubeadm_version is defined and ( kubeadm_version=='present' or kubeadm_version=='latest' ) 233 | 234 | #- name: Install packages 235 | # package: name={{ item }} state={{ package_state | default ('present') }} 236 | # environment: '{{ proxy_env | default ({}) }}' 237 | # with_items: 238 | #- kubernetes-cni # already installed by kubelet anyway 239 | #- docker # for RH, but could be installed manually, so no checks here. 240 | #- kubeadm 241 | #- docker.io # for ubuntu 242 | 243 | 244 | --------------------------------------------------------------------------------
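## Illustrative usage note (an assumption, not taken from the files above): the kube* package versions
## installed by roles/common/tasks/install_k8s_packages.yml are driven by KUBERNETES_VERSION (whose
## major.minor selects the PKGS_K8S_IO_CORE package repo) and by the optional kubelet_version /
## kubectl_version / kubeadm_version variables, typically defined in group_vars/all. One possible way to
## pin them for a single run, sketched here only as an example invocation:
## ansible-playbook -i hosts -v site.yml -e kubeadm_version=1.29.3 -e kubelet_version=1.29.3 -e kubectl_version=1.29.3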