├── roles ├── helm │ └── tasks │ │ ├── main.yml │ │ ├── all.yml │ │ ├── helm_reset.yml │ │ ├── charts_deploy.yml │ │ ├── helm.yml │ │ └── deploy_batch.yml ├── tools │ └── tasks │ │ ├── main.yml │ │ ├── weave_reset.yml │ │ ├── reboot.yml │ │ ├── cluster_sanity.yml │ │ ├── reset_drain.yml │ │ ├── labels.yml │ │ ├── postinstall_messages.yml │ │ └── reset.yml ├── post_deploy │ └── tasks │ │ ├── main.yml │ │ ├── all.yml │ │ ├── post_cluster_deploy.yml │ │ ├── sanity.yml │ │ ├── taints.yml │ │ └── network.yml ├── storage │ ├── tasks │ │ ├── main.yml │ │ ├── all.yml │ │ ├── create_all.yml │ │ ├── nfs_reset.yml │ │ ├── nfs.yml │ │ ├── remove_pvs.yml │ │ ├── vsphere.yml │ │ ├── rook_reset.yml │ │ └── rook.yml │ ├── templates │ │ ├── rook_ceph_conf.j2 │ │ ├── rook-storageclass.j2 │ │ ├── rook-pool.j2 │ │ ├── rook-cluster.j2 │ │ └── nfs.j2 │ └── files │ │ └── vsphere_bug_fix.sh ├── common │ ├── tasks │ │ ├── main.yml │ │ ├── firewalld.yml │ │ ├── swap.yml │ │ ├── rook.yml │ │ ├── all.yml │ │ ├── various.yml │ │ ├── ntpd.yml │ │ ├── decide_master_name.yml │ │ ├── selinux.yml │ │ ├── kube_config.yml │ │ ├── kernel_modules.yml │ │ ├── iptables.yml │ │ ├── aliases_completion.yml │ │ └── install_k8s_packages.yml │ ├── templates │ │ └── cloud-config.j2 │ ├── files │ │ └── 90-kubeadm.conf │ └── handlers │ │ └── main.yml ├── primary-master │ ├── handlers │ │ └── main.yml │ └── templates │ │ └── cloud-config-vsphere-secret.j2 ├── non-primary-master │ └── handlers │ │ └── main.yml └── keepalived │ ├── templates │ ├── check_apiserver.sh.j2 │ └── keepalived.conf.j2 │ └── tasks │ └── main.yaml ├── _config.yml ├── templates ├── kured_profile1.j2 ├── metallb_profile1.j2 ├── tigera-operator_profile1.j2 ├── dashboard_profile1.j2 ├── cert-manager_profile1.j2 └── nginx-ingress_profile1.j2 ├── .gitignore ├── ansible.cfg.example ├── docs ├── architecture.md ├── upgrade_cluster.md ├── Troubleshooting.md ├── add-remove-nodes.md ├── portable_machine_setup.md ├── PRODUCTION_TIPS.md └── popular_helm_charts_cli_deploy.md ├── demo ├── demo-svc.yml ├── demo-ingress.yml ├── demo-claim.yml └── demo-pod.yml ├── .gitattributes ├── .github └── workflows │ ├── greetings.yml │ └── stale.yml ├── scripts └── test.sh ├── allow-all-all-rbac.yml ├── group_vars └── all │ ├── JoinConfiguration.yml │ ├── InitConfiguration.yml │ ├── KubeProxyConfiguration.yml │ ├── ClusterConfiguration.yml │ ├── KubeletConfiguration.yml │ ├── storage.yml │ └── network.yml ├── vagrant_known_issues.md ├── LICENSE.md ├── hosts.example ├── other_tools ├── k8s_cli_tools.sh └── dockerize.sh ├── pre_sanity.yml ├── batch_deploy_serial_non_parallel.yml ├── all_reset.yml ├── only_nodes_only_install.yml ├── only_secondaryMasters_only_install.yml ├── all_install.yml ├── Vagrantfile └── site.yml /roles/helm/tasks/main.yml: -------------------------------------------------------------------------------- 1 | - include_tasks: "{{task}}.yml" -------------------------------------------------------------------------------- /roles/tools/tasks/main.yml: -------------------------------------------------------------------------------- 1 | - include_tasks: "{{task}}.yml" -------------------------------------------------------------------------------- /roles/post_deploy/tasks/main.yml: -------------------------------------------------------------------------------- 1 | - include_tasks: "{{task}}.yml" -------------------------------------------------------------------------------- /roles/storage/tasks/main.yml: 
-------------------------------------------------------------------------------- 1 | - include_tasks: "{{task}}.yml" -------------------------------------------------------------------------------- /roles/common/tasks/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - include_tasks: "{{task}}.yml" 3 | -------------------------------------------------------------------------------- /roles/common/templates/cloud-config.j2: -------------------------------------------------------------------------------- 1 | 2 | {{ cloud_config | indent (0) }} 3 | -------------------------------------------------------------------------------- /roles/helm/tasks/all.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - include_tasks: helm_reset.yml 3 | - include_tasks: helm.yml -------------------------------------------------------------------------------- /_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-cayman 2 | include: ["README.md"] 3 | exclude: ["*.*"] 4 | -------------------------------------------------------------------------------- /roles/common/files/90-kubeadm.conf: -------------------------------------------------------------------------------- 1 | net.bridge.bridge-nf-call-iptables = 1 2 | net.bridge.bridge-nf-call-ip6tables = 1 3 | -------------------------------------------------------------------------------- /templates/kured_profile1.j2: -------------------------------------------------------------------------------- 1 | extraArgs: 2 | period: 0h07m0s 3 | image: 4 | repository: '{{ images_repo | default ("ghcr.io") }}/kubereboot/kured' 5 | -------------------------------------------------------------------------------- /roles/post_deploy/tasks/all.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - include_tasks: network.yml 3 | - include_tasks: post_cluster_deploy.yml 4 | - include_tasks: sanity.yml 5 | - include_tasks: taints.yml 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.retry 2 | *.log 3 | *.tmp 4 | test.sh 5 | tmp.* 6 | temp.* 7 | .vagrant 8 | ansible.cfg 9 | ssh_config* 10 | /hosts 11 | #/group_vars/all/tmp.yaml 12 | #/group_vars/all/temp.yaml 13 | -------------------------------------------------------------------------------- /roles/storage/templates/rook_ceph_conf.j2: -------------------------------------------------------------------------------- 1 | kind: ConfigMap 2 | apiVersion: v1 3 | metadata: 4 | name: rook-config-override 5 | namespace: rook 6 | data: 7 | config: | 8 | {{ rook.ceph_conf | indent(4) }} 9 | 10 | -------------------------------------------------------------------------------- /roles/storage/tasks/all.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - include_tasks: remove_pvs.yml 3 | - include_tasks: nfs_reset.yml 4 | - include_tasks: nfs.yml 5 | - include_tasks: rook_reset.yml 6 | - include_tasks: rook.yml 7 | - include_tasks: vsphere.yml 8 | -------------------------------------------------------------------------------- /roles/storage/tasks/create_all.yml: -------------------------------------------------------------------------------- 1 | --- 2 | #- include_tasks: remove_pvs.yml 3 | #- include_tasks: nfs_reset.yml 4 | - include_tasks: nfs.yml 5 | #- 
include_tasks: rook_reset.yml 6 | - include_tasks: rook.yml 7 | - include_tasks: vsphere.yml 8 | -------------------------------------------------------------------------------- /roles/primary-master/handlers/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Restart docker 3 | systemd: name=docker state=restarted enabled=yes 4 | 5 | - name: Reload systemd 6 | command: systemctl daemon-reload 7 | 8 | - name: Restart kubelet 9 | systemd: name=kubelet state=restarted enabled=yes 10 | -------------------------------------------------------------------------------- /roles/non-primary-master/handlers/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Restart docker 3 | systemd: name=docker state=restarted enabled=yes 4 | 5 | - name: Reload systemd 6 | command: systemctl daemon-reload 7 | 8 | - name: Restart kubelet 9 | systemd: name=kubelet state=restarted enabled=yes 10 | -------------------------------------------------------------------------------- /ansible.cfg.example: -------------------------------------------------------------------------------- 1 | [defaults] 2 | #remote_user=vagrant 3 | become=true 4 | become_method=sudo 5 | stdout_callback = debug 6 | 7 | [ssh_connection] 8 | ssh_args = -C -o ControlMaster=auto -o ControlPersist=60s -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -F ./ssh_config 9 | pipelining = True 10 | -------------------------------------------------------------------------------- /docs/architecture.md: -------------------------------------------------------------------------------- 1 | ## Namespaces: 2 | ### monitoring 3 | Holding: 4 | - prometheus operator 5 | - metrics-server 6 | 7 | ### cert-manager 8 | Holding: 9 | - cert-manager 10 | 11 | ### kube-system 12 | Holding: 13 | - nginx-controller 14 | - kured 15 | - heapster 16 | - networking (e.g. 
flannel) 17 | - dashboard 18 | -------------------------------------------------------------------------------- /demo/demo-svc.yml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | labels: 5 | app: demo-svc 6 | name: demo-svc 7 | namespace: default 8 | spec: 9 | ports: 10 | - port: 80 11 | protocol: TCP 12 | targetPort: 80 13 | selector: 14 | app: demo 15 | sessionAffinity: None 16 | type: NodePort 17 | -------------------------------------------------------------------------------- /demo/demo-ingress.yml: -------------------------------------------------------------------------------- 1 | apiVersion: networking.k8s.io/v1 2 | kind: Ingress 3 | metadata: 4 | name: demo 5 | namespace: default 6 | spec: 7 | rules: 8 | - host: pv.k8s.cloud.corp.example.com 9 | http: 10 | paths: 11 | - path: / 12 | pathType: Prefix 13 | backend: 14 | service: 15 | name: demo-svc 16 | port: 17 | number: 80 18 | -------------------------------------------------------------------------------- /roles/common/tasks/firewalld.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Disable firewalld (CentOS/RHEL) 3 | systemd: name=firewalld state=stopped enabled=no 4 | when: ansible_os_family == "RedHat" 5 | ignore_errors: true # in case it does not exist 6 | # For developing a firewalld-friendly solution, check: 7 | # https://github.com/kubernetes/contrib/tree/master/ansible/roles/ 8 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # THIS IS ONLY FOR THE gitattributes REPOSITORY. 2 | # Handle line endings automatically for files detected as text 3 | # and leave all files detected as binary untouched. 4 | * text=auto 5 | 6 | # 7 | # The above will handle all files NOT found below 8 | # 9 | # These files are text and should be normalized (Convert crlf => lf) 10 | *.gitattributes text 11 | .gitignore text 12 | *.md text 13 | *.yml text 14 | *.yaml text 15 | -------------------------------------------------------------------------------- /.github/workflows/greetings.yml: -------------------------------------------------------------------------------- 1 | name: Greetings 2 | 3 | on: [pull_request, issues] 4 | 5 | jobs: 6 | greeting: 7 | runs-on: ubuntu-latest 8 | steps: 9 | - uses: actions/first-interaction@v1 10 | with: 11 | repo-token: ${{ secrets.GITHUB_TOKEN }} 12 | issue-message: 'Your constructive feedback makes this project stronger! Thank you for opening your first issue!' 13 | pr-message: | 14 | Many thanks! Hope you enjoyed it as much as we did! 
15 | -------------------------------------------------------------------------------- /demo/demo-claim.yml: -------------------------------------------------------------------------------- 1 | kind: PersistentVolumeClaim 2 | apiVersion: v1 3 | metadata: 4 | name: demo-claim 5 | #annotations: # When not defined, the default storageClass is used (if any defined) 6 | #volume.beta.kubernetes.io/storage-class: rook-block 7 | #volume.beta.kubernetes.io/storage-class: thin #vsphere 8 | #volume.beta.kubernetes.io/storage-class: nfs.k8s 9 | spec: 10 | accessModes: 11 | - ReadWriteOnce 12 | resources: 13 | requests: 14 | storage: 10Mi 15 | -------------------------------------------------------------------------------- /templates/metallb_profile1.j2: -------------------------------------------------------------------------------- 1 | controller: 2 | metrics: 3 | enabled: true 4 | serviceMonitor: 5 | enabled: true 6 | nodeSelector: 7 | node-role.kubernetes.io/infra: "" 8 | global: 9 | imageRegistry: '{{ images_repo | default ("docker.io") }}' 10 | installCRDs: true 11 | prometheusRule: 12 | enabled: true 13 | speaker: 14 | metrics: 15 | enabled: true 16 | serviceMonitor: 17 | enabled: true 18 | nodeSelector: 19 | node-role.kubernetes.io/infra: "" 20 | -------------------------------------------------------------------------------- /roles/primary-master/templates/cloud-config-vsphere-secret.j2: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Secret 3 | metadata: 4 | name: "{{ cloud_config_vsphere_specific.secret_name }}" 5 | namespace: "{{ cloud_config_vsphere_specific.secret_namespace }}" 6 | type: Opaque 7 | data: 8 | {{ cloud_config_vsphere_specific.server }}.username: {{ cloud_config_vsphere_specific.username | b64encode }} 9 | {{ cloud_config_vsphere_specific.server }}.password: {{ cloud_config_vsphere_specific.password | b64encode }} 10 | 11 | -------------------------------------------------------------------------------- /roles/common/tasks/swap.yml: -------------------------------------------------------------------------------- 1 | --- 2 | ### Handling SWAP (k8s 1.8 expects swapoff or configure kubelet to accept it 3 | - block: 4 | - name: swapoff (prereq k8s 1.8) 5 | command: swapoff --all 6 | 7 | - name: Remove swap from /etc/fstab (when named swap) 8 | mount: 9 | name: swap 10 | fstype: swap 11 | state: absent 12 | 13 | - name: Remove swap from /etc/fstab (when named none) 14 | mount: 15 | name: none 16 | fstype: swap 17 | state: absent 18 | when: turn_swapoff | default (true) -------------------------------------------------------------------------------- /scripts/test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cd ~/work/ 4 | mv -f ./kubeadm-playbook ./kubeadm-playbook.old || true 5 | sudo cp -rp ~researchiteng/git/kubeadm-playbook . 6 | sudo chown -R `id -u`:`id -g` ./kubeadm-playbook 7 | cd ./kubeadm-playbook 8 | cp -p .././kubeadm-playbook.old/hosts . 
9 | sed -i 's/myk8s.corp.example.com/ap/' group_vars/all/network.yml 10 | sudo kubeadm reset -f 11 | ansible-playbook -i hosts site.yml 12 | sudo cp -pf /etc/kubernetes/admin.conf ~/.kube/config 13 | sudo chown -R `id -u`:`id -g` ~/.kube/config 14 | 15 | -------------------------------------------------------------------------------- /roles/common/tasks/rook.yml: -------------------------------------------------------------------------------- 1 | --- 2 | ### Optionally install packages for rook 3 | - block: 4 | - name: Install packages required by rook (ceph) storage 5 | package: name={{ item }} state={{ package_state | default ('present') }} 6 | with_items: 7 | - ceph-common 8 | 9 | - name: Install packages required by rook (ceph) storage setup (usually required only on masters) 10 | package: name={{ item }} state={{ package_state | default ('present') }} 11 | with_items: 12 | - jq 13 | when: rook is defined and rook.enabled 14 | -------------------------------------------------------------------------------- /roles/storage/templates/rook-storageclass.j2: -------------------------------------------------------------------------------- 1 | apiVersion: storage.k8s.io/v1 2 | kind: StorageClass 3 | metadata: 4 | name: rook-block 5 | provisioner: rook.io/block 6 | parameters: 7 | {{ rook.rbd.storageclass_parameters | to_yaml | indent(2) }} 8 | 9 | # as per https://github.com/rook/rook/blob/master/demo/kubernetes/rook-storageclass.yaml 10 | #pool: replicapool 11 | # Specify the Rook cluster from which to create volumes. If not specified, it will use `rook` as the namespace and name of the cluster. 12 | # clusterName: rook 13 | # clusterNamespace: rook 14 | 15 | -------------------------------------------------------------------------------- /allow-all-all-rbac.yml: -------------------------------------------------------------------------------- 1 | # Create the clusterrole and clusterrolebinding: 2 | # $ kubectl create -f allow-all-all-rbac.yml 3 | --- 4 | kind: ClusterRoleBinding 5 | apiVersion: rbac.authorization.k8s.io/v1beta1 6 | metadata: 7 | name: cluster-admin-binding 8 | roleRef: 9 | apiGroup: rbac.authorization.k8s.io 10 | kind: ClusterRole 11 | name: cluster-admin 12 | subjects: 13 | - kind: ServiceAccount 14 | name: default 15 | namespace: kube-system 16 | - kind: ServiceAccount 17 | name: default 18 | namespace: ceph 19 | - kind: ServiceAccount 20 | name: default 21 | namespace: default 22 | -------------------------------------------------------------------------------- /roles/tools/tasks/weave_reset.yml: -------------------------------------------------------------------------------- 1 | --- 2 | #- hosts: all 3 | # become: yes 4 | # become_method: sudo 5 | # tags: 6 | # - weave 7 | # - reset 8 | # tasks: 9 | 10 | # - name: Copy weave net script 11 | # environment: '{{ proxy_env | default ({}) }}' 12 | # get_url: url=https://raw.githubusercontent.com/weaveworks/weave/master/weave dest=/usr/local/bin/weave mode=u+rxw force=yes 13 | # ignore_errors: true # Currently there is no way to check if the user is using weave 14 | 15 | # This is also part of the reset.yml 16 | - name: Reset weave 17 | shell: /usr/local/bin/weave reset 18 | ignore_errors: true 19 | 20 | 21 | -------------------------------------------------------------------------------- /roles/common/tasks/all.yml: -------------------------------------------------------------------------------- 1 | --- 2 | #- include_tasks: reset.yml 3 | - include_tasks: selinux.yml # THIS MIGHT REBOOT MACHINE!!! 
4 | - include_tasks: install_k8s_packages.yml 5 | - include_tasks: docker.yml 6 | - include_tasks: iptables.yml 7 | - include_tasks: firewalld.yml 8 | - include_tasks: kube_config.yml 9 | - include_tasks: swap.yml 10 | - include_tasks: kernel_modules.yml 11 | when: kernel_modules_setup | default (True) 12 | - include_tasks: ntpd.yml 13 | when: ntp_setup | default (True) 14 | - include_tasks: rook.yml 15 | when: rook is defined and rook.enabled | default (False) 16 | - include_tasks: various.yml 17 | - include_tasks: aliases_completion.yml 18 | -------------------------------------------------------------------------------- /.github/workflows/stale.yml: -------------------------------------------------------------------------------- 1 | name: Mark stale issues and pull requests 2 | 3 | on: 4 | schedule: 5 | - cron: "0 23 * * *" 6 | 7 | jobs: 8 | stale: 9 | 10 | runs-on: ubuntu-latest 11 | 12 | steps: 13 | - uses: actions/stale@v1 14 | with: 15 | repo-token: ${{ secrets.GITHUB_TOKEN }} 16 | stale-issue-message: 'Stale issue' 17 | stale-pr-message: 'Stale pull request' 18 | stale-issue-label: 'no-issue-activity' 19 | stale-pr-label: 'no-pr-activity' 20 | exempt-issue-label: 'enhancement' 21 | exempt-pr-label: 'awaiting-approval' 22 | days-before-stale: 30 23 | days-before-close: 7 24 | -------------------------------------------------------------------------------- /demo/demo-pod.yml: -------------------------------------------------------------------------------- 1 | kind: Pod 2 | apiVersion: v1 3 | metadata: 4 | name: demo-pod 5 | labels: 6 | app: demo 7 | spec: 8 | volumes: 9 | - name: demo-storage 10 | persistentVolumeClaim: 11 | claimName: demo-claim 12 | 13 | containers: 14 | - name: demo-container 15 | image: nginx 16 | ports: 17 | - containerPort: 80 18 | name: "http-server" 19 | volumeMounts: 20 | - mountPath: "/usr/share/nginx/html" 21 | name: demo-storage 22 | nodeSelector: 23 | node-role.kubernetes.io/control-plane: "" 24 | tolerations: 25 | - key: "node-role.kubernetes.io/control-plane" 26 | effect: NoSchedule 27 | -------------------------------------------------------------------------------- /roles/storage/templates/rook-pool.j2: -------------------------------------------------------------------------------- 1 | apiVersion: rook.io/v1alpha1 2 | kind: Pool 3 | metadata: 4 | name: replicapool 5 | namespace: rook 6 | spec: 7 | {{ rook.rbd.pool_spec | to_yaml | indent(2) }} 8 | 9 | # as per https://github.com/rook/rook/blob/master/demo/kubernetes/rook-storageclass.yaml and https://github.com/rook/rook/blob/master/Documentation/pool-tpr.md 10 | #replication: 11 | # size: 1 12 | # For an erasure-coded pool, comment out the replication size above and uncomment the following settings. 13 | # Make sure you have enough OSDs to support the replica size or erasure code chunks. 
14 | #erasureCode: 15 | # codingChunks: 2 16 | # dataChunks: 2 17 | -------------------------------------------------------------------------------- /templates/tigera-operator_profile1.j2: -------------------------------------------------------------------------------- 1 | calicoctl: 2 | image: '{{ images_repo | default ("quay.io") }}/calico/ctl' 3 | cni: 4 | image: '{{ images_repo | default ("quay.io") }}/calico/cni' 5 | flexvol: 6 | image: '{{ images_repo | default ("quay.io") }}/calico/pod2daemon-flexvol' 7 | installation: 8 | registry: '{{ images_repo | default ("docker.io") }}' 9 | kubeControllers: 10 | image: '{{ images_repo | default ("quay.io") }}/calico/kube-controllers' 11 | node: 12 | image: '{{ images_repo | default ("quay.io") }}/calico/node' 13 | tigeraOperator: 14 | registry: '{{ images_repo | default ("quay.io") }}' 15 | typha: 16 | image: '{{ images_repo | default ("quay.io") }}/calico/typha' 17 | 18 | -------------------------------------------------------------------------------- /roles/common/tasks/various.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: set hostname with fqdn 3 | hostname: 4 | name: "{{ inventory_hostname }}" 5 | when: set_hostname_to_inventory_hostname | default ( False ) 6 | 7 | - name: set hostname with fqdn - sol2 8 | shell: hostnamectl set-hostname {{ inventory_hostname }} 9 | when: set_hostname_to_inventory_hostname | default ( False ) 10 | # This does not require restart (as per docs) 11 | 12 | - name: create dir /var/log/journal (so node-problem-detector finds any issues with the nodes, should there be any) 13 | file: 14 | path: /var/log/journal 15 | state: directory 16 | mode: 0755 17 | 18 | # Forcing restart of services 19 | #- meta: flush_handlers 20 | -------------------------------------------------------------------------------- /roles/tools/tasks/reboot.yml: -------------------------------------------------------------------------------- 1 | - name: Reboot 2 | shell: sleep 2 && /sbin/shutdown -r now 3 | async: 1 4 | poll: 0 5 | ignore_errors: true 6 | when: allow_restart | default ( false ) 7 | 8 | #- name: Pause till machine is up again 9 | # pause: 10 | # seconds: 30 11 | 12 | - name: Wait for server come back from restart 13 | local_action: wait_for 14 | args: 15 | host: "{{ inventory_hostname }}" 16 | port: 22 17 | state: started 18 | delay: 15 19 | timeout: 180 20 | 21 | #Starting Ansible 2.3 one can do: 22 | #- name: Wait for system to become reachable # Ansible 2.3+ 23 | # wait_for_connection: 24 | # timeout: 200 25 | 26 | #- name: Gather facts for first time after restart 27 | # setup: 28 | 29 | -------------------------------------------------------------------------------- /roles/post_deploy/tasks/post_cluster_deploy.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # - hosts: master 3 | # become: yes 4 | # become_method: sudo 5 | # tags: 6 | # - k8s_addons 7 | # tasks: 8 | - set_fact: 9 | env_kc: '{{ proxy_env |default({}) | combine ({"PATH" : "/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/bin" }) | combine ({"KUBECONFIG" :"/etc/kubernetes/admin.conf"}) }}' 10 | tags: 11 | - always 12 | 13 | # k8s_addons_urls is usually empty 14 | - name: Install k8s_addons_urls (with proxy) 15 | command: kubectl apply -f {{ item }} 16 | with_items: "{{ k8s_addons_urls | default ('') }}" 17 | environment: '{{env_kc}}' 18 | when: 19 | - k8s_addons_urls is defined 20 | - k8s_addons_urls | length > 0 21 | tags: 22 | - k8s_addons 23 | 
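# Illustrative only (not shipped in this repo's group_vars): a hypothetical inventory entry
# that the task above would consume, if you choose to define it:
# k8s_addons_urls:
#   - "https://example.com/manifests/extra-addon.yaml"
#   - "https://example.com/manifests/another-addon.yaml"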
-------------------------------------------------------------------------------- /roles/keepalived/templates/check_apiserver.sh.j2: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | errorExit() { 4 | echo "*** $*" 1>&2 5 | exit 1 6 | } 7 | 8 | curl --silent --max-time 2 --insecure https://localhost:{{ InitConfiguration.localAPIEndpoint.bindPort | default (6443) }}/healthz -o /dev/null || errorExit "Error GET https://localhost:{{ InitConfiguration.localAPIEndpoint.bindPort | default (6443) }}/healthz" 9 | if ip addr | grep -q {{ custom.networking.masterha_ip }}; then 10 | curl --silent --max-time 2 --insecure https://{{ custom.networking.masterha_ip }}:{{ InitConfiguration.localAPIEndpoint.bindPort | default (6443) }}/healthz -o /dev/null || errorExit "Error GET https://{{ custom.networking.masterha_ip }}:{{ InitConfiguration.localAPIEndpoint.bindPort | default (6443) }}/healthz" 11 | fi 12 | 13 | -------------------------------------------------------------------------------- /docs/upgrade_cluster.md: -------------------------------------------------------------------------------- 1 | The kubeadm upgrade is pretty clear and simple, so there is no need for much automation around it. 2 | Mainly, run it in a loop across all the machines (start with the masters): 3 | (as a first step on each node, make a backup of the /etc/kubernetes folder). 4 | https://kubernetes.io/docs/tasks/administer-cluster/kubeadm/kubeadm-upgrade/#upgrading-control-plane-nodes 5 | 6 | Upgrade only one minor version at a time (don't skip versions). 7 | (Ideally, get familiar with the process on a test machine first.) 8 | 9 | PS: 10 | The concept of a "primary master" exists only as part of the install flow, to denote where the first set of commands runs and where we run commands like fetching the join tokens, etc. 11 | The cluster as such does not have/need such a concept. 
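
For reference, the per-node loop looks roughly like the sketch below. This is only an illustration (not something this playbook automates); `v1.x.y` and `<node>` are placeholders, and the exact package commands depend on your OS:

```bash
# On the first control-plane node (upgrade the kubeadm package itself first):
cp -a /etc/kubernetes /etc/kubernetes.bak   # backup, as recommended above
kubeadm upgrade plan
kubeadm upgrade apply v1.x.y

# On every other node (secondary masters and workers), one at a time:
kubectl drain <node> --ignore-daemonsets --delete-emptydir-data   # from a machine with kubectl access
kubeadm upgrade node                                              # on the node itself
# then upgrade the kubelet/kubectl packages to the same v1.x.y via yum/dnf or apt, and:
systemctl daemon-reload && systemctl restart kubelet
kubectl uncordon <node>
```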
12 | -------------------------------------------------------------------------------- /roles/post_deploy/tasks/sanity.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Wait few seconds for network to start deploying 3 | pause: seconds=3 4 | changed_when: false 5 | 6 | - name: sanity - wait for alls pod to be running (besides kube-dns,coredns, tiller-deploy for now, as it might be forced to a node if master did not allow it due to tains) 7 | environment: 8 | KUBECONFIG: /etc/kubernetes/admin.conf 9 | shell: "kubectl get --namespace kube-system pods --no-headers | grep -v -w 'Running' | grep -v 'kube-dns' | grep -v 'coredns' | grep -v 'tiller-deploy' || true " 10 | register: command_result 11 | tags: 12 | - k8s_network_addons 13 | - sanity 14 | until: command_result.stdout == "" 15 | retries: "{{ RETRIES | default(40) }}" 16 | delay: 3 17 | changed_when: false 18 | -------------------------------------------------------------------------------- /templates/dashboard_profile1.j2: -------------------------------------------------------------------------------- 1 | image: 2 | repository: '{{ images_repo | default ("docker.io") }}/kubernetesui/dashboard' 3 | ingress: 4 | enabled: true 5 | hosts: 6 | - 'dashboard.{{ custom.networking.dnsDomain }}' 7 | - '{{ custom.networking.masterha_fqdn | default (groups["primary-master"][0]) }}' 8 | - '{{ groups["primary-master"][0] }}' 9 | metricsScraper: 10 | enabled: true 11 | image: 12 | repository: '{{ images_repo | default ("docker.io") }}/kubernetesui/metrics-scraper' 13 | nodeSelector: 14 | node-role.kubernetes.io/infra: "" 15 | protocolHttp: true 16 | rbac: 17 | clusterReadOnlyRole: true 18 | create: true 19 | tolerations: 20 | - effect: NoSchedule 21 | key: node-role.kubernetes.io/infra 22 | - effect: PreferNoSchedule 23 | key: node-role.kubernetes.io/infra 24 | -------------------------------------------------------------------------------- /roles/keepalived/templates/keepalived.conf.j2: -------------------------------------------------------------------------------- 1 | ! Configuration File for keepalived 2 | global_defs { 3 | router_id {{ CLUSTER_NAME }} 4 | } 5 | vrrp_script check_apiserver { 6 | script "/etc/keepalived/check_apiserver.sh" 7 | interval 3 8 | weight -2 9 | fall 10 10 | rise 2 11 | } 12 | 13 | vrrp_instance VI_{{ CLUSTER_NAME }}_1 { 14 | {% if 'primary-master' in group_names %} 15 | state MASTER 16 | {% else %} 17 | state BACKUP 18 | {% endif %} 19 | interface {{ ansible_default_ipv4.interface }} 20 | virtual_router_id 97 21 | {% if 'primary-master' in group_names %} 22 | priority 101 23 | {% else %} 24 | priority 100 25 | {% endif %} 26 | authentication { 27 | auth_type PASS 28 | auth_pass e1{{ CLUSTER_NAME }}483e10ad1d 29 | } 30 | virtual_ipaddress { 31 | {{ custom.networking.masterha_ip }} 32 | } 33 | track_script { 34 | check_apiserver 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /roles/storage/files/vsphere_bug_fix.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | export KUBECONFIG=/etc/kubernetes/admin.conf 3 | NeedRebootList="" 4 | for h in $(kubectl get nodes | tail -n +2 | awk '{print $1}'); do 5 | uuid=$(kubectl describe node/$h | grep -i UUID | tr '[:upper:]' '[:lower:]' | awk '{print $3}') 6 | eval kubectl patch node $h -p \'{\"spec\":{\"providerID\":\"vsphere://${uuid}\"}}\' | grep 'no change' >/dev/null 7 | if [[ $? 
-gt 0 ]]; then 8 | kubectl delete node $h # As per vmware support suggetion: delete node and restart kubelet (see code: https://github.com/kubernetes/kubernetes/blob/v1.14.1/pkg/cloudprovider/providers/vsphere/vsphere.go#L278 ) 9 | NeedRebootList="$NeedRebootList $h" 10 | fi 11 | done 12 | if [[ -n $NeedRebootList ]]; then 13 | echo "$NeedRebootList" | tr ' ' '\n' | tail -n +2 14 | fi 15 | ### NeedRebootList holds the list of machines where there was a change and requrie reboot (or maybe at least kubelet restart) 16 | 17 | -------------------------------------------------------------------------------- /roles/common/handlers/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Restart docker 3 | systemd: name=docker state=restarted enabled=yes daemon_reload=yes 4 | 5 | - name: Restart containerd 6 | systemd: name=containerd state=restarted enabled=yes daemon_reload=yes 7 | 8 | - name: Restart crio 9 | systemd: name=crio state=restarted enabled=yes daemon_reload=yes 10 | 11 | - name: Restart docker-storage-setup 12 | systemd: name=docker-storage-setup state=restarted 13 | 14 | - name: Reload systemd 15 | command: systemctl daemon-reload 16 | 17 | - name: Restart kubelet 18 | systemd: name=kubelet state=restarted enabled=yes daemon_reload=yes 19 | 20 | #Debian is ntp, RedHat ntpd 21 | #- name: Restart ntpd 22 | # systemd: name=ntpd state=restarted enabled=yes 23 | 24 | - name: Restart iptables 25 | systemd: name=iptables state=restarted enabled=yes 26 | 27 | - name: Reboot 28 | shell: sleep 2 && /sbin/shutdown -r now 29 | async: 1 30 | poll: 0 31 | ignore_errors: true 32 | when: allow_restart | default ( false ) 33 | -------------------------------------------------------------------------------- /roles/storage/tasks/nfs_reset.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # - hosts: master 3 | # gather_facts: False 4 | # become: yes 5 | # become_method: sudo 6 | # tags: 7 | # - reset 8 | # - nfs_storage 9 | # tasks: 10 | 11 | - set_fact: 12 | env_kc: '{{ proxy_env |default({}) | combine ({"PATH" : "/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin" }) | combine ({"KUBECONFIG" :"/etc/kubernetes/admin.conf"}) }}' 13 | tags: 14 | - reset 15 | - nfs_storage 16 | 17 | - name: delete old nfs provisioner 18 | environment: 19 | KUBECONFIG: /etc/kubernetes/admin.conf 20 | shell: "kubectl delete --namespace kube-system -f /tmp/nfs.yml" 21 | when: nfs_k8s is defined and nfs_k8s.enabled 22 | tags: 23 | - reset 24 | - nfs_storage 25 | ignore_errors: true 26 | 27 | - name: wipe nfs host_path 28 | file: path={{nfs_k8s.host_path}} state=absent 29 | when: nfs_k8s is defined and nfs_k8s.enabled and nfs_k8s.wipe 30 | tags: 31 | - reset 32 | - nfs_storage 33 | ignore_errors: true 34 | 35 | -------------------------------------------------------------------------------- /roles/common/tasks/ntpd.yml: -------------------------------------------------------------------------------- 1 | --- 2 | ### ntpd: 3 | - block: 4 | - name: ntpd and ntpdate | it is mandatory to have the time from all machines in sync 5 | package: state=present name={{ item }} 6 | environment: '{{ proxy_env | default ({}) }}' 7 | with_items: 8 | - ntp 9 | register: ntp_output 10 | # notify: # Debian & RH diff service name... 11 | # - Restart ntpd 12 | 13 | - name: ntpd configuration 14 | copy: content={{ntp_conf}} dest=/etc/ntp.conf backup=yes 15 | when: ntp_conf is defined 16 | # notify: # Debian & RH diff service name... 
17 | # - Restart ntpd 18 | 19 | - name: Restart ntpd - RedHat/CentOS 20 | systemd: name=ntpd state=restarted enabled=yes 21 | when: ansible_os_family == "RedHat" 22 | 23 | - name: Restart ntp - Debian 24 | systemd: name=ntp state=restarted enabled=yes 25 | when: ansible_os_family == "Debian" 26 | 27 | when: 28 | - ntp_setup is defined 29 | - ntp_setup 30 | - ntp_package is defined 31 | - ntp_package == "ntp" 32 | tags: 33 | - ntp 34 | -------------------------------------------------------------------------------- /group_vars/all/JoinConfiguration.yml: -------------------------------------------------------------------------------- 1 | #https://pkg.go.dev/k8s.io/kubernetes/cmd/kubeadm/app/apis/kubeadm/v1beta3?tab=doc#JoinConfiguration 2 | #check latest api ver here: https://pkg.go.dev/k8s.io/kubernetes/cmd/kubeadm/app/apis/kubeadm?tab=subdirectories 3 | JoinConfiguration: 4 | timeouts: 5 | controlPlaneComponentHealthCheck: 8m0s 6 | # controlPlane: # define controlPlane section only in secondary-masters; not required in minions. 7 | # localAPIEndpoint: 8 | # advertiseAddress: 9 | # bindPort: 6443 10 | apiVersion: kubeadm.k8s.io/v1beta4 11 | # caCertPath: /etc/kubernetes/pki/ca.crt 12 | # discovery: 13 | # bootstrapToken: 14 | # apiServerEndpoint: 10.1.2.3:6443 15 | # token: abcdef.0123456789abcdef 16 | # unsafeSkipCAVerification: true 17 | # timeout: 5m0s 18 | # tlsBootstrapToken: abcdef.0123456789abcdef 19 | # file: #either file or tlsBootstrapToken 20 | kind: JoinConfiguration 21 | # nodeRegistration: 22 | # criSocket: /var/run/dockershim.sock 23 | # name: 24 | # skipPhases: 25 | # patches: 26 | -------------------------------------------------------------------------------- /roles/storage/tasks/nfs.yml: -------------------------------------------------------------------------------- 1 | --- 2 | ## NFS 3 | - name: install nfs-utils package - RedHat/CentOS 4 | package: 5 | name: "nfs-utils" 6 | state: present 7 | when: 8 | - nfs_k8s is defined and nfs_k8s.enabled 9 | - ansible_os_family == "RedHat" 10 | tags: 11 | - nfs_storage 12 | 13 | - name: install nfs-common package - Debian 14 | package: 15 | name: "nfs-common" 16 | state: present 17 | when: 18 | - nfs_k8s is defined and nfs_k8s.enabled 19 | - ansible_os_family == "Debian" 20 | tags: 21 | - nfs_storage 22 | 23 | - name: prepare nfs provisioner 24 | template: 25 | src: nfs.j2 26 | dest: /tmp/nfs.yml 27 | #backup: yes 28 | force: yes 29 | when: nfs_k8s is defined and nfs_k8s.enabled 30 | tags: 31 | - nfs_storage 32 | 33 | - name: create nfs provisioner 34 | environment: 35 | KUBECONFIG: /etc/kubernetes/admin.conf 36 | shell: "kubectl create --namespace kube-system -f /tmp/nfs.yml" 37 | when: nfs_k8s is defined and nfs_k8s.enabled 38 | tags: 39 | - nfs_storage 40 | 41 | -------------------------------------------------------------------------------- /docs/Troubleshooting.md: -------------------------------------------------------------------------------- 1 | # Ansible related errors: 2 | ### forks, number of open files 3 | ERROR: "ERROR! A worker was found in a dead state" 4 | REASON: this might appear if you have low limits of number of open files, and your number of hosts in inventory is big. 
5 | Solution(s): 6 | - run this command before starting ansible: `ulimit -Sn $(ulimit -Hn)` to increase the softlimit up to the hard limit (which I suggest 16384 or more) 7 | - increase both soft limit and hardlimits (see links below) 8 | - if `lsof | wc -l` is more than 1/2 of `ulimit -Sn`, you **may** want to reboot the control machine (from where you invoke ansible).(ideally reboot also the target machines if they were not restarted for very long time) 9 | - limit number of forks ansible uses, by using the -f1 parameter on the ansible. 10 | Other related resources: 11 | - https://github.com/ansible/ansible/issues/32554 12 | - https://www.whatan00b.com/posts/debugging-a-segfault-from-ansible/ 13 | - https://stackoverflow.com/questions/21752067/counting-open-files-per-process 14 | - https://www.tecmint.com/increase-set-open-file-limits-in-linux/ 15 | -------------------------------------------------------------------------------- /templates/cert-manager_profile1.j2: -------------------------------------------------------------------------------- 1 | acmesolver: 2 | image: 3 | repository: '{{ images_repo | default ("quay.io") }}/jetstack/cert-manager-acmesolver' 4 | cainjector: 5 | image: 6 | repository: '{{ images_repo | default ("quay.io") }}/jetstack/cert-manager-cainjector' 7 | http_proxy: "{{proxy_env.http_proxy | default ('') }}" 8 | https_proxy: "{{proxy_env.https_proxy | default ('') }}" 9 | image: 10 | repository: '{{ images_repo | default ("quay.io") }}/jetstack/cert-manager-controller' 11 | installCRDs: true 12 | no_proxy: '{{proxy_env.no_proxy | default ("") | replace(",","\\,") }}' 13 | nodeSelector: 14 | node-role.kubernetes.io/infra: "" 15 | prometheus: 16 | servicemonitor: 17 | enabled: true 18 | #namespace: monitoring 19 | startupapicheck: 20 | image: 21 | repository: '{{ images_repo | default ("quay.io") }}/jetstack/cert-manager-ctl' 22 | tolerations: 23 | - effect: NoSchedule 24 | key: node-role.kubernetes.io/infra 25 | - effect: PreferNoSchedule 26 | key: node-role.kubernetes.io/infra 27 | webhook: 28 | image: 29 | repository: '{{ images_repo | default ("quay.io") }}/jetstack/cert-manager-webhook' 30 | -------------------------------------------------------------------------------- /vagrant_known_issues.md: -------------------------------------------------------------------------------- 1 | ## Virtualbox bugs (for those using vagrant solution): 2 | - Issue: After some times it shows on console: 3 | "kernel:NMI watchdog: BUG: soft lockup - CPU#0 stuck for 22s! kworker" 4 | and the vm is no longer responding. It happens on master. 5 | Solution: 6 | a) The project already implemented the code to change hdd controller from IDE to SATA. 7 | Status: SOLVED 8 | 9 | 10 | - Issue: at boot time, it says: 11 | "kernel: piix4_smbus 0000:00:07.0: SMBus base address uninitialized - upgrade BIOS or use force_addr=0xaddr" 12 | 13 | Tried: 14 | - vi /etc/default/grub #in the GRUB_CMDLINE_LINUX line, at the end, add: pci=noacpi acpi=off noapic 15 | #and run: 16 | grub2-mkconfig -o /boot/grub2/grub.cfg 17 | 18 | But did not work. 19 | 20 | - change motherboard chipset frp, piix3 to some other version 21 | But did not work. 22 | 23 | - echo -e "\nblacklist i2c_piix4\n" >> /etc/modprobe.d/blacklist.conf 24 | #echo -e "\nintel_powerclamp\n" >> /etc/modprobe.d/blacklist.conf # did not try 25 | and reboot 26 | But did not help either (actually block machine login via ssh). Maybe try to put blacklist i2c_piix4 also in /etc/dracut.conf.d/nofloppy.conf's omit_drivers list. 
27 | 28 | -------------------------------------------------------------------------------- /roles/storage/templates/rook-cluster.j2: -------------------------------------------------------------------------------- 1 | apiVersion: rook.io/v1alpha1 2 | kind: Cluster 3 | metadata: 4 | name: rook 5 | namespace: rook 6 | spec: 7 | {{ rook.cluster_spec | to_yaml | indent(2) }} 8 | 9 | # See more options here: https://github.com/rook/rook/blob/master/demo/kubernetes/rook-cluster.yaml and https://github.com/rook/rook/blob/master/Documentation/cluster-tpr.md 10 | # Individual nodes and their config can be specified as well, but 'useAllNodes' above must be set to false. Then, only the named 11 | # nodes below will be used as storage resources. Each node's 'name' field should match their 'kubernetes.io/hostname' label. 12 | # nodes: 13 | # - name: "172.17.4.101" 14 | # directories: # specific directores to use for storage can be specified for each node 15 | # - path: "/rook/storage-dir" 16 | # - name: "172.17.4.201" 17 | # devices: # specific devices to use for storage can be specified for each node 18 | # - name: "sdb" 19 | # - name: "sdc" 20 | # storeConfig: # configuration can be specified at the node level which overrides the cluster level config 21 | # storeType: bluestore 22 | # - name: "172.17.4.301" 23 | # deviceFilter: "^sd." 24 | 25 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | This is free and unencumbered software released into the public domain. 2 | 3 | Anyone is free to copy, modify, publish, use, compile, sell, or 4 | distribute this software, either in source code form or as a compiled 5 | binary, for any purpose, commercial or non-commercial, and by any 6 | means. 7 | 8 | In jurisdictions that recognize copyright laws, the author or authors 9 | of this software dedicate any and all copyright interest in the 10 | software to the public domain. We make this dedication for the benefit 11 | of the public at large and to the detriment of our heirs and 12 | successors. We intend this dedication to be an overt act of 13 | relinquishment in perpetuity of all present and future rights to this 14 | software under copyright law. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 23 | 24 | For more information, please refer to 25 | -------------------------------------------------------------------------------- /hosts.example: -------------------------------------------------------------------------------- 1 | 2 | # Use FullyQualifiedDomainNames (FQDN) (e.g. machine1.corp.example.com) 3 | # make sure `hostname -s` returns indeed short name (e.g. master1) 4 | # make sure `hostname -f` returns indeed the desired fqdn name (e.g. 
master1.corp.example.com) 5 | # normally, ansible_fqdn should return the same as `hostname -f` (otherwise one can reach this: https://github.com/ansible/ansible/issues/38777 ) 6 | # try: `ansible localhost -m setup -a "filter=ansible_fqdn"` and make sure it matches: `hostname -f` 7 | [primary-master] 8 | master1.corp.example.com 9 | 10 | [secondary-masters] 11 | # If there is only one master, make this section empty 12 | master[2:3].corp.example.com 13 | 14 | [masters:children] 15 | primary-master 16 | secondary-masters 17 | 18 | [nodes] 19 | # If there is only one machine both master and node, make this section empty 20 | # Best practice is to have few machines allocated for Prometheus/Ingresses/eventual ELK. 21 | # These are usually labeled "infra", and tainted with NoSchedule or at least PreferNoSchedule 22 | # See "taint_for_label" in group_vars/all/global.yaml 23 | node[1:2].corp.example.com label=node-role.kubernetes.io/infra= 24 | # All other nodes are automatically labeled "compute" and without any taint. 25 | node[3:7].corp.example.com # label=node-role.kubernetes.io/compute= 26 | 27 | -------------------------------------------------------------------------------- /group_vars/all/InitConfiguration.yml: -------------------------------------------------------------------------------- 1 | #https://pkg.go.dev/k8s.io/kubernetes/cmd/kubeadm/app/apis/kubeadm/v1beta4?tab=doc#InitConfiguration 2 | #check latest api ver here: https://pkg.go.dev/k8s.io/kubernetes/cmd/kubeadm/app/apis #/kubeadm?tab=subdirectories 3 | InitConfiguration: 4 | kind: InitConfiguration 5 | timeouts: 6 | controlPlaneComponentHealthCheck: 8m0s 7 | localAPIEndpoint: 8 | # advertiseAddress: 1.2.3.4 9 | # bindPort: 6443 10 | apiVersion: kubeadm.k8s.io/v1beta4 11 | # bootstrapTokens: 12 | # - groups: 13 | # - system:bootstrappers:kubeadm:default-node-token 14 | # token: abcdef.0123456789abcdef 15 | # ttl: 24h0m0s 16 | # usages: 17 | # - signing 18 | # - authentication 19 | nodeRegistration: 20 | # criSocket: unix:///var/run/dockershim.sock 21 | # criSocket: unix:///var/run/crio/crio.sock 22 | # criSocket: unix:///run/cri-dockerd.sock # https://github.com/Mirantis/cri-dockerd 23 | criSocket: unix:///run/cri-dockerd.sock 24 | # name: 25 | imagePullSerial: false 26 | kubeletExtraArgs: 27 | - name: cgroup-driver 28 | value: "systemd" 29 | - name: fail-swap-on 30 | value: "false" 31 | taints: 32 | - effect: NoSchedule 33 | key: node-role.kubernetes.io/control-plane 34 | # kubeletExtraArgs: 35 | # cgroupDriver: "cgroupfs" 36 | -------------------------------------------------------------------------------- /templates/nginx-ingress_profile1.j2: -------------------------------------------------------------------------------- 1 | controller: 2 | admissionWebhooks: 3 | enabled: false 4 | patch: 5 | image: 6 | registry: '{{ images_repo | default ("registry.k8s.io") }}' 7 | config: 8 | hide-headers: Server 9 | server-tokens: "false" 10 | hostNetwork: true 11 | image: 12 | registry: '{{ images_repo | default ("registry.k8s.io") }}' 13 | ingressClassResource: 14 | default: true 15 | kind: DaemonSet 16 | metrics: 17 | enabled: true 18 | serviceMonitor: 19 | additionalLabels: 20 | monitoring: prometheusoperator 21 | enabled: false 22 | #namespace: monitoring 23 | nodeSelector: 24 | node-role.kubernetes.io/infra: "" 25 | service: 26 | type: ClusterIP 27 | stats: 28 | enabled: true 29 | tolerations: 30 | - effect: NoSchedule 31 | key: node-role.kubernetes.io/infra 32 | - effect: PreferNoSchedule 33 | key: node-role.kubernetes.io/infra 34 
| watchIngressWithoutClass: true 35 | defaultBackend: 36 | image: 37 | image: 'defaultbackend-{{ HOST_ARCH | default ("amd64") }}' 38 | rbac: 39 | create: true 40 | serviceAccount: 41 | create: true 42 | 43 | #https://github.com/kubernetes/ingress-nginx/blob/master/charts/ingress-nginx/Chart.yaml#L5 44 | # PARAMS explained: https://kubernetes.github.io/ingress-nginx/deploy/baremetal/ and https://github.com/kubernetes/ingress-nginx/blob/main/charts/ingress-nginx/values.yaml -------------------------------------------------------------------------------- /roles/post_deploy/tasks/taints.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: tainting as per taint_for_label mapping 3 | environment: 4 | KUBECONFIG: /etc/kubernetes/admin.conf 5 | shell: 'kubectl taint nodes --selector {{ item.label }} {{ item.label }}:{{ item.taint }} --overwrite' 6 | with_items: "{{ taint_for_label }}" 7 | when: 8 | - groups['all'] | length > 1 9 | tags: 10 | - taint 11 | 12 | - block: 13 | - name: get name of primary_master when single node cluster 14 | environment: 15 | KUBECONFIG: /etc/kubernetes/admin.conf 16 | shell: 'kubectl get no -o=jsonpath="{.items[0].metadata.name}"' 17 | register: result_primary_master_name 18 | 19 | - name: when cluster is one machine only, remove NoSchedule taint from master 20 | ## TODO: Use InitConfiguration to remove the taint on master, with the same condition. 21 | environment: 22 | KUBECONFIG: /etc/kubernetes/admin.conf 23 | shell: 'kubectl taint nodes {{ result_primary_master_name.stdout_lines[0] }} {{ item }} --overwrite' 24 | with_items: #'{{ taints_master }}' 25 | - 'node-role.kubernetes.io/control-plane:NoSchedule-' 26 | - 'node-role.kubernetes.io/control-plane=:PreferNoSchedule' 27 | - 'node-role.kubernetes.io/infra=:PreferNoSchedule' 28 | ignore_errors: true 29 | tags: 30 | - taints 31 | when: 32 | - groups['all'] | length == 1 33 | tags: 34 | - taints 35 | 36 | -------------------------------------------------------------------------------- /roles/storage/tasks/remove_pvs.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # - hosts: master 3 | # become: yes 4 | # become_method: sudo 5 | # environment: 6 | # KUBECONFIG: /etc/kubernetes/admin.conf 7 | # tags: 8 | # - reset 9 | # - node 10 | # tasks: 11 | 12 | - block: 13 | - name: get all pvcs 14 | shell: kubectl get --all-namespaces pvc --no-headers -- | awk '{print "--namespace " $1 " pvc/" $2}' 15 | register: command_results 16 | ignore_errors: true 17 | changed_when: false 18 | 19 | - name: delete all pvcs 20 | environment: 21 | KUBECONFIG: /etc/kubernetes/admin.conf 22 | command: kubectl delete {{ item }} 23 | ignore_errors: true 24 | with_items: "{{command_results.stdout_lines}}" 25 | 26 | - name: wait till all pvcs are removed/cleaned 27 | shell: "kubectl get pvc --no-headers -- || true" 28 | register: command_result 29 | until: command_result.stdout == "" 30 | retries: 10 31 | delay: 3 32 | ignore_errors: true 33 | changed_when: false 34 | 35 | - name: wait till all pvs are removed/cleaned 36 | shell: "kubectl get pv --no-headers -- || true" 37 | register: command_result 38 | until: command_result.stdout == "" 39 | retries: "{{ RETRIES | default(40) }}" 40 | delay: 3 41 | ignore_errors: true 42 | changed_when: false 43 | 44 | when: storage.delete_pvs is defined and storage.delete_pvs 45 | environment: 46 | KUBECONFIG: /etc/kubernetes/admin.conf 47 | tags: 48 | - reset 49 | 50 | 
-------------------------------------------------------------------------------- /roles/keepalived/tasks/main.yaml: -------------------------------------------------------------------------------- 1 | # 2 | # Configure keepalived for the masters. 3 | # 4 | --- 5 | - name: "keepalived from package" 6 | block: 7 | - name: Install keepalived via package manager 8 | package: name=keepalived-{{ KEEPALIVED_VERSION }} state=present 9 | 10 | - name: Enable and start keepalived service 11 | service: name=keepalived enabled=yes state=restarted 12 | when: custom.networking.masterha_vip_keepalived_deploy_type == 'package' 13 | 14 | - name: Copy check script 15 | template: src=check_apiserver.sh.j2 dest=/etc/keepalived/check_apiserver.sh owner=root group=root mode=0755 16 | 17 | - name: Generate configuraton file 18 | template: src=keepalived.conf.j2 dest=/etc/keepalived/keepalived.conf 19 | 20 | - name: keepalived with docker image 21 | block: 22 | - name: cleanup previous keepalived 23 | shell: docker rm -f keepalived 24 | ignore_errors: true 25 | 26 | - name: Use keepalived from docker image 27 | shell: docker run --restart Always --name keepalived --env KEEPALIVED_INTERFACE=$(ip route | grep default | head -1 | cut -d' ' -f5) --env KEEPALIVED_PASSWORD='d0cker' --cap-add=NET_ADMIN --net=host --volume /etc/keepalived/keepalived.conf:/container/service/keepalived/assets/keepalived.conf --volume /etc/keepalived/check_apiserver.sh:/etc/keepalived/check_apiserver.sh --detach {{ masterha_vip_keepalived_docker_image | default ("osixia/keepalived:2.0.17") }} --copy-service # --loglevel debug 28 | register: docker_result 29 | 30 | when: custom.networking.masterha_vip_keepalived_deploy_type == 'docker' 31 | 32 | -------------------------------------------------------------------------------- /other_tools/k8s_cli_tools.sh: -------------------------------------------------------------------------------- 1 | 2 | ######################## 3 | ######################## 4 | echo "kubectx" 5 | 6 | sudo git clone -b master --single-branch https://github.com/ahmetb/kubectx.git /opt/kubectx 7 | sudo ln -s /opt/kubectx/kubectx /usr/local/bin/kubectx 8 | sudo ln -s /opt/kubectx/kubens /usr/local/bin/kubens 9 | 10 | # Bash completions 11 | COMPDIR=$(pkg-config --variable=completionsdir bash-completion) 12 | sudo ln -sf /opt/kubectx/completion/kubens.bash $COMPDIR/kubens 13 | sudo ln -sf /opt/kubectx/completion/kubectx.bash $COMPDIR/kubectx 14 | 15 | # Zsh completions 16 | mkdir -p ~/.oh-my-zsh/completions 17 | chmod -R 755 ~/.oh-my-zsh/completions 18 | ln -s /opt/kubectx/completion/kubectx.zsh ~/.oh-my-zsh/completions/_kubectx.zsh 19 | ln -s /opt/kubectx/completion/kubens.zsh ~/.oh-my-zsh/completions/_kubens.zsh 20 | 21 | ######################## 22 | ######################## 23 | echo "krew (kubectl krew package manager)" 24 | tmpdir="$(mktemp -d)" 25 | cd $tmpdir 26 | curl -fsSLO "https://github.com/kubernetes-sigs/krew/releases/download/v0.3.2/krew.{tar.gz,yaml}" 27 | tar zxvf krew.tar.gz 28 | ./krew-"$(uname | tr '[:upper:]' '[:lower:]')_amd64" install \ 29 | --manifest=krew.yaml --archive=krew.tar.gz 30 | cd - 31 | rm -rf $tmpdir 32 | sudo cp ~/.krew/bin/kubectl-krew /usr/local/bin 33 | 34 | ######################## 35 | ######################## 36 | echo "kubeval" 37 | curl -sSL https://github.com/instrumenta/kubeval/releases/download/0.14.0/kubeval-linux-amd64.tar.gz | sudo tar -xzf - -C /usr/local/bin/ 38 | sudo chmod +x /usr/local/bin/kubeval 39 | 40 | 41 | 42 | 
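########################
########################
# Illustrative quick checks after installation (not part of the original script):
# kubectx                       # list available kube contexts
# kubens kube-system            # switch the current namespace
# kubectl krew search           # browse installable krew plugins
# kubeval demo/demo-pod.yml     # validate a manifest against the Kubernetes schemas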
-------------------------------------------------------------------------------- /docs/add-remove-nodes.md: -------------------------------------------------------------------------------- 1 | # Adding nodes (either secondary-masters or infra or compute) 2 | It's important to understand that secondary-masters and nodes are added the same way. 3 | E.g. adding secondary-masters to an existing cluster runs the same flow that is used when they are defined from the first run: the primary-master is handled first, and then all the machines listed under [secondary-masters] are joined. 4 | The same steps apply whether you add additional masters or additional compute nodes. 5 | 6 | Here are the steps to add nodes after the initial install: 7 | 1. prepare the hosts file and make sure: 8 | a. it has the [primary-master] defined 9 | b. in the other groups it has **ONLY** the machines you want to add (either masters under [secondary-masters] or nodes under [nodes]) 10 | 2. run: `ansible-playbook -i hosts site.yml --tags node` 11 | 12 | # Removing nodes: 13 | To remove a node, do similarly: 14 | 1. In the inventory (hosts file), put under the [nodes] group only the machines you wish to reset (remove), and populate [primary-master] with the proper primary-master machine. 15 | 2. `ansible-playbook -i hosts site.yml --tags node` 16 | 17 | Note: the primary-master won't be touched, but it's required in order to properly drain the nodes before the reset. 18 | 19 | # Removing secondary-masters: 20 | For safety reasons, only nodes can currently be removed; any master (be it the primary-master or a secondary-master) won't be removed automatically. 21 | If you want to remove a machine that is a secondary-master, you have to **move** it under the [nodes] group (and remove it from the [secondary-masters] group), then follow the "Removing nodes" steps above. 22 | -------------------------------------------------------------------------------- /roles/common/tasks/decide_master_name.yml: -------------------------------------------------------------------------------- 1 | ## Decide how to approach the master: inventory or force fqdn (for non MasterHA cases); via masterha_ip,MasterHA (for MasterHA cases) 2 | ## TODO: 3 | # if inventory_hostname is already fqdn, do not use ansible_fqdn, as ansible_fqdn is problematic: 4 | # https://github.com/ReSearchITEng/kubeadm-playbook/issues/81 ( https://github.com/ansible/ansible/issues/38777 ) 5 | - block: 6 | - name: by default set master name to inventory definition (no MasterHA case) 7 | set_fact: master_name={{ groups['primary-master'][0] }} 8 | when: 9 | - groups['masters'] | length == 1 10 | 11 | - name: force use fqdn for master name (no MasterHA case) if inventory was not defined fqdn and we have to discover... 12 | set_fact: master_name={{ hostvars[groups['primary-master'][0]]['ansible_fqdn'] }} 13 | when: 14 | - custom.networking.fqdn.always or custom.networking.fqdn.master 15 | - groups['masters'] | length == 1 16 | - '"." 
not in groups["primary-master"][0]' # meaning it was not defined with fqdn, but we would like to force fqdn (per above custom.networking.fqdn condition) 17 | 18 | - name: force use fqdn for master name (MasterHA case) 19 | set_fact: master_name={{ custom.networking.masterha_fqdn }} 20 | when: 21 | - custom.networking.fqdn.always or custom.networking.fqdn.master 22 | - groups['masters'] | length > 1 23 | 24 | - name: force use ip for master name (MasterHA case) 25 | set_fact: master_name={{ custom.networking.masterha_ip }} 26 | when: 27 | - not custom.networking.fqdn.always 28 | - not custom.networking.fqdn.master 29 | - groups['masters'] | length > 1 30 | 31 | tags: 32 | - always 33 | -------------------------------------------------------------------------------- /group_vars/all/KubeProxyConfiguration.yml: -------------------------------------------------------------------------------- 1 | #https://pkg.go.dev/k8s.io/kube-proxy/config/v1alpha1?tab=doc#KubeProxyConfiguration 2 | #check latest api ver here: https://pkg.go.dev/k8s.io/kube-proxy/config/ 3 | KubeProxyConfiguration: 4 | apiVersion: kubeproxy.config.k8s.io/v1alpha1 5 | # bindAddress: 0.0.0.0 6 | # clientConnection: 7 | # acceptContentTypes: "" 8 | # burst: 10 9 | # contentType: application/vnd.kubernetes.protobuf 10 | # kubeconfig: /var/lib/kube-proxy/kubeconfig.conf 11 | # qps: 5 12 | # clusterCIDR: "" 13 | clusterCIDR: "{{ POD_NETWORK_CIDR }}" 14 | ##podSubnet -> Calico is now able to autodetect. If calico is used, this can be commented out. 15 | # Not required: if here is empty it will read from the ClusterConfiguration. 16 | # configSyncPeriod: 15m0s 17 | # conntrack: 18 | # max: null 19 | # maxPerCore: 32768 20 | # min: 131072 21 | # tcpCloseWaitTimeout: 1h0m0s 22 | # tcpEstablishedTimeout: 24h0m0s 23 | # enableProfiling: false 24 | # healthzBindAddress: 0.0.0.0:10256 25 | # hostnameOverride: "" 26 | # iptables: 27 | # masqueradeAll: false 28 | # masqueradeBit: 14 29 | # minSyncPeriod: 0s 30 | # syncPeriod: 30s 31 | ipvs: 32 | strictARP: true 33 | # excludeCIDRs: null 34 | # minSyncPeriod: 0s 35 | # scheduler: "" 36 | # syncPeriod: 30s 37 | kind: KubeProxyConfiguration 38 | # metricsBindAddress: 127.0.0.1:10249 39 | # mode: "" 40 | mode: "ipvs" 41 | ## Leave mode undefined or "" for the default, which usually is the old iptables method 42 | # nodePortAddresses: null 43 | # oomScoreAdj: -999 44 | # portRange: "" 45 | # resourceContainer: /kube-proxy 46 | # udpIdleTimeout: 250ms 47 | -------------------------------------------------------------------------------- /pre_sanity.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - hosts: all 3 | become: yes 4 | become_method: sudo 5 | tags: 6 | - pre_sanity 7 | - master 8 | - node 9 | tasks: 10 | 11 | - name: check 127.0.1.1 /etc/hosts 12 | # For a system with a permanent IP address, that permanent IP address should be used here instead of 127.0.1.1 13 | # https://www.debian.org/doc/manuals/debian-reference/ch05.en.html#_the_hostname_resolution 14 | # This is a good rule, while not mandatory. 
If you want, you can skip this task with --skip-tags check_etc_hosts 15 | command: grep '^\s*127.0.1.1' /etc/hosts 16 | register: command_result 17 | failed_when: command_result.stdout != "" 18 | tags: 19 | - check_etc_hosts 20 | changed_when: false 21 | 22 | - name: check docker is running 23 | command: 'docker info' 24 | tags: 25 | - check_docker 26 | when: docker_setup is defined and docker_setup=false 27 | changed_when: false 28 | 29 | - name: check docker hello world 30 | shell: docker run --rm -i hello-world | awk '/Hello/ {print $1}' 31 | register: command_result 32 | failed_when: command_result.stdout != "Hello" 33 | tags: 34 | - check_docker 35 | when: docker_setup is defined and docker_setup=false 36 | changed_when: false 37 | 38 | # Ideally, to ensure there is no x509 certificate error like: 39 | #' docker pull gcr.io/google_containers/kube-apiserver-amd64:v1.7.2 40 | #Trying to pull repository gcr.io/google_containers/kube-apiserver-amd64 ... 41 | #Get https://gcr.io/v1/_ping: x509: certificate signed by unknown authority ' 42 | # yum check-update ca-certificates; (($?==100)) && yum update ca-certificates || yum reinstall ca-certificates 43 | # update-ca-trust extract 44 | 45 | # Check ports: https://kubernetes.io/docs/setup/independent/install-kubeadm/ 46 | -------------------------------------------------------------------------------- /roles/common/tasks/selinux.yml: -------------------------------------------------------------------------------- 1 | --- 2 | ### SELINUX 3 | 4 | - block: 5 | - name: Disable selinux 6 | selinux: 7 | policy: "{{ selinux_policy | default ('targeted') }}" 8 | state: "{{ selinux_state | default ('permissive') }}" 9 | register: command_result 10 | # changed_when: command_result.reboot_required # works in Ansible 2.4+. To support 2.3, we keep current solution which reboots when there is any time of change... 11 | # changed_when: "'state change will take effect next reboot' in command_result.stdout" 12 | # notify: 13 | # - Reboot 14 | 15 | - name: trigger Reboot when required due to selinux changes 16 | command: /bin/true 17 | notify: 18 | - Reboot 19 | #when: command_result.reboot_required 20 | when: command_result.changed 21 | 22 | - name: flush_handlers (trigger reboot when required) 23 | meta: flush_handlers 24 | changed_when: command_result.changed 25 | #changed_when: command_result.reboot_required 26 | 27 | - name: Pause till machine is up again 28 | pause: 29 | seconds: 30 30 | when: 31 | - command_result.reboot_required | default( false ) 32 | - allow_restart | default ( false ) # Is ansible able to keep this state after reboot? 33 | 34 | - name: Wait for server come back from restart (if selinux required it) 35 | local_action: wait_for 36 | args: 37 | host: "{{ inventory_hostname }}" 38 | port: 22 39 | state: started 40 | delay: 15 41 | timeout: 180 42 | 43 | #Starting Ansible 2.3 one can do: 44 | #- name: Wait for system to become reachable # Ansible 2.3+ 45 | # wait_for_connection: 46 | # timeout: 200 47 | 48 | - name: Gather facts for first time after restart 49 | setup: 50 | 51 | tags: 52 | - selinux 53 | when: 54 | - ansible_os_family == "RedHat" # Is ansible able to keep this state after reboot? 
55 | - selinux_state is defined 56 | -------------------------------------------------------------------------------- /roles/tools/tasks/cluster_sanity.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - block: 3 | ## Decide how to approach the master: inventory or force fqdn (for non MasterHA cases); via masterha_ip,MasterHA (for MasterHA cases) 4 | - name: decide_master_name 5 | include_role: 6 | name: common 7 | tasks_from: decide_master_name 8 | 9 | 10 | - name: "Wait 300 seconds for master at {{ master_name }}:{{ InitConfiguration.localAPIEndpoint.bindPort | default (6443) }} to become open (MasterHA)" 11 | wait_for: 12 | port: "{{ InitConfiguration.localAPIEndpoint.bindPort | default (6443) }}" 13 | host: "{{ master_name }}" 14 | delay: 1 15 | timeout: 300 16 | ### END CODE DUPLICATION 17 | 18 | - name: kubectl get nodes 19 | shell: "kubectl get nodes --no-headers | grep -v -w 'Ready' || true " 20 | register: command_result 21 | tags: 22 | - node_sanity 23 | until: command_result.stdout == "" 24 | retries: "{{ RETRIES | default(40) }}" 25 | delay: 3 26 | changed_when: false 27 | 28 | - name: kubectl get pods 29 | shell: "kubectl get --namespace kube-system pods --no-headers | grep -v -w 'Running' || true " 30 | register: command_result 31 | tags: 32 | - pod_sanity 33 | until: command_result.stdout == "" 34 | retries: "{{ RETRIES | default(40) }}" 35 | delay: 3 36 | changed_when: false 37 | 38 | # >= and not == because we may use this role to only to add nodes also. 39 | - name: Check all nodes were registered 40 | shell: "/usr/bin/test $(kubectl get nodes | grep -ow Ready | wc -l) -ge {{ groups['all'] | length }}" 41 | register: command_result 42 | retries: 30 43 | delay: 3 44 | until: command_result is success 45 | changed_when: false 46 | tags: 47 | - cluster_info 48 | - cluster_status 49 | - node_sanity 50 | 51 | environment: 52 | KUBECONFIG: /etc/kubernetes/admin.conf 53 | tags: 54 | - sanity 55 | - cluster_sanity 56 | 57 | -------------------------------------------------------------------------------- /roles/common/tasks/kube_config.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Make sure folder /etc/kubernetes/ exists 3 | file: path=/etc/kubernetes state=directory mode=0755 4 | 5 | ### prepare cloud-config 6 | - block: 7 | - name: prepare cloud-config file for cloudProvider {{ClusterConfiguration.cloudProvider}} 8 | set_fact: fact_cloud_provider='--cloud-provider={{ ClusterConfiguration.cloudProvider }} --cloud-config=/etc/kubernetes/cloud-config' 9 | 10 | - name: prepare cloud-config file 11 | template: 12 | src: cloud-config.j2 13 | dest: /etc/kubernetes/cloud-config 14 | force: yes 15 | tags: 16 | - init 17 | # notify: 18 | # - Restart kubelet 19 | 20 | # BLOCK ENDS with its condition: 21 | when: 22 | - ClusterConfiguration is defined 23 | - ClusterConfiguration.cloudProvider is defined 24 | - inventory_hostname in groups['masters'] 25 | tags: 26 | - kubelet 27 | 28 | ### kubeadm settings: 29 | - name: Replace {{ClusterConfiguration.networking.dnsDomain}} under cluster-domain in kubelet.service.d/10-kubeadm.conf 30 | replace: 31 | dest: /etc/systemd/system/kubelet.service.d/10-kubeadm.conf 32 | regexp: '(--cluster-domain=)[A-Za-z0-9\-\.]+(.*)' 33 | replace: '\1{{ ClusterConfiguration.networking.dnsDomain | default("cluster.local") }}\2' 34 | #backup: yes 35 | when: ClusterConfiguration is defined and ClusterConfiguration.networking is defined and 
ClusterConfiguration.networking.dnsDomain is defined 36 | notify: 37 | - Reload systemd # already by other handler 38 | - Restart kubelet 39 | tags: 40 | - kubeadm 41 | - kubelet 42 | 43 | - name: Create kubeadm sysctl file 44 | copy: src=90-kubeadm.conf dest=/etc/sysctl.d/90-kubeadm.conf 45 | tags: 46 | - kubeadm 47 | - kubelet 48 | 49 | - name: Set sysctl settings 50 | command: sysctl --system 51 | ignore_errors: true # ignore errors which appear in vm simulated with docker 52 | tags: 53 | - kubeadm 54 | - kubelet 55 | 56 | -------------------------------------------------------------------------------- /roles/tools/tasks/reset_drain.yml: -------------------------------------------------------------------------------- 1 | --- 2 | #- hosts: master 3 | # gather_facts: False 4 | # become: yes 5 | # become_method: sudo 6 | 7 | ## TODO: 8 | # 1. We should make sure master is tainted so pods will not move there either. 9 | # 2. k get no empty: Now works only if we removed all nodes (which might not be the case; should be limited to machines in groups.nodes ) 10 | 11 | - block: 12 | #- set_fact: 13 | # env_kc: '{{ proxy_env |default({}) | combine ({"PATH" : "/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/bin/" }) | combine ({"KUBECONFIG" :"/etc/kubernetes/admin.conf"}) }}' 14 | # tags: 15 | # - always 16 | 17 | - name: get nodes 18 | shell: "kubectl get nodes --no-headers -- | cut -f 1 -d ' '" 19 | register: command_results 20 | changed_when: false 21 | 22 | - name: drain nodes 23 | command: kubectl drain {{ item }} --delete-local-data --force --ignore-daemonsets --grace-period=5 --timeout=60s 24 | #with_items: "{{command_results.stdout_lines}}" 25 | with_items: "{{ groups['nodes'] }}" 26 | ignore_errors: true 27 | 28 | - name: delete nodes 29 | command: kubectl delete node {{ item }} 30 | #with_items: "{{command_results.stdout_lines}}" 31 | with_items: "{{ groups['nodes'] }}" 32 | ignore_errors: true 33 | 34 | #shell: "kubectl get nodes -o jsonpath='{.items[*].metadata.name}'" 35 | #with_items: "{{ groups['nodes'] }}" 36 | 37 | - name: kubectl get nodes must be empty by now (if target was full cluster and not partial update) 38 | shell: "kubectl get nodes --no-headers | grep -v 'node-role.kubernetes.io/control-plane' | grep -v -w 'Ready' || true" 39 | register: command_result 40 | until: command_result.stdout == "" 41 | retries: 10 42 | delay: 3 43 | ignore_errors: true 44 | changed_when: false 45 | 46 | environment: 47 | KUBECONFIG: /etc/kubernetes/admin.conf 48 | when: reset_gracefully is defined and reset_gracefully 49 | tags: 50 | - reset 51 | - drain 52 | 53 | -------------------------------------------------------------------------------- /other_tools/dockerize.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | set -e 3 | # Ubuntu 4 | #sudo apt-get update 5 | #sudo apt-get install -y git docker #ansible 6 | 7 | # CentOS/RHEL 8 | #sudo yum install -y git docker ansible curl tar zip unzip 9 | #ssh-copy-id 10 | sudo yum install -y docker iptables-services 11 | sudo sh -c 'echo EXTRA_STORAGE_OPTIONS=\"--storage-opt overlay2.override_kernel_check=true\">/etc/sysconfig/docker-storage-setup' 12 | sudo sh -c 'echo STORAGE_DRIVER=\"overlay2\" >>/etc/sysconfig/docker-storage-setup' 13 | sudo rm -f /etc/sysconfig/docker-storage || true 14 | 15 | # Firewalld (and selinux) do not play well with k8s (and especially with kubeadm). 
16 | # NOTE: A machine reboot may be required if SELinux was enforced previously 17 | systemctl stop firewalld || true 18 | systemctl disable firewalld || true 19 | systemctl mask firewalld || true 20 | systemctl start iptables 21 | systemctl enable iptables 22 | systemctl unmask iptables 23 | 24 | sudo systemctl stop docker 25 | sudo systemctl start docker-storage-setup 26 | sudo systemctl restart docker 27 | sudo systemctl enable docker 28 | #sudo chown vagrant /var/run/docker.sock # optional 29 | 30 | # SET Default Policies to ACCEPT 31 | iptables -P FORWARD ACCEPT 32 | iptables -P INPUT ACCEPT 33 | iptables -P OUTPUT ACCEPT 34 | 35 | # Remove the Default REJECT rules, so it will hit the default Policy 36 | iptables -D INPUT -j REJECT --reject-with icmp-host-prohibited 37 | iptables -D FORWARD -j REJECT --reject-with icmp-host-prohibited 38 | 39 | # If someone wants to enable only some ports (there will be many, and most of them dynamic), here is a start: 6443 (k8s api), 10250, etc. (maybe both tcp and udp...) 40 | #sudo iptables -I INPUT -p tcp --dport 6443 -m conntrack --ctstate NEW,ESTABLISHED -j ACCEPT 41 | #sudo iptables -I OUTPUT -p tcp --sport 6443 -m conntrack --ctstate ESTABLISHED -j ACCEPT 42 | 43 | # DEBUG LIVE WITH: 44 | # watch -n1 iptables -vnL 45 | -------------------------------------------------------------------------------- /roles/post_deploy/tasks/network.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # - hosts: master 3 | # become: yes 4 | # become_method: sudo 5 | # tasks: 6 | - set_fact: 7 | env_kc: '{{ proxy_env |default({}) | combine ({"KUBECONFIG" :"/etc/kubernetes/admin.conf"}) }}' 8 | tags: 9 | - always 10 | 11 | # Not clear if mandatory for weave: 12 | # https://www.weave.works/docs/net/latest/kubernetes/kube-addon/ 13 | - name: Copy weave net script 14 | environment: '{{ proxy_env | default ({}) }}' 15 | get_url: url=https://raw.githubusercontent.com/weaveworks/weave/master/weave dest=/usr/local/bin/weave mode=u+rxw force=yes 16 | ignore_errors: true # Currently there is no way to check if the user is using weave 17 | when: 18 | - podNetwork is defined 19 | - podNetwork == "weave" 20 | tags: 21 | - weave 22 | - network 23 | 24 | - name: Create cluster network (when is via manifest instead of helm chart) 25 | command: /usr/bin/kubectl apply -f {{ item }} 26 | with_items: "{{ k8s_network_addons_urls }}" 27 | #with_items: "{{ k8s_network_addons_urls | default ([]) }}" 28 | #with_items: '{{ k8s_network_addons_urls | default ("https://docs.projectcalico.org/manifests/calico.yaml") }}' 29 | environment: '{{env_kc}}' 30 | when: 31 | - k8s_network_addons_urls is defined 32 | # - k8s_network_addons_urls | length > 0 33 | - ( k8s_network_addons_urls | default([]) ) | length > 0 34 | tags: 35 | - k8s_network_addons 36 | - network 37 | 38 | - name: sanity - wait for alls pod to be running (besides kube-dns for now, as it might be forced to a node if master did not allow it due to tains) 39 | environment: 40 | KUBECONFIG: /etc/kubernetes/admin.conf 41 | shell: "kubectl get --namespace kube-system pods --no-headers | grep -v -w 'Running' | grep -v 'kube-dns' | grep -v 'coredns' || true " 42 | register: command_result 43 | tags: 44 | - k8s_network_addons 45 | - sanity 46 | - network 47 | until: command_result.stdout == "" 48 | retries: "{{ RETRIES | default(40) }}" 49 | delay: 3 50 | changed_when: false 51 | -------------------------------------------------------------------------------- /docs/portable_machine_setup.md: 
-------------------------------------------------------------------------------- 1 | # What is included: 2 | If the inventory has only the [primary-master] section populated, it understands it's a one-machine cluster (at least for now). 3 | The playbook will do most of the settings accordingly. 4 | 5 | # ingress with local binding 6 | For the ingress controller to listen on 127.*, you may want to use option 2 of the ingress controller defined in addons.yml 7 | 8 | # Portable IP address: 9 | Should you have this installation in a VM, and your IP address changes, you may want to make it "portable", so it does not depend on the IP address. 10 | 11 | ``` 12 | echo "make installation agnostic to ip address" 13 | CURRENT_IP=`hostname -I | cut -d" " -f1` 14 | sudo perl -p -i -e "s/${CURRENT_IP}/127.0.0.1/g" ` find /etc/kubernetes/ -type f \( -name \*.yaml -o -name \*.conf \) ` 15 | ``` 16 | 17 | # add ingresses to hosts file 18 | In such cases, most probably you don't have a wildcard dns either, so create similar entries in the /etc/hosts file. 19 | (Of course, these entries have to be in sync with group_vars/all/network.yml and, if you customized them, with any hosts/domains defined in addons.yml.) 20 | # dns entries for ingresses. 21 | ``` 22 | echo "127.0.1.2 dashboard.k8s.local.example.com prometheus.k8s.local.example.com grafana.k8s.corp.example.com" | sudo tee -a /etc/hosts >/dev/null 23 | ``` 24 | 25 | # compress image 26 | In case you want to ship such a portable vm image with k8s inside, you may want to make it as small as possible before shutdown. 27 | 28 | ``` 29 | sudo systemctl stop kubelet || true 30 | sudo systemctl disable kubelet || true 31 | docker rmi -f $(docker images -q) 32 | ``` 33 | 34 | # If you want to temporarily turn off your kubernetes (keep only its configuration), do: 35 | ``` 36 | sudo systemctl stop kubelet; sudo systemctl disable kubelet; docker ps | grep kube | cut -d" " -f1 | xargs docker stop ; docker ps | grep k8s | cut -d" " -f1 | xargs docker stop; docker ps 37 | ``` 38 | 39 | # To save space, you may want to also delete some or even all docker images which are not currently used: 40 | `docker rmi $(docker images -q)` 41 | 42 | # other tips: 43 | you may want to do `sudo fstrim /` 44 | -------------------------------------------------------------------------------- /roles/storage/tasks/vsphere.yml: -------------------------------------------------------------------------------- 1 | --- 2 | ## vsphere/vmware/vcenter 3 | - block: 4 | - set_fact: 5 | env_kc: '{{ proxy_env |default({}) | combine ({"KUBECONFIG" :"/etc/kubernetes/admin.conf"}) }}' 6 | tags: 7 | - always 8 | 9 | - name: Create vsphere storage class 10 | command: /usr/bin/kubectl apply -f {{ item }} 11 | with_items: "{{ vsphere_storageclass_urls | default ([]) }}" 12 | environment: '{{env_kc}}' 13 | when: 14 | - vsphere_storageclass_urls is defined 15 | tags: 16 | - vsphere_storageclass_urls 17 | - vsphere 18 | 19 | - block: 20 | - name: vsphere_bug_fix github.com/vmware/kubernetes/issues/495 21 | copy: src=vsphere_bug_fix.sh dest=/tmp/vsphere_bug_fix.sh mode='0755' 22 | 23 | - name: execute vsphere_bug_fix.sh 24 | environment: '{{env_kc}}' 25 | shell: /tmp/vsphere_bug_fix.sh 26 | register: list 27 | 28 | - name: build machine reboot list due to vsphere_bug 29 | add_host: name={{item}} group=mustrebootlist 30 | with_items: 31 | '{{list.stdout_lines}}' 32 | 33 | # NOW it's done via .sh, but in future maybe do: 34 | # - name: "vsphere bug (No VM found) => so we need to delete master(s) (and restart 
kubelet)" 35 | # environment: '{{env_kc}}' 36 | # command: kubectl delete node {{ inventory_hostname_short }} 37 | # ignore_errors: true 38 | # notify: # kubelet restart is needed for 1.14+, for cloud=vsphere, otherwise we get: "Unable to find VM by UUID. VM UUID:" or Error "No VM found" node info for node 39 | # - Restart kubelet 40 | 41 | # # - meta: flush_handlers is not enough, as sometimes delete was not with success, so forcing trigger like this: 42 | # - name: "vpshere bug (No VM found) =>trigger kubelet restart (after master node deleted)" 43 | # debug: msg="vpshere bug =>trigger kubelet restart (after master node deleted)" 44 | # notify: 45 | # - Restart kubelet 46 | # changed_when: true 47 | when: 48 | - vsphere_bug_fix is defined 49 | - vsphere_bug_fix 50 | tags: 51 | - vsphere_bug_fix 52 | 53 | when: 54 | - ClusterConfiguration.cloudProvider is defined 55 | - ClusterConfiguration.cloudProvider == 'vsphere' 56 | tags: 57 | - vsphere 58 | 59 | -------------------------------------------------------------------------------- /group_vars/all/ClusterConfiguration.yml: -------------------------------------------------------------------------------- 1 | #https://pkg.go.dev/k8s.io/kubernetes/cmd/kubeadm/app/apis/kubeadm/v1beta3?tab=doc#ClusterConfiguration 2 | #check latest api ver here: https://pkg.go.dev/k8s.io/kubernetes/cmd/kubeadm/app/apis/kubeadm?tab=subdirectories 3 | ClusterConfiguration: 4 | apiVersion: kubeadm.k8s.io/v1beta4 5 | apiServer: 6 | #extraEnvs: [] 7 | extraArgs: # https://kubernetes.io/docs/admin/kube-apiserver/ 8 | - name: endpoint-reconciler-type 9 | value: "lease" # needs k8s 1.9+ More info: https://kubernetes.io/docs/admin/high-availability/building/#endpoint-reconciler 10 | - name: service-node-port-range 11 | value: '79-32767' #Default 32000-32767 ; Ensure the local ports on all nodes are set accordingly 12 | # auditPolicy: 13 | # logDir: /var/log/kubernetes/audit 14 | # logMaxAge: 2 15 | # path: "" 16 | # certificatesDir: /etc/kubernetes/pki 17 | # clusterName: kubernetes 18 | clusterName: "{{ CLUSTER_NAME }}" 19 | # controlPlaneEndpoint: "" 20 | # etcd: 21 | # local: 22 | # serverCertSANs: 23 | # - "10.33.46.215" 24 | # extraArgs: 25 | # cipher-suites: TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256 26 | # dataDir: /var/lib/etcd 27 | # image: "" 28 | imageRepository: '{{ images_repo | default ("registry.k8s.io") }}' 29 | kind: ClusterConfiguration 30 | kubernetesVersion: "v{{ KUBERNETES_VERSION }}" 31 | # dns: 32 | networking: 33 | # dnsDomain: cluster.local 34 | serviceSubnet: "{{ SERVICE_NETWORK_CIDR }}" 35 | # podSubnet: "" 36 | podSubnet: "{{ POD_NETWORK_CIDR }}" 37 | ##podSubnet -> Calico is now able to autodetect. If calico is used, this can be commented out. 38 | #cloudProvider: 'vsphere' # WE NEED THIS Even after 1.11 (v1alpha2) (due to a bug in ansible on vars with "-"); this is also required: govc vm.change -e="disk.enableUUID=1" -vm= and requires setup of cloud_config below 39 | # controllerManager: 40 | # controllerManagerExtraArgs: # https://kubernetes.io/docs/admin/kube-controller-manager/ 41 | # pod-eviction-timeout: '3m00s' # Default 5m0s #PodEvictionTimeout controls grace peroid for deleting pods on failed nodes. Takes time duration string (e.g. '300ms' or '2m30s'). Valid time units are 'ns', 'us', 'ms', 's', 'm', 'h'. 
42 | # : 43 | 44 | 45 | -------------------------------------------------------------------------------- /roles/storage/tasks/rook_reset.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # - hosts: master 3 | # become: yes 4 | # become_method: sudo 5 | # tags: 6 | # - rook 7 | # - reset 8 | # tasks: 9 | 10 | - block: 11 | - set_fact: 12 | env_kc: '{{ proxy_env |default({}) | combine ({"PATH" : "/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin" }) | combine ({"KUBECONFIG" :"/etc/kubernetes/admin.conf"}) }}' 13 | 14 | - name: deploy rook operator (using helm chart) - prefered method 15 | environment: '{{env_kc}}' 16 | command: 'helm delete --purge rook-operator ' 17 | when: rook.operator_url is not defined 18 | ignore_errors: true 19 | 20 | - name: clean install rook operator 21 | environment: '{{env_kc}}' 22 | command: /usr/bin/kubectl delete {{ item }} 23 | with_items: 24 | - "-n rook po/rook-client" 25 | - "-n rook po/rook-tools" 26 | - "-n rook cm/rook-config-override" 27 | - "-n rook deploy/rook-api" 28 | - "-n rook ds/osd" 29 | - "-n rook cluster rook" 30 | - "-n rook serviceaccount rook-api" 31 | - "clusterrole rook-api" 32 | - "clusterrolebinding rook-api" 33 | - "thirdpartyresources cluster.rook.io pool.rook.io" 34 | - "secret rook-rook-user" 35 | - "namespace rook" 36 | - "sc rook-block" 37 | tags: 38 | - reset 39 | ignore_errors: true 40 | 41 | - name: clean install rook operator 42 | environment: '{{env_kc}}' 43 | command: /usr/bin/kubectl delete {{ item }} 44 | when: rook.operator_url is defined 45 | with_items: 46 | - "-f {{ rook.operator_url }}" 47 | 48 | - name: clean install rook secrets from all rook.allowed_consumer_namespaces 49 | # TODO: scan all namespaces and remove it. Do not rely on the previously defined rook.allowed_consumer_namespaces 50 | environment: '{{env_kc}}' 51 | command: /usr/bin/kubectl delete -n {{ item }} secret rook-admin 52 | when: rook.allowed_consumer_namespaces is defined 53 | with_items: "{{ rook.allowed_consumer_namespaces }}" 54 | ignore_errors: true 55 | 56 | - name: rook post cleanup/reset sanity 57 | environment: 58 | KUBECONFIG: /etc/kubernetes/admin.conf 59 | shell: "kubectl get --all-namespaces pods --no-headers | grep -v -w 'Running' || true " 60 | register: command_result 61 | tags: 62 | - sanity 63 | until: command_result.stdout == "" 64 | retries: 30 65 | delay: 3 66 | changed_when: false 67 | 68 | - name: clean rook ceph persistant storage directories 69 | file: path=/storage/rook/* state=absent force=yes 70 | tags: 71 | - uninstall 72 | when: rook.reset.storage_delete is defined and rook.reset.storage_delete 73 | 74 | when: rook is defined and rook.enabled 75 | tags: 76 | - rook 77 | - reset 78 | -------------------------------------------------------------------------------- /roles/helm/tasks/helm_reset.yml: -------------------------------------------------------------------------------- 1 | --- 2 | #- hosts: master 3 | # become: yes 4 | # become_method: sudo 5 | # tags: 6 | # - helm_reset 7 | # - helm 8 | # - reset 9 | # tasks: 10 | 11 | - set_fact: 12 | env_kc: '{{ proxy_env |default({}) | combine ({"PATH" : "/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/bin" }) | combine ({"KUBECONFIG" :"/etc/kubernetes/admin.conf"}) }}' 13 | tags: 14 | - helm_reset 15 | - reset 16 | - helm_purge 17 | - helm2 18 | 19 | - block: 20 | - name: helm reset - helm list all apps 21 | environment: '{{env_kc}}' 22 | shell: helm list | tail -n +2 | awk '{print $1}' 23 | register: command_results 24 | 
ignore_errors: true 25 | changed_when: false 26 | tags: 27 | - helm_purge 28 | 29 | - name: helm reset - helm delete purge all apps 30 | environment: '{{env_kc}}' 31 | command: helm delete --purge {{ item }} 32 | with_items: "{{ command_results.stdout_lines }}" 33 | ignore_errors: true 34 | tags: 35 | - helm_purge 36 | 37 | - name: helm reset - wait till all helm apps are deleted/purged 38 | environment: '{{env_kc}}' 39 | shell: helm list | tail -n +2 40 | register: command_result 41 | until: command_result.stdout == "" 42 | retries: 20 43 | delay: 3 44 | changed_when: false 45 | ignore_errors: true 46 | tags: 47 | - helm_purge 48 | 49 | - name: helm reset - remove/cleanup 50 | environment: '{{env_kc}}' 51 | shell: ( helm reset --force --remove-helm-home --tiller-connection-timeout 30 || true ) && sleep 2 && ( kubectl --namespace=kube-system delete --cascade=true --now=true --wait=true --ignore-not-found=true deployment tiller-deploy || true ) && ( kubectl delete service --namespace=kube-system tiller-deploy || true ) && ( kubectl delete clusterrolebinding tiller || true ) && ( kubectl delete --namespace=kube-system sa tiller ) 52 | ignore_errors: true 53 | 54 | - name: helm reset - list all k8s resources with tiller-deploy in their name 55 | environment: '{{env_kc}}' 56 | shell: kubectl --namespace=kube-system get all --no-headers | grep tiller-deploy | awk '{print $1}' 57 | register: command_results 58 | ignore_errors: true 59 | changed_when: false 60 | 61 | - name: helm reset - delete all k8s resources with tiller-deploy in their name 62 | environment: '{{env_kc}}' 63 | command: kubectl --namespace=kube-system delete --cascade=true --now=true --wait=true --ignore-not-found=true {{ item }} 64 | with_items: "{{ command_results.stdout_lines }}" 65 | ignore_errors: true 66 | tags: 67 | - helm_purge 68 | when: 69 | - full_helm_reinstall is defined and full_helm_reinstall 70 | tags: 71 | - helm_reset 72 | - reset 73 | - helm2 74 | 75 | -------------------------------------------------------------------------------- /group_vars/all/KubeletConfiguration.yml: -------------------------------------------------------------------------------- 1 | #https://pkg.go.dev/k8s.io/kubelet/config/v1beta1?tab=doc#KubeletConfiguration 2 | #https://pkg.go.dev/k8s.io/kubernetes/pkg/kubelet/apis/config/v1beta1?tab=doc#KubeletConfiguration 3 | #check latest api ver here: https://pkg.go.dev/k8s.io/kubernetes/pkg/kubelet/apis/config?tab=subdirectories 4 | # https://pkg.go.dev/k8s.io/kubelet/config #?tab=subdirectories 5 | KubeletConfiguration: 6 | #nodeRegistration: 7 | # kubeletExtraArgs: 8 | # --cloud-provider={{ kubeadm_master_config.cloudProvider }} --cloud-config=/etc/kubernetes/cloud-config 9 | # address: 0.0.0.0 10 | apiVersion: kubelet.config.k8s.io/v1beta1 11 | # authentication: 12 | # anonymous: 13 | # enabled: false 14 | # webhook: 15 | # cacheTTL: 2m0s 16 | # enabled: true 17 | # x509: 18 | # clientCAFile: /etc/kubernetes/pki/ca.crt 19 | # authorization: 20 | # mode: Webhook 21 | # webhook: 22 | # cacheAuthorizedTTL: 5m0s 23 | # cacheUnauthorizedTTL: 30s 24 | cgroupDriver: systemd #cgroupfs # systemd should be the new default with 1.24+ 25 | # cgroupsPerQOS: true 26 | # clusterDNS: 27 | # - 10.96.0.10 28 | # clusterDomain: cluster.local 29 | # configMapAndSecretChangeDetectionStrategy: Watch 30 | # containerLogMaxFiles: 5 31 | # containerLogMaxSize: 10Mi 32 | # contentType: application/vnd.kubernetes.protobuf 33 | # cpuCFSQuota: true 34 | # cpuCFSQuotaPeriod: 100ms 35 | # cpuManagerPolicy: none 36 | # 
cpuManagerReconcilePeriod: 10s 37 | # enableControllerAttachDetach: true 38 | # enableDebuggingHandlers: true 39 | # enforceNodeAllocatable: 40 | # - pods 41 | # eventBurst: 10 42 | # eventRecordQPS: 5 43 | # evictionHard: 44 | # imagefs.available: 15% 45 | # memory.available: 100Mi 46 | # nodefs.available: 10% 47 | # nodefs.inodesFree: 5% 48 | # evictionPressureTransitionPeriod: 5m0s 49 | # failSwapOn: true 50 | # fileCheckFrequency: 20s 51 | # hairpinMode: promiscuous-bridge 52 | # healthzBindAddress: 127.0.0.1 53 | # healthzPort: 10248 54 | # httpCheckFrequency: 20s 55 | imageGCHighThresholdPercent: 70 56 | imageGCLowThresholdPercent: 60 57 | # imageMinimumGCAge: 2m0s 58 | # iptablesDropBit: 15 59 | # iptablesMasqueradeBit: 14 60 | kind: KubeletConfiguration 61 | # kubeAPIBurst: 10 62 | # kubeAPIQPS: 5 63 | # makeIPTablesUtilChains: true 64 | # maxOpenFiles: 1000000 65 | # maxPods: 110 66 | # nodeLeaseDurationSeconds: 40 67 | # nodeStatusUpdateFrequency: 10s 68 | # oomScoreAdj: -999 69 | # podPidsLimit: -1 70 | # port: 10250 71 | # registryBurst: 10 72 | # registryPullQPS: 5 73 | # resolvConf: /etc/resolv.conf 74 | # rotateCertificates: true 75 | # runtimeRequestTimeout: 2m0s 76 | # serializeImagePulls: true 77 | # staticPodPath: /etc/kubernetes/manifests 78 | # streamingConnectionIdleTimeout: 4h0m0s 79 | # syncFrequency: 1m0s 80 | # volumeStatsAggPeriod: 1m0s 81 | -------------------------------------------------------------------------------- /roles/tools/tasks/labels.yml: -------------------------------------------------------------------------------- 1 | --- 2 | #- hosts: nodes 3 | # tasks: 4 | ## The below should be 4 times, (to cover nodes fqdn, nodes nofqdn, master fqdn, master nofqdn) based on: 5 | # - not custom.networking.fqdn.always 6 | # - not custom.networking.fqdn.node 7 | # - inventory_hostname not in groups['masters'] 8 | # As it's overcomplicating and sometimes k8s still has nodes with short (even if fqdn is true), we default to: 9 | # Due to ansible in probelmatic netw setups (https://github.com/ReSearchITEng/kubeadm-playbook/issues/81, https://github.com/ansible/ansible/issues/38777 ) 10 | # we have to add the 3rd option as well... 
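# (Illustrative note, not part of the role): after these labeling tasks run, the applied labels can be verified manually from the primary master, e.g.: KUBECONFIG=/etc/kubernetes/admin.conf kubectl get nodes --show-labels , or filtered by the default label used below: KUBECONFIG=/etc/kubernetes/admin.conf kubectl get nodes -l node-role.kubernetes.io/compute=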
11 | 12 | - block: 13 | - name: labeling using inventory_hostname_short - {{ inventory_hostname_short }} - (this or below 2 will end with error) 14 | delegate_to: "{{groups['primary-master'][0]}}" 15 | environment: 16 | KUBECONFIG: /etc/kubernetes/admin.conf 17 | shell: kubectl label nodes {{ inventory_hostname_short }} {{label | default ("node-role.kubernetes.io/compute=") }} --overwrite 18 | register: command_result 19 | changed_when: '"not labeled" not in command_result.stdout' 20 | ignore_errors: true 21 | 22 | - name: labeling using ansible_fqdn - {{ inventory_hostname }} - (this or above or below will end with error) 23 | delegate_to: "{{groups['primary-master'][0]}}" 24 | environment: 25 | KUBECONFIG: /etc/kubernetes/admin.conf 26 | shell: kubectl label nodes {{ inventory_hostname }} {{label | default ("node-role.kubernetes.io/compute=") }} --overwrite 27 | register: command_result 28 | changed_when: '"not labeled" not in command_result.stdout' 29 | ignore_errors: true 30 | 31 | - name: labeling using ansible_fqdn - {{ ansible_fqdn }} - (this or one of the above 2 will end with error) 32 | delegate_to: "{{groups['primary-master'][0]}}" 33 | environment: 34 | KUBECONFIG: /etc/kubernetes/admin.conf 35 | shell: kubectl label nodes {{ ansible_fqdn }} {{label | default ("node-role.kubernetes.io/compute=") }} --overwrite 36 | register: command_result 37 | changed_when: '"not labeled" not in command_result.stdout' 38 | ignore_errors: true 39 | when: 40 | - inventory_hostname not in groups['masters'] 41 | tags: 42 | - all 43 | 44 | 45 | - block: 46 | # - name: get name of primary_master when single node cluster 47 | # environment: 48 | # KUBECONFIG: /etc/kubernetes/admin.conf 49 | # shell: 'kubectl get no -o=jsonpath="{.items[0].metadata.name}"' 50 | # register: result_primary_master_name 51 | 52 | - name: when cluster is one machine only, labeling it also as infra node 53 | environment: 54 | KUBECONFIG: /etc/kubernetes/admin.conf 55 | shell: 'kubectl label nodes -l="node-role.kubernetes.io/control-plane=" "node-role.kubernetes.io/infra=" --overwrite' 56 | register: command_result 57 | changed_when: '"not labeled" not in command_result.stdout' 58 | ignore_errors: true 59 | when: 60 | - groups['all'] | length == 1 61 | tags: 62 | - all 63 | 64 | -------------------------------------------------------------------------------- /batch_deploy_serial_non_parallel.yml: -------------------------------------------------------------------------------- 1 | --- 2 | #- hosts: master 3 | # become: yes 4 | # become_method: sudo 5 | # tags: 6 | # - helm 7 | # tasks: 8 | 9 | - set_fact: 10 | env_kc: '{{ proxy_env |default({}) | combine ({"PATH" : "/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/bin:/home/linuxbrew/.linuxbrew/bin" }) | combine ({"KUBECONFIG" :"/etc/kubernetes/admin.conf"}) }}' 11 | tags: 12 | - always 13 | 14 | - name: helm related crds,webhooks,rbacs,operators via manifests 15 | block: 16 | - name: applying pre_helm_manifests 17 | command: kubectl apply -f {{ item }} 18 | with_items: '{{ pre_helm_manifests }}' 19 | environment: '{{env_kc}}' 20 | when: 21 | - pre_helm_manifests is defined 22 | - pre_helm_manifests | length > 0 23 | register: command_result 24 | changed_when: '" changed" in command_result.stdout or " created" in command_result.stdout' 25 | # there might be mutiplce artifacts in the same yaml, so we cannot simply rely on "unchanged" 26 | tags: 27 | - pre_helm_manifests 28 | - charts_deploy 29 | 30 | - block: 31 | - name: Group helm charts by batch 32 | set_fact: 33 | 
charts_by_batch: "{{ charts_by_batch | default({}) | combine({ (item.batch | default(50)) | string: (charts_by_batch[item.batch | default(50) | string] | default([])) + [item] }) }}" 34 | with_items: "{{ helm.packages_list }}" 35 | when: 36 | - helm is defined 37 | - helm.packages_list is defined 38 | 39 | - name: Get sorted batch numbers 40 | set_fact: 41 | sorted_batches: "{{ charts_by_batch.keys() | map('int') | sort | map('string') }}" 42 | when: 43 | - charts_by_batch is defined 44 | 45 | - name: Deploy helm charts batch by batch 46 | include_tasks: deploy_batch.yml 47 | vars: 48 | batch_number: "{{ batch_item }}" 49 | charts_in_batch: "{{ charts_by_batch[batch_item] }}" 50 | loop: "{{ sorted_batches }}" 51 | loop_control: 52 | loop_var: batch_item 53 | when: 54 | - sorted_batches is defined 55 | - charts_by_batch is defined 56 | tags: 57 | - helm 58 | - charts_deploy 59 | 60 | - name: "helm full sanity - wait for all installed charts to become running after all batches deployed" 61 | block: 62 | - name: Wait 3 seconds before helm sanity check 63 | pause: 64 | seconds: 3 65 | changed_when: false 66 | 67 | - name: helm full sanity - wait for all pods to be running (excluding some that may not be ready immediately) 68 | shell: "kubectl get pods --all-namespaces --no-headers | grep -v -w 'Running' | grep -v 'kube-dns' | grep -v 'coredns' | grep -v 'tiller-deploy' || true" 69 | environment: '{{ env_kc }}' 70 | register: command_result 71 | until: command_result.stdout == "" 72 | retries: "{{ RETRIES | default(40) }}" 73 | delay: 3 74 | changed_when: false 75 | tags: 76 | - sanity 77 | - helm_sanity 78 | 79 | - name: Display cluster status after all helm charts deployed 80 | shell: "kubectl get pods --all-namespaces" 81 | environment: '{{ env_kc }}' 82 | register: final_status 83 | changed_when: false 84 | 85 | - debug: 86 | msg: "All helm charts deployed successfully. 
Final cluster status:" 87 | - debug: 88 | var: final_status.stdout_lines 89 | 90 | 91 | -------------------------------------------------------------------------------- /roles/helm/tasks/charts_deploy.yml: -------------------------------------------------------------------------------- 1 | --- 2 | #- hosts: master 3 | # become: yes 4 | # become_method: sudo 5 | # tags: 6 | # - helm 7 | # tasks: 8 | 9 | - set_fact: 10 | env_kc: '{{ proxy_env |default({}) | combine ({"PATH" : "/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/bin:/home/linuxbrew/.linuxbrew/bin" }) | combine ({"KUBECONFIG" :"/etc/kubernetes/admin.conf"}) }}' 11 | tags: 12 | - always 13 | 14 | - name: helm related crds,webhooks,rbacs,operators via manifests 15 | block: 16 | - name: applying pre_helm_manifests 17 | command: kubectl apply -f {{ item }} 18 | with_items: '{{ pre_helm_manifests }}' 19 | environment: '{{env_kc}}' 20 | when: 21 | - pre_helm_manifests is defined 22 | - pre_helm_manifests | length > 0 23 | register: command_result 24 | changed_when: '" changed" in command_result.stdout or " created" in command_result.stdout' 25 | # there might be mutiplce artifacts in the same yaml, so we cannot simply rely on "unchanged" 26 | tags: 27 | - pre_helm_manifests 28 | - charts_deploy 29 | 30 | - block: 31 | - name: Group helm charts by batch 32 | set_fact: 33 | charts_by_batch: "{{ charts_by_batch | default({}) | combine({ (item.batch | default(50)) | string: (charts_by_batch[item.batch | default(50) | string] | default([])) + [item] }) }}" 34 | with_items: "{{ helm.packages_list }}" 35 | when: 36 | - helm is defined 37 | - helm.packages_list is defined 38 | 39 | - name: Get sorted batch numbers 40 | set_fact: 41 | sorted_batches: "{{ charts_by_batch.keys() | map('int') | sort | map('string') }}" 42 | when: 43 | - charts_by_batch is defined 44 | 45 | - name: Deploy helm charts batch by batch 46 | include_tasks: deploy_batch.yml 47 | vars: 48 | batch_number: "{{ batch_item }}" 49 | charts_in_batch: "{{ charts_by_batch[batch_item] }}" 50 | loop: "{{ sorted_batches }}" 51 | loop_control: 52 | loop_var: batch_item 53 | when: 54 | - sorted_batches is defined 55 | - charts_by_batch is defined 56 | tags: 57 | - helm 58 | - charts_deploy 59 | 60 | - name: "helm full sanity - wait for all installed charts to become running after all batches deployed" 61 | tags: 62 | - helm 63 | - charts_deploy 64 | block: 65 | - name: Wait 3 seconds before helm sanity check 66 | pause: 67 | seconds: 3 68 | changed_when: false 69 | 70 | - name: helm full sanity - wait for all installed charts to become running 71 | environment: 72 | KUBECONFIG: /etc/kubernetes/admin.conf 73 | shell: "kubectl get --all-namespaces pods --no-headers | grep -v -w 'Running' || true " 74 | register: command_result 75 | tags: 76 | - sanity_helm 77 | - sanity 78 | - charts_deploy_sanity 79 | until: command_result.stdout == "" 80 | retries: 60 81 | delay: 3 82 | changed_when: false 83 | when: wait_charts_deploy_sanity | default('false') | bool # it's not mandatory to wait/make sure absolutelly all deployments are fine. 84 | 85 | - name: Display cluster status after all helm charts deployed 86 | shell: "kubectl get pods --all-namespaces" 87 | environment: '{{ env_kc }}' 88 | register: final_status 89 | changed_when: false 90 | 91 | - debug: 92 | msg: "All helm charts deployed successfully. 
Final cluster status:" 93 | - debug: 94 | var: final_status.stdout_lines 95 | 96 | -------------------------------------------------------------------------------- /roles/common/tasks/kernel_modules.yml: -------------------------------------------------------------------------------- 1 | --- 2 | ## ip_vs is optional for k8s (but ideal), and it's mandatory for keepalived especially when in docker 3 | - name: kernel_modules - load ip_vs group 4 | modprobe: name={{ item }} state=present 5 | with_items: 6 | - ip_vs_wrr 7 | - ip_vs_rr 8 | - ip_vs_sh 9 | - ip_vs 10 | # - nf_conntrack_ipv4 # removed as it fails in newer kernels 11 | when: 12 | - KubeProxyConfiguration.mode is defined 13 | - KubeProxyConfiguration.mode == "ipvs" 14 | 15 | - name: kernel_modules - persist ip_vs kernel modules 16 | copy: 17 | dest: /etc/modules-load.d/k8s_ip_vs.conf 18 | content: | 19 | ip_vs_wrr 20 | ip_vs_rr 21 | ip_vs_sh 22 | ip_vs 23 | # nf_conntrack_ipv4 # removed as it fails in newer kernels 24 | when: 25 | - KubeProxyConfiguration.mode is defined 26 | - KubeProxyConfiguration.mode == "ipvs" 27 | 28 | - name: load bridge kernel module ( for /proc/sys/net/bridge/bridge-nf-call-iptables ) 29 | modprobe: name={{item}} state=present 30 | with_items: 31 | - bridge 32 | ignore_errors: true 33 | 34 | - name: every reboot load bridge kernel modules 35 | copy: 36 | dest: "/etc/modules-load.d/k8s_bridge.conf" 37 | content: | 38 | bridge 39 | 40 | - name: load br_netfilter kernel module (for /proc/sys/net/bridge/bridge-nf-call-iptables in newer kernels) 41 | modprobe: name={{item}} state=present 42 | with_items: 43 | - br_netfilter 44 | register: br_netfilter_load_result 45 | ignore_errors: true # because in some old kernels br_netfilter does not exist and bridge is enough 46 | 47 | - name: every reboot load bridge and br_netfilter kernel modules (for k8s) 48 | # Note: br_netfilter is available only in the newer kernel versions 49 | copy: 50 | dest: "/etc/modules-load.d/k8s_br_netfilter.conf" 51 | content: | 52 | br_netfilter 53 | when: 54 | - br_netfilter_load_result is not failed 55 | 56 | - name: setup kernel parameters for k8s - reboot might be required, but we will not trigger 57 | #here RH asks for reboot: https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/7/html/load_balancer_administration/s1-initial-setup-forwarding-vsa 58 | sysctl: name={{item.name}} value={{item.value}} state=present 59 | with_items: 60 | - {name: "net.bridge.bridge-nf-call-iptables", value: "1" } 61 | - {name: "net.bridge.bridge-nf-call-ip6tables", value: "1" } 62 | - {name: "net.ipv4.ip_forward", value: "1" } 63 | - {name: "net.ipv4.ip_nonlocal_bind", value: "1" } 64 | 65 | - name: setup kernel parameters for network net.netfilter.nf_conntrack_maxi (optional) 66 | # https://docs.projectcalico.org/maintenance/troubleshoot/troubleshooting 67 | sysctl: name={{item.name}} value={{item.value}} state=present 68 | with_items: 69 | - {name: "net.netfilter.nf_conntrack_max", value: "1000000" } 70 | when: 71 | - ( KubeProxyConfiguration.mode is not defined ) or ( KubeProxyConfiguration.mode == "iptables" ) 72 | 73 | - name: setup kernel parameters for eventual elasticsearch - reboot might be required, but we will not trigger 74 | #here RH asks for reboot: https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/7/html/load_balancer_administration/s1-initial-setup-forwarding-vsa 75 | sysctl: name={{item.name}} value={{item.value}} state=present 76 | with_items: 77 | - {name: "vm.max_map_count", value: "262144" } 78 | 
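# (Illustrative note, not part of the role): to verify on a host that the modules and sysctls above took effect, one can run, e.g.: lsmod | grep -E 'ip_vs|bridge|br_netfilter' ; sysctl net.bridge.bridge-nf-call-iptables net.ipv4.ip_forward net.ipv4.ip_nonlocal_bind vm.max_map_count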
79 | -------------------------------------------------------------------------------- /docs/PRODUCTION_TIPS.md: -------------------------------------------------------------------------------- 1 | # When to use this playbook 2 | Run this playbook only for: 3 | - generating the cluster the first time 4 | - Adding a new node, using these steps: 5 | 1. create a new hosts file and populate values only for **primary-master** (which won't be touched) and the sections for the new nodes that should join the cluster 6 | (either compute under **[nodes]** or masters (control plane) under **[secondary-masters]**; all non-relevant groups should be empty) 7 | 2. run the `ansible-playbook -i hosts site.yml --tags node` (note the **--tags node** ) 8 | 9 | # Use conventions 10 | Besides the "master" role, it's suggested to also use the "infra" role (by specifying `label=node-role.kubernetes.io/infra=` in the hosts file). 11 | Machines marked as infra usually hold Prometheus, nginx ingress controllers, grafana, EFK, etc... 12 | Usually there should be a minimum of 3 master nodes and 3 infra nodes; compute (aka worker) nodes -> as many as required by the actual workload of the cluster. 13 | 14 | # Secure Dashboard 15 | - from addons.yaml, remove "--set enableInsecureLogin=True --set enableSkipLogin=True" 16 | - also you may want to review the dashboard service account permissions you desire 17 | 18 | # Heads-up 19 | When you have master-ha, the cluster can function properly only while at least 1/2 + 1 of the masters are up (so quorum is maintained). If you have 3 masters, you must have at least 2 masters up for the cluster to function. 20 | FYI: the good part is that the workload of a k8s cluster will continue to be served even without any master running, BUT if any pod crashes, or there is any activity that needs the masters up, it won't be handled till the masters are up again. 21 | 22 | # Certificates: 23 | - certs will expire 1 year after installation. The good part is that on every kubeadm upgrade the certs are regenerated. 24 | So, if you upgrade the cluster at least once a year (which you should, to keep up with security fixes at least), then you don't need to be concerned. 25 | 26 | # Check security settings: 27 | - https://www.stackrox.com/post/2019/09/12-kubernetes-configuration-best-practices/ (PRs based on this are welcome) 28 | - https://kubernetes.io/docs/tasks/administer-cluster/securing-a-cluster/ 29 | - secure using: https://github.com/nirmata/kyverno/blob/master/samples/README.md 30 | - test using: https://github.com/aquasecurity/kube-bench 31 | 32 | # Security improvements already done: 33 | - inhibited nginx version info in headers: server-token=False, hide-headers=Server ; More params on: https://github.com/kubernetes/ingress-nginx/blob/master/docs/user-guide/nginx-configuration/configmap.md 34 | 35 | # Known pending improvements of k8s/kubeadm: 36 | - metrics-server cannot validate kubelet certs; Proper fix for "--kubelet-insecure-tls" will be in k8s 1.19 https://github.com/kubernetes/kubeadm/issues/1602 ; An alternative is manually generating and approving certs for each node: serverTLSBootstrap: https://github.com/kubernetes-sigs/metrics-server/issues/146#issuecomment-472655656 37 | 38 | 39 | # Other useful charts: 40 | - https://github.com/planetlabs/draino/tree/master/helm/draino -> when a node is not healthy, it's automatically cordoned and containers drained (Kubernetes Node Problem Detector and Cluster Autoscaler). 
41 | - Use Public IP Address from a cloud vendor, simulating a LoadBalancer: https://github.com/inlets/inlets-operator 42 | 43 | # Debian - package hold 44 | make sure k8s tools are not upgraded by mistake (do it post ansible) 45 | ``` 46 | sudo apt-mark hold kubectl kubelet kubeadm kubernetes-cni cri-tools 47 | ``` 48 | allow k8s tools to be upgraded (do it when upgrade is desired) 49 | ``` 50 | sudo apt-mark unhold kubectl kubelet kubeadm kubernetes-cni cri-tools 51 | ``` 52 | 53 | -------------------------------------------------------------------------------- /roles/helm/tasks/helm.yml: -------------------------------------------------------------------------------- 1 | --- 2 | #- hosts: master 3 | # become: yes 4 | # become_method: sudo 5 | # tags: 6 | # - helm 7 | # tasks: 8 | 9 | - set_fact: 10 | env_kc: '{{ proxy_env |default({}) | combine ({"PATH" : "/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/bin:/home/linuxbrew/.linuxbrew/bin" }) | combine ({"KUBECONFIG" :"/etc/kubernetes/admin.conf"}) }}' 11 | tags: 12 | - always 13 | 14 | - block: 15 | - name: check helm version (if exists) 16 | environment: '{{env_kc}}' 17 | shell: helm version --template="{{ '{{.Version}}' }}" 18 | #local_action: shell helm version --template="{{ '{{.Version}}' }}" 19 | register: helm_version_result 20 | 21 | - name: set_fact helm_install_not_required when existing version is identical 22 | set_fact: helm_install_not_required="not" 23 | when: 24 | - helm_version_result is defined 25 | - helm_version_result.stdout | length > 0 26 | - helm_version_result.stdout_lines[0] == helm.helm_version 27 | 28 | - debug: 29 | msg: 'helm versions compare. Existing (if any): requested: {{helm.helm_version}} and existings (if any): {{helm_version_result.stdout_lines[0]}}' 30 | when: 31 | - helm_version_result is defined 32 | - helm_version_result.stdout | length > 0 33 | #when: helm_version_result.stdout_lines[0] == helm.helm_version 34 | 35 | ignore_errors: true 36 | when: 37 | - helm.helm_version is defined 38 | tags: 39 | - helm 40 | 41 | - block: 42 | - name: 'Download helm binary archive {{ helm.archive_url | default ("https://get.helm.sh") }}/helm-{{ helm.helm_version }}-linux-{{ HOST_ARCH }}.tar.gz' 43 | environment: '{{env_kc}}' 44 | get_url: 45 | url: "{{ helm.archive_url | default ('https://get.helm.sh') }}/helm-{{ helm.helm_version }}-linux-{{ HOST_ARCH }}.tar.gz" 46 | dest: /tmp/helm-{{ helm.helm_version }}-linux-{{ HOST_ARCH }}.tar.gz 47 | force: no 48 | mode: "0755" 49 | retries: 3 50 | delay: 3 51 | register: result 52 | until: result is not failed 53 | 54 | - name: unarchive 55 | unarchive: 56 | dest: /tmp/ 57 | src: /tmp/helm-{{ helm.helm_version }}-linux-{{ HOST_ARCH }}.tar.gz 58 | mode: "0755" 59 | remote_src: yes 60 | 61 | - name: helm - place binary in destination folder 62 | copy: 63 | src: /tmp/linux-{{ HOST_ARCH }}/helm 64 | dest: /usr/local/bin/ 65 | force: yes 66 | mode: "0755" 67 | remote_src: yes 68 | become: true 69 | become_user: root 70 | when: 71 | - helm_install_not_required is not defined 72 | - helm.helm_version is defined 73 | tags: 74 | - helm 75 | 76 | - block: 77 | 78 | # - name: helm repo remove (before add) - deprecated by --force-update flag 79 | # environment: '{{env_kc}}' 80 | # command: helm repo remove {{ item.name }} 81 | # with_items: 82 | # - '{{ helm.repos | default("") }}' 83 | # when: helm is defined and helm.repos is defined 84 | # ignore_errors: true 85 | 86 | - name: helm repo add 87 | environment: '{{env_kc}}' 88 | command: helm repo add --force-update {{ 
item.name }} {{ item.url }} 89 | with_items: 90 | - '{{ helm.repos | default("") }}' 91 | when: helm is defined and helm.repos is defined 92 | retries: 7 93 | delay: 3 94 | register: result 95 | until: result is not failed 96 | 97 | - name: helm repo update #Sometimes initial repo add corrupts the repo and update fixes it. 98 | environment: '{{env_kc}}' 99 | command: helm repo update 100 | when: helm is defined 101 | retries: 7 102 | delay: 3 103 | register: result 104 | until: result is not failed 105 | 106 | tags: 107 | - helm 108 | 109 | -------------------------------------------------------------------------------- /roles/tools/tasks/postinstall_messages.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - set_fact: 3 | env_kc: '{{ {} | combine ({"PATH" : "/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/bin/" }) | combine ({"KUBECONFIG" :"/etc/kubernetes/admin.conf"}) }}' 4 | tags: 5 | - always 6 | 7 | ### Post install messages 8 | - block: 9 | 10 | # Too much clutter, so disabling get events 11 | # - name: Get events 12 | # command: kubectl get events --all-namespaces 13 | # register: command_result 14 | # changed_when: false 15 | # tags: 16 | # - cluster_info 17 | # - cluster_status 18 | 19 | # - name: Display events 20 | # debug: 21 | # var: command_result.stdout_lines 22 | # changed_when: false 23 | # tags: 24 | # - cluster_info 25 | # - cluster_status 26 | 27 | - name: Get deployed helm charts 28 | shell: "helm list --all-namespaces --all --superseded --pending" 29 | changed_when: false 30 | register: command_result 31 | tags: 32 | - cluster_status 33 | - helm 34 | - post_deploy 35 | 36 | - name: Print all helm charts 37 | debug: 38 | var: command_result.stdout_lines 39 | changed_when: false 40 | 41 | - name: Get pods 42 | command: "kubectl get pods -o wide --all-namespaces --show-labels=true --show-kind=true" 43 | register: command_result 44 | changed_when: false 45 | tags: 46 | - cluster_status 47 | 48 | - name: Display pods 49 | debug: 50 | var: command_result.stdout_lines 51 | changed_when: false 52 | tags: 53 | - cluster_status 54 | 55 | - name: Get pods not yet in Running status 56 | shell: "kubectl get --all-namespaces pods --no-headers | grep -v -w 'Running' || true " 57 | register: command_result 58 | changed_when: false 59 | 60 | - name: Print pods not yet in Running status 61 | debug: 62 | var: command_result.stdout_lines 63 | 64 | - name: Get nodes 65 | command: kubectl get nodes 66 | register: command_result 67 | changed_when: false 68 | 69 | - name: Display nodes 70 | debug: 71 | var: command_result.stdout_lines 72 | changed_when: false 73 | 74 | - name: Get cluster-info 75 | command: kubectl cluster-info 76 | register: command_result 77 | changed_when: false 78 | 79 | - name: cluster-info 80 | debug: 81 | #msg: "{{ command_result.stdout_lines | from_yaml }}" 82 | var: command_result.stdout_lines 83 | changed_when: false 84 | 85 | - name: Print cluster information and other useful commands. 86 | vars: 87 | msg: | 88 | Your cluster should be up and running ! 
89 | Now you may: 90 | - type: alias to see the predefined aliases 91 | - for a command line "dashboard", on master type: 92 | - wp # watch pods in all namespaces (optionally --show-labels ) 93 | - kg po # kg='kubectl get --all-namespaces -o wide' 94 | - ks # ks='kubectl -n kube-system ' get po 95 | - k top node / pod # More tips: https://kubernetes.io/docs/user-guide/kubectl-cheatsheet/ 96 | - to check pods which are not yet in Running status, run: "kubectl get --all-namespaces pods --no-headers | grep -v -w 'Running' " 97 | 98 | - browse your master (using fqdn) to see the dashboard: 99 | http://{{groups["masters"][0]}} 100 | or, ideally (depending what was defined in config) 101 | http://dashboard.{{custom.networking.dnsDomain}} (when var custom.networking.dnsDomain properly defined and set in your dns ) 102 | debug: var=msg 103 | # msg: "{{ msg.split('\n') }}" 104 | register: command_result 105 | changed_when: false 106 | #http://dashboard.{{ custom.networking.dnsDomain | default ("you may want to define custom.networking.dnsDomain") }} 107 | 108 | # - name: Print cluster information and other useful commands. 109 | # debug: vars=command_result 110 | # changed_when: false 111 | environment: '{{env_kc}}' 112 | tags: 113 | - cluster_info 114 | - postinstall_messages 115 | run_once: true 116 | -------------------------------------------------------------------------------- /roles/helm/tasks/deploy_batch.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: "Deploy batch {{ batch_number }} helm charts" 3 | debug: 4 | msg: "Deploying batch {{ batch_number }} with {{ charts_in_batch | length }} charts in parallel" 5 | 6 | - name: "Deploy helm charts in batch {{ batch_number }} (without namespace) - async" 7 | command: > 8 | helm upgrade --install {{ item.name }} {{ item.repo }} 9 | {{ item.options | default('') }} 10 | --create-namespace 11 | with_items: "{{ charts_in_batch }}" 12 | async: 600 # 10 minutes timeout for each chart 13 | poll: 0 # Don't wait, start all in parallel 14 | environment: '{{ env_kc }}' 15 | when: 16 | - item.namespace is not defined or item.namespace == "" 17 | register: helm_jobs_no_ns 18 | 19 | - name: "Deploy helm charts in batch {{ batch_number }} (with namespace) - async" 20 | command: > 21 | helm upgrade --install {{ item.name }} {{ item.repo }} 22 | --namespace {{ item.namespace }} 23 | {{ item.options | default('') }} 24 | --create-namespace 25 | with_items: "{{ charts_in_batch }}" 26 | async: 600 # 10 minutes timeout for each chart 27 | poll: 0 # Don't wait, start all in parallel 28 | environment: '{{ env_kc }}' 29 | when: 30 | - item.namespace is defined 31 | - item.namespace != "" 32 | register: helm_jobs_with_ns 33 | 34 | - name: "Wait for all helm charts in batch {{ batch_number }} to complete" 35 | async_status: 36 | jid: "{{ item.ansible_job_id }}" 37 | register: helm_result_no_ns 38 | until: helm_result_no_ns.finished 39 | retries: 120 # 10 minutes total (120 * 5 seconds) 40 | delay: 5 41 | with_items: "{{ helm_jobs_no_ns.results | default([]) }}" 42 | when: 43 | - helm_jobs_no_ns is defined 44 | - helm_jobs_no_ns.results is defined 45 | - item.ansible_job_id is defined 46 | changed_when: '"deployed" in helm_result_no_ns.stdout' 47 | 48 | - name: "Wait for all helm charts with namespace in batch {{ batch_number }} to complete" 49 | async_status: 50 | jid: "{{ item.ansible_job_id }}" 51 | register: helm_result_with_ns 52 | until: helm_result_with_ns.finished 53 | retries: 120 # 10 minutes total (120 * 
5 seconds) 54 | delay: 5 55 | with_items: "{{ helm_jobs_with_ns.results | default([]) }}" 56 | when: 57 | - helm_jobs_with_ns is defined 58 | - helm_jobs_with_ns.results is defined 59 | - item.ansible_job_id is defined 60 | changed_when: '"deployed" in helm_result_with_ns.stdout' 61 | 62 | - name: "Display completion status for batch {{ batch_number }}" 63 | debug: 64 | msg: "All {{ charts_in_batch | length }} charts in batch {{ batch_number }} have been deployed" 65 | 66 | - name: "CALICO BLOCK - after batch {{ batch_number }} when tigera-operator in charts_in_batch" 67 | when: '"tigera-operator" in (charts_in_batch | map(attribute="namespace") | list)' 68 | block: 69 | 70 | - name: Calico - Wait few seconds for deployments to start - wait to make sure calico-node is getting started - required for containerd... 71 | pause: seconds=10 72 | changed_when: false 73 | 74 | - name: Wait for calico-node daemonset to be ready 75 | shell: kubectl -n calico-system get daemonset calico-node -o jsonpath='{.status.numberReady}/{.status.desiredNumberScheduled}' 76 | environment: '{{ env_kc }}' 77 | register: calico_node_ready 78 | until: calico_node_ready.stdout.split('/')[0] == calico_node_ready.stdout.split('/')[1] and calico_node_ready.stdout.split('/')[0] | int > 0 79 | retries: 60 80 | delay: 10 81 | ignore_errors: true 82 | 83 | - name: Calico - Restart containerd due to containerd cni bugs are still there in containerd 1.6.6 84 | systemd: name=containerd state=restarted enabled=yes daemon_reload=yes 85 | 86 | - name: Calico - Wait few seconds for containerd to restart 87 | pause: seconds=10 88 | changed_when: false 89 | 90 | - name: Wait for CoreDNS deployment to be ready 91 | shell: kubectl -n kube-system get deployment coredns -o jsonpath='{.status.readyReplicas}' 92 | environment: '{{ env_kc }}' 93 | register: coredns_ready 94 | until: coredns_ready.stdout | int > 0 95 | retries: 20 96 | delay: 10 97 | ignore_errors: false 98 | changed_when: false 99 | 100 | - debug: 101 | msg: "Calico networking components are ready, proceeding to next batch" 102 | -------------------------------------------------------------------------------- /all_reset.yml: -------------------------------------------------------------------------------- 1 | --- 2 | ## Preparations 3 | ## Making sure python exists on all nodes, so Ansible will be able to run: 4 | - hosts: all 5 | gather_facts: no 6 | become: yes 7 | become_method: sudo 8 | pre_tasks: 9 | ## It would be best to have ansible already installed on all machines. 10 | ## But if it is not, we'll try to do it: 11 | - name: when no python2, install python2 for Ansible<2.8 (usually required on ubuntu, which defaults to python3) # Alternativelly, for Ubuntu machines, define var: ansible_python_interpreter=/usr/bin/python3 12 | raw: test -e /usr/bin/python || (apt -y update && apt install -y python-minimal) || (yum install -y python2 python-simplejson) 13 | register: output 14 | changed_when: output.stdout != "" 15 | tags: always 16 | when: 17 | - ansible_version.full is version_compare('2.8', '<') 18 | - ( ansible_python_interpreter is not defined or ansible_python_interpreter == "/usr/bin/python" ) 19 | # ansible_os_family conds. cannot be used as this is before gathering facts (where ansible is required) 20 | ignore_errors: true 21 | ## reason for ignore_errors: true 22 | ## "version_compare" was replaced with "version" starting ansible 2.5; 23 | ## CentOS/RHEL 7.x use ansible 2.4, so not able to grasp what version_compare is. 
24 | ## Ansible 2.9 removes the version_compare and does not recognize it any longer. 25 | ## As our need is to add python2 only on versions before 2.8, if this fails 26 | ## (due to missing version_compare command), we are fine. 27 | ## We do not cover cases where it fails due to other reasons, but that is a reasonable risk, 28 | ## and that issue will be captured later in the flow. 29 | 30 | - name: when no python(2/3), install python3(Debian) python2(RedHat) for Ansible>=2.8 # Alternativelly, for Ubuntu machines, define var: ansible_python_interpreter=/usr/bin/python3 31 | raw: test -e /usr/bin/python || (apt -y update && apt install -y python3-minimal) || (yum install -y python2 python-simplejson) 32 | register: output 33 | changed_when: output.stdout != "" 34 | tags: always 35 | when: 36 | - ansible_version.full is version('2.8', '>=') or ( ansible_python_interpreter is defined and ansible_python_interpreter == "/usr/bin/python3" ) 37 | # ansible_os_family conds. cannot be used as this is before gathering facts (where ansible is required) 38 | ignore_errors: true 39 | ## reason for ignore_errors: true 40 | ## is similar to the one explained above (complements it) 41 | 42 | - setup: # aka gather_facts 43 | tags: always # required for tags, see ansible issue: #14228 44 | 45 | - name: test min. vars (group_vars/all) are set (ClusterConfiguration and k8s_network_addons_urls) 46 | debug: msg='Make sure min. vars are set in group_vars/all/ (e.g. ClusterConfiguration and k8s_network_addons_urls)' 47 | when: 48 | - ClusterConfiguration is not defined 49 | - JoinConfiguration is not defined 50 | failed_when: 51 | - ClusterConfiguration is not defined 52 | - JoinConfiguration is not defined 53 | tags: always # always check if we have vars in place 54 | 55 | ## proper reset of any previous cluster (if any) 56 | - hosts: primary-master 57 | become: yes 58 | become_method: sudo 59 | tags: 60 | - reset 61 | - master 62 | roles: 63 | #- { role: helm, task: helm_reset, tags: [ 'reset', 'helm_reset' ] } # in helm3 is no longer required 64 | - { role: storage, task: remove_pvs, tags: [ 'reset', 'storage_reset', 'pvs_reset' ] } 65 | - { role: storage, task: nfs_reset, tags: [ 'reset', 'storage_reset', 'nfs_reset' ] } 66 | - { role: storage, task: rook_reset, tags: [ 'reset', 'storage_reset', 'rook_reset' ] } 67 | - { role: tools, task: reset_drain, tags: [ 'reset', 'node_reset', 'drain', 'node_drain' ] } #done on master, affecting nodes 68 | 69 | ## nodes -> reset and install common part (for all nodes) 70 | - hosts: nodes 71 | become: yes 72 | become_method: sudo 73 | tags: 74 | - node 75 | roles: 76 | - { role: tools, task: reset, tags: [ 'reset', 'node_reset' ], when: "inventory_hostname not in groups['masters']" } 77 | - { role: tools, task: weave_reset, tags: [ 'reset', 'node_reset', 'network_reset', 'weave_reset', 'weave' ], when: "inventory_hostname not in groups['masters']" } 78 | 79 | - hosts: masters 80 | become: yes 81 | become_method: sudo 82 | tags: 83 | - master 84 | roles: 85 | - { role: tools, task: reset, tags: [ 'reset', 'master_reset' ] } 86 | - { role: tools, task: weave_reset, tags: [ 'reset', 'master_reset', 'network_reset', 'weave', 'weave_reset' ] } 87 | 88 | -------------------------------------------------------------------------------- /roles/common/tasks/iptables.yml: -------------------------------------------------------------------------------- 1 | --- 2 | ### iptables: 3 | - block: 4 | # Currently it will disable the REJECT rules and change policy to allow all. 
5 | # For making pin-pointed rules, one may look at: 6 | # https://github.com/kubernetes/contrib/blob/master/ansible/roles/node/tasks/iptables.yml 7 | # For weave netw plugin, open also: TCP 6783 and UDP 6783/6784 8 | 9 | - name: Disable firewalld (CentOS/RHEL) 10 | systemd: name=firewalld state=stopped enabled=no 11 | when: ansible_os_family == "RedHat" 12 | ignore_errors: true # in case it is not even installed 13 | # For developing firewalld friendly solution, check: 14 | # https://github.com/kubernetes/contrib/tree/master/ansible/roles/ 15 | 16 | - name: Install iptables-services (if does not exist) - RedHat/CentOS 17 | package: state=present name={{ item }} #-{{version}} 18 | #environment: '{{ proxy_env | default ({}) }}' 19 | when: ansible_os_family == "RedHat" 20 | with_items: 21 | - iptables-services 22 | notify: 23 | - Restart iptables 24 | 25 | - name: Install netfilter-persistent required for saving iptables rule - Debian 26 | package: state=present name={{ item }} #-{{version}} 27 | #environment: '{{ proxy_env | default ({}) }}' 28 | when: ansible_os_family == "Debian" 29 | with_items: 30 | - netfilter-persistent 31 | 32 | - name: iptables default policies need to be ACCEPT on all chains 33 | iptables: 34 | chain: '{{item}}' 35 | policy: ACCEPT 36 | with_items: 37 | - INPUT 38 | - FORWARD 39 | - OUTPUT 40 | 41 | - name: remove the REJECT rules on all chains 42 | iptables: 43 | chain: '{{item}}' 44 | state: absent 45 | reject_with: 'icmp-host-prohibited' 46 | with_items: 47 | - INPUT 48 | - FORWARD 49 | - OUTPUT 50 | 51 | - name: remove the REJECT rules on all chains from the /etc/sysconfig/iptables (persisting the change) on RH/CentOS 52 | lineinfile: 53 | name: /etc/sysconfig/iptables 54 | state: absent 55 | line: "{{ item }}" 56 | with_items: 57 | - '-A INPUT -j REJECT --reject-with icmp-host-prohibited' 58 | - '-A FORWARD -j REJECT --reject-with icmp-host-prohibited' 59 | when: ansible_os_family == "RedHat" 60 | 61 | # alternative option to previous persistence solution with /etc/sysconfig/iptables 62 | #- name: Save iptables rules - sol2 63 | # command: service iptables save 64 | # when: ansible_os_family == "Redhat" 65 | 66 | - name: save iptables rules (Debian) 67 | shell: netfilter-persistent save 68 | when: ansible_os_family == "Debian" 69 | 70 | when: iptables_setup is defined and iptables_setup 71 | tags: 72 | - iptables 73 | - firewall 74 | 75 | # Debug iptables with: 76 | # watch -n1 iptables -vnL 77 | # Check ports: https://kubernetes.io/docs/setup/independent/install-kubeadm/ 78 | 79 | ### TODO: test min. 
ports to be allowed 80 | # sudo iptables -I INPUT -p tcp --dport 6443 -m conntrack --ctstate NEW,ESTABLISHED -j ACCEPT 81 | # ### sudo iptables -I FORWARD -p tcp --dport 6443 -m conntrack --ctstate NEW,ESTABLISHED -j ACCEPT 82 | # sudo iptables -I OUTPUT -p tcp --sport 6443 -m conntrack --ctstate ESTABLISHED -j ACCEPT 83 | 84 | 85 | # sudo iptables -I INPUT -p tcp --dport 10250 -m conntrack --ctstate NEW,ESTABLISHED -j ACCEPT 86 | # ### sudo iptables -D FORWARD -p tcp --dport 10250 -m conntrack --ctstate NEW,ESTABLISHED -j ACCEPT 87 | # sudo iptables -I OUTPUT -p tcp --sport 10250 -m conntrack --ctstate ESTABLISHED -j ACCEPT 88 | 89 | # ### sudo iptables -I INPUT -p tcp --dport 9898 -m conntrack --ctstate NEW,ESTABLISHED -j ACCEPT 90 | # sudo iptables -D FORWARD -p tcp --dport 9898 -m conntrack --ctstate NEW,ESTABLISHED -j ACCEPT 91 | # ### sudo iptables -I OUTPUT -p tcp --sport 9898 -m conntrack --ctstate ESTABLISHED -j ACCEPT 92 | 93 | # sudo iptables -D INPUT -p udp --dport 53 -m conntrack --ctstate NEW,ESTABLISHED -j ACCEPT 94 | # sudo iptables -I FORWARD -p udp --dport 53 -m conntrack --ctstate NEW,ESTABLISHED -j ACCEPT 95 | # sudo iptables -D OUTPUT -p udp --sport 53 -m conntrack --ctstate ESTABLISHED -j ACCEPT 96 | 97 | # sudo iptables -I FORWARD -p udp --sport 53 -m conntrack --ctstate NEW,ESTABLISHED -j ACCEPT 98 | # sudo iptables -I FORWARD -p tcp --dport 9100 -m conntrack --ctstate NEW,ESTABLISHED -j ACCEPT 99 | # sudo iptables -I FORWARD -p tcp --sport 9100 -m conntrack --ctstate NEW,ESTABLISHED -j ACCEPT 100 | 101 | 102 | # http://www.slsmk.com/how-to-log-iptables-dropped-packets-to-syslog/ and monitor with journalctl -kf 103 | # iptables -N LOGGINGA 104 | # iptables -I FORWARD 9 -j LOGGINGA 105 | # # iptables -D FORWARD -j LOGGINGA 106 | # iptables -A LOGGINGA -m limit --limit 10/min -j LOG --log-prefix "IPTables-A: " --log-level 4 107 | # iptables -A LOGGINGA -j DROP 108 | 109 | -------------------------------------------------------------------------------- /roles/common/tasks/aliases_completion.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: aliases and shell completion 3 | block: 4 | ### BASHRC / ZSHRC file set 5 | - name: aliases - choose where to put aliases - default local 6 | set_fact: 7 | BASHRC: '~/.bashrc' 8 | ZSHRC: '~/.zshrc' 9 | 10 | - name: aliases - choose where to put aliases - when global, on Debian family 11 | set_fact: 12 | BASHRC: '/etc/bash.bashrc' 13 | ZSHRC: '/etc/zshrc' 14 | when: 15 | - aliases.rc is defined 16 | - aliases.rc == "global" 17 | - ansible_os_family == "Debian" 18 | 19 | - name: aliases - choose where to put aliases - when global, on RedHat family 20 | set_fact: 21 | BASHRC: '/etc/bashrc' 22 | ZSHRC: '/etc/zshrc' 23 | when: 24 | - aliases.rc is defined 25 | - aliases.rc == "global" 26 | - ansible_os_family == "RedHat" 27 | 28 | - name: aliases - choose where to put aliases - when custom 29 | set_fact: 30 | BASHRC: "{{ aliases.rc_bash_custom | default ('~/.bashrc') }}" 31 | ZSHRC: "{{ aliases.rc_zsh_custom | default ('~/.zshrc') }}" 32 | when: 33 | - aliases.rc is defined 34 | - aliases.rc == "custom" 35 | 36 | ### BASH aliases 37 | - name: aliases-bash - kubectl and helm aliases to "{{ BASHRC | default ('~/.bashrc') }}" (if exists) 38 | lineinfile: 39 | dest: "{{ BASHRC | default ('~/.bashrc') }}" 40 | line: "{{ item }}" 41 | state: present 42 | create: "{{ aliases.file_create_if_missing | default ('no') }}" 43 | with_items: "{{ aliases.list | default ([]) }}" 44 | 45 | ### ZSH 
aliases 46 | - name: aliases-zsh - kubectl aliases to "{{ ZSHRC | default ('~/.zshrc') }}" (if exists) 47 | lineinfile: 48 | dest: "{{ ZSHRC | default ('~/.zshrc') }}" 49 | line: "{{ item }}" 50 | state: present 51 | create: "{{ aliases.file_create_if_missing | default ('no') }}" 52 | with_items: "{{ aliases.list | default ([]) }}" 53 | 54 | ### BASH Completion 55 | - name: aliases-bash-completion - Install optional packages like bash-completion 56 | package: name={{ item }} state={{ package_state | default ('present') }} 57 | with_items: 58 | - bash-completion 59 | 60 | - name: "create /usr/share/bash-completion/completions/[kubeadm, kubectl, helm]" 61 | shell: "{{ item }} completion bash | sudo tee /usr/share/bash-completion/completions/{{ item }} >/dev/null" 62 | with_items: 63 | - kubeadm 64 | - kubectl 65 | - helm 66 | 67 | - name: aliases-bash-completion to kubectl aliases in "{{ BASHRC | default ('~/.bashrc') }}" (if exists) 68 | lineinfile: 69 | dest: "{{ BASHRC | default ('~/.bashrc') }}" 70 | line: "[[ -n $PS1 ]] && complete -F __start_kubectl {{ item }}" 71 | state: present 72 | create: "{{ aliases.file_create_if_missing | default ('no') }}" 73 | with_items: "{{ aliases.kubectl_complete_also_aliases | default ([]) }}" 74 | when: 75 | - aliases.kubectl_complete_also_aliases is defined 76 | 77 | - name: aliases-bash-completion to helm aliases in "{{ BASHRC | default ('~/.bashrc') }}" (if exists) 78 | lineinfile: 79 | dest: "{{ BASHRC | default ('~/.bashrc') }}" 80 | line: "[[ -n $PS1 ]] && complete -F __start_helm {{ item }}" 81 | state: present 82 | create: "{{ aliases.file_create_if_missing | default ('no') }}" 83 | with_items: "{{ aliases.helm_complete_also_aliases | default ([]) }}" 84 | when: 85 | - aliases.helm_complete_also_aliases is defined 86 | 87 | ### ZSH Completion 88 | - name: "create /usr/local/share/zsh/site-functions/_[kubeadm, kubectl, helm]" 89 | shell: "{{ item }} completion zsh | sudo tee /usr/local/share/zsh/site-functions/_{{ item }} >/dev/null" 90 | with_items: 91 | - kubeadm 92 | - kubectl 93 | - helm 94 | 95 | - name: aliases-zsh-completion to kubectl aliases in "{{ ZSHRC | default ('~/.zshrc') }}" (if exists) 96 | lineinfile: 97 | dest: "{{ ZSHRC | default ('~/.zshrc') }}" 98 | line: "[[ -n $PS1 ]] && compdef __start_kubectl {{ item }}" 99 | state: present 100 | create: "{{ aliases.file_create_if_missing | default ('no') }}" 101 | with_items: "{{ aliases.kubectl_complete_also_aliases | default ([]) }}" 102 | when: 103 | - aliases.kubectl_complete_also_aliases is defined 104 | 105 | - name: aliases-zsh-completion to helm aliases in "{{ ZSHRC | default ('~/.zshrc') }}" (if exists) 106 | lineinfile: 107 | dest: "{{ ZSHRC | default ('~/.zshrc') }}" 108 | line: "[[ -n $PS1 ]] && compdef __start_helm {{ item }}" 109 | state: present 110 | create: "{{ aliases.file_create_if_missing | default ('no') }}" 111 | ignore_errors: true 112 | with_items: "{{ aliases.helm_complete_also_aliases | default ([]) }}" 113 | when: 114 | - aliases.helm_complete_also_aliases is defined 115 | 116 | - name: "remove .zcompdump before recreate" 117 | ansible.builtin.file: 118 | path: ~/.zcompdump 119 | state: absent 120 | 121 | - name: run compinit 122 | ansible.builtin.shell: "zsh -c 'compinit -C'" 123 | args: 124 | executable: /bin/zsh 125 | 126 | tags: 127 | - aliases 128 | when: 129 | - aliases is defined 130 | ignore_errors: true 131 | -------------------------------------------------------------------------------- /roles/storage/templates/nfs.j2: 
-------------------------------------------------------------------------------- 1 | #https://raw.githubusercontent.com/kubernetes-incubator/external-storage/master/nfs/deploy/kubernetes/rbac.yaml 2 | #https://raw.githubusercontent.com/kubernetes-incubator/external-storage/master/nfs/deploy/kubernetes/deployment.yaml 3 | #https://raw.githubusercontent.com/kubernetes-incubator/external-storage/master/nfs/deploy/kubernetes/class.yaml 4 | #https://github.com/kubernetes-incubator/external-storage/tree/master/nfs 5 | 6 | kind: ClusterRole 7 | apiVersion: rbac.authorization.k8s.io/v1 8 | metadata: 9 | name: nfs-provisioner-runner 10 | rules: 11 | - apiGroups: [""] 12 | resources: ["persistentvolumes"] 13 | verbs: ["get", "list", "watch", "create", "delete"] 14 | - apiGroups: [""] 15 | resources: ["persistentvolumeclaims"] 16 | verbs: ["get", "list", "watch", "update"] 17 | - apiGroups: ["storage.k8s.io"] 18 | resources: ["storageclasses"] 19 | verbs: ["get", "list", "watch"] 20 | - apiGroups: [""] 21 | resources: ["events"] 22 | verbs: ["list", "watch", "create", "update", "patch"] 23 | - apiGroups: [""] 24 | resources: ["services", "endpoints"] 25 | verbs: ["get"] 26 | - apiGroups: ["extensions"] 27 | resources: ["podsecuritypolicies"] 28 | resourceNames: ["nfs-provisioner"] 29 | verbs: ["use"] 30 | --- 31 | apiVersion: v1 32 | kind: ServiceAccount 33 | metadata: 34 | name: nfs-provisioner 35 | --- 36 | kind: ClusterRoleBinding 37 | apiVersion: rbac.authorization.k8s.io/v1 38 | metadata: 39 | name: run-nfs-provisioner 40 | subjects: 41 | - kind: ServiceAccount 42 | name: nfs-provisioner 43 | # replace with namespace where provisioner is deployed 44 | namespace: kube-system 45 | roleRef: 46 | kind: ClusterRole 47 | name: nfs-provisioner-runner 48 | apiGroup: rbac.authorization.k8s.io 49 | --- 50 | kind: Role 51 | apiVersion: rbac.authorization.k8s.io/v1 52 | metadata: 53 | name: leader-locking-nfs-provisioner 54 | rules: 55 | - apiGroups: [""] 56 | resources: ["endpoints"] 57 | verbs: ["get", "list", "watch", "create", "update", "patch"] 58 | --- 59 | kind: RoleBinding 60 | apiVersion: rbac.authorization.k8s.io/v1 61 | metadata: 62 | name: leader-locking-nfs-provisioner 63 | subjects: 64 | - kind: ServiceAccount 65 | name: nfs-provisioner 66 | # replace with namespace where provisioner is deployed 67 | namespace: kube-system 68 | roleRef: 69 | kind: Role 70 | name: leader-locking-nfs-provisioner 71 | apiGroup: rbac.authorization.k8s.io 72 | --- 73 | kind: Service 74 | apiVersion: v1 75 | metadata: 76 | name: nfs-provisioner 77 | labels: 78 | app: nfs-provisioner 79 | spec: 80 | ports: 81 | - name: nfs 82 | port: 2049 83 | - name: mountd 84 | port: 20048 85 | - name: rpcbind 86 | port: 111 87 | - name: rpcbind-udp 88 | port: 111 89 | protocol: UDP 90 | selector: 91 | app: nfs-provisioner 92 | --- 93 | kind: Deployment 94 | apiVersion: apps/v1 95 | metadata: 96 | name: nfs-provisioner 97 | spec: 98 | selector: 99 | matchLabels: 100 | app: nfs-provisioner 101 | replicas: 1 102 | strategy: 103 | type: Recreate 104 | template: 105 | metadata: 106 | labels: 107 | app: nfs-provisioner 108 | spec: 109 | serviceAccount: nfs-provisioner 110 | nodeSelector: 111 | node-role.kubernetes.io/control-plane: "" 112 | tolerations: 113 | - key: "node-role.kubernetes.io/control-plane" 114 | effect: NoSchedule 115 | containers: 116 | - name: nfs-provisioner 117 | image: quay.io/kubernetes_incubator/nfs-provisioner:v2.2.1-k8s1.12 118 | ports: 119 | - name: nfs 120 | containerPort: 2049 121 | - name: mountd 122 | 
containerPort: 20048 123 | - name: rpcbind 124 | containerPort: 111 125 | - name: rpcbind-udp 126 | containerPort: 111 127 | protocol: UDP 128 | securityContext: 129 | capabilities: 130 | add: 131 | - DAC_READ_SEARCH 132 | - SYS_RESOURCE 133 | args: 134 | - "-provisioner={{ nfs_k8s.provisioner }}" 135 | env: 136 | - name: POD_IP 137 | valueFrom: 138 | fieldRef: 139 | fieldPath: status.podIP 140 | - name: SERVICE_NAME 141 | value: nfs-provisioner 142 | - name: POD_NAMESPACE 143 | valueFrom: 144 | fieldRef: 145 | fieldPath: metadata.namespace 146 | imagePullPolicy: "IfNotPresent" 147 | volumeMounts: 148 | - name: export-volume 149 | mountPath: /export 150 | volumes: 151 | - name: export-volume 152 | hostPath: 153 | path: {{ nfs_k8s.host_path }} 154 | --- 155 | kind: StorageClass 156 | apiVersion: storage.k8s.io/v1 157 | metadata: 158 | name: "{{nfs_k8s.provisioner }}" 159 | annotations: 160 | storageclass.beta.kubernetes.io/is-default-class: "{{ nfs_k8s.is_default_class | default('true') }}" 161 | labels: 162 | kubernetes.io/cluster-service: "true" 163 | provisioner: "{{nfs_k8s.provisioner }}" 164 | mountOptions: 165 | - vers=4.1 166 | -------------------------------------------------------------------------------- /only_nodes_only_install.yml: -------------------------------------------------------------------------------- 1 | --- 2 | ## Preparations 3 | ## Making sure python exists on all nodes, so Ansible will be able to run: 4 | - hosts: nodes 5 | gather_facts: no 6 | become: yes 7 | become_method: sudo 8 | pre_tasks: 9 | ## It would be best to have ansible already installed on all machines. 10 | ## But if it is not, we'll try to do it: 11 | - name: when no python2, install python2 for Ansible<2.8 (usually required on ubuntu, which defaults to python3) # Alternativelly, for Ubuntu machines, define var: ansible_python_interpreter=/usr/bin/python3 12 | raw: test -e /usr/bin/python || (apt -y update && apt install -y python-minimal) || (yum install -y python2 python-simplejson) 13 | register: output 14 | changed_when: output.stdout != "" 15 | tags: always 16 | when: 17 | - ansible_version.full is version_compare('2.8', '<') 18 | - ( ansible_python_interpreter is not defined or ansible_python_interpreter == "/usr/bin/python" ) 19 | # ansible_os_family conds. cannot be used as this is before gathering facts (where ansible is required) 20 | ignore_errors: true 21 | ## reason for ignore_errors: true 22 | ## "version_compare" was replaced with "version" starting ansible 2.5; 23 | ## CentOS/RHEL 7.x use ansible 2.4, so not able to grasp what version_compare is. 24 | ## Ansible 2.9 removes the version_compare and does not recognize it any longer. 25 | ## As our need is to add python2 only on versions before 2.8, if this fails 26 | ## (due to missing version_compare command), we are fine. 27 | ## We do not cover cases where it fails due to other reasons, but that is a reasonable risk, 28 | ## and that issue will be captured later in the flow. 
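  ## Hedged alternative (not part of the original playbook): rather than relying on the
  ## raw-module bootstrap above, the Python interpreter can also be pinned per group in
  ## the inventory, e.g. in hosts(.example):
  ##   [nodes:vars]
  ##   ansible_python_interpreter=/usr/bin/python3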
29 | 30 | - name: when no python(2/3), install python3(Debian) python2(RedHat) for Ansible>=2.8 # Alternativelly, for Ubuntu machines, define var: ansible_python_interpreter=/usr/bin/python3 31 | raw: test -e /usr/bin/python || (apt -y update && apt install -y python3-minimal) || (yum install -y python2 python-simplejson) 32 | register: output 33 | changed_when: output.stdout != "" 34 | tags: always 35 | when: 36 | - ansible_version.full is version('2.8', '>=') or ( ansible_python_interpreter is defined and ansible_python_interpreter == "/usr/bin/python3" ) 37 | # ansible_os_family conds. cannot be used as this is before gathering facts (where ansible is required) 38 | ignore_errors: true 39 | ## reason for ignore_errors: true 40 | ## is similar to the one explained above (complements it) 41 | 42 | - setup: # aka gather_facts 43 | tags: always # required for tags, see ansible issue: #14228 44 | 45 | - name: test min. vars (group_vars/all) are set (ClusterConfiguration and k8s_network_addons_urls) 46 | debug: msg='Make sure min. vars are set in group_vars/all/ (e.g. ClusterConfiguration and k8s_network_addons_urls)' 47 | when: 48 | - ClusterConfiguration is not defined 49 | - JoinConfiguration is not defined 50 | failed_when: 51 | - ClusterConfiguration is not defined 52 | - JoinConfiguration is not defined 53 | tags: always # always check if we have vars in place 54 | 55 | - hosts: nodes 56 | become: yes 57 | become_method: sudo 58 | tags: 59 | - node 60 | roles: 61 | - { role: common, task: all, tags: [ 'common', 'install', 'common_install', 'node_install', 'node' ], when: "inventory_hostname not in groups['masters']" } 62 | 63 | ## node -> install nodes (kubeadm join, etc) 64 | - hosts: nodes 65 | become: yes 66 | become_method: sudo 67 | any_errors_fatal: yes 68 | tags: 69 | - node 70 | - install 71 | - node_install 72 | roles: 73 | - { role: non-primary-master, tags: [ 'node', 'install', 'node_install'], when: "inventory_hostname not in groups['masters']" } 74 | 75 | ## node -> label nodes (even when master is also a node) 76 | - hosts: nodes 77 | become: yes 78 | become_method: sudo 79 | any_errors_fatal: yes 80 | tags: 81 | - node 82 | - install 83 | - node_install 84 | - label 85 | roles: 86 | - { role: tools, task: labels, tags: [ 'label'] } 87 | 88 | ### For fixes like vsphere's bug, we have to reboot after some more fixes... 89 | #https://github.com/vmware/kubernetes/issues/495 90 | - hosts: mustrebootlist 91 | gather_facts: no 92 | become: yes 93 | become_method: sudo 94 | tags: 95 | - mustrebootlist 96 | - vsphere_bug_fix 97 | - vsphere 98 | roles: 99 | - { role: tools, task: reboot, tags: [ 'reboot_minimal' ], when: "ClusterConfiguration.cloudProvider is defined and ClusterConfiguration.cloudProvider == 'vsphere' and allow_restart | default(False) and vsphere_bug_fix is defined and vsphere_bug_fix" } 100 | 101 | ## Generic Sanity 102 | - hosts: masters 103 | become: yes 104 | become_method: sudo 105 | tags: 106 | - master 107 | pre_tasks: 108 | - name: remove temporary mustreboot temporary group 109 | group: 110 | name: mustrebootlist 111 | state: absent 112 | roles: 113 | - { role: tools, task: cluster_sanity, tags: [ 'cluster_sanity', 'sanity' ] } 114 | - { role: tools, task: postinstall_messages, tags: [ 'cluster_sanity', 'sanity' ] } 115 | 116 | ## to reset/add only some (more) nodes: 117 | ## 1. keep in hosts only: 118 | ## - the master 119 | ## - the affected node (all other nodes should not be there) 120 | ## 2. Have the token defined in the group_vars/all 121 | ## 3. 
Run using only this/these tag(s): 122 | ## ansible-playbook -i hosts -v site.yml --tags "node" # same with: ansible-playbook -i hosts -v site.yml --tags "node_reset,node_install,cluster_sanity,cluster_info" 123 | 124 | ## To get cluster info/sanity: 125 | ## ansible-playbook -i hosts -v site.yml --tags "cluster_sanity,cluster_info" 126 | -------------------------------------------------------------------------------- /only_secondaryMasters_only_install.yml: -------------------------------------------------------------------------------- 1 | --- 2 | ## Preparations 3 | ## Making sure python exists on all nodes, so Ansible will be able to run: 4 | - hosts: secondary-masters 5 | gather_facts: no 6 | become: yes 7 | become_method: sudo 8 | pre_tasks: 9 | ## It would be best to have ansible already installed on all machines. 10 | ## But if it is not, we'll try to do it: 11 | - name: when no python2, install python2 for Ansible<2.8 (usually required on ubuntu, which defaults to python3) # Alternativelly, for Ubuntu machines, define var: ansible_python_interpreter=/usr/bin/python3 12 | raw: test -e /usr/bin/python || (apt -y update && apt install -y python-minimal) || (yum install -y python2 python-simplejson) 13 | register: output 14 | changed_when: output.stdout != "" 15 | tags: always 16 | when: 17 | - ansible_version.full is version_compare('2.8', '<') 18 | - ( ansible_python_interpreter is not defined or ansible_python_interpreter == "/usr/bin/python" ) 19 | # ansible_os_family conds. cannot be used as this is before gathering facts (where ansible is required) 20 | ignore_errors: true 21 | ## reason for ignore_errors: true 22 | ## "version_compare" was replaced with "version" starting ansible 2.5; 23 | ## CentOS/RHEL 7.x use ansible 2.4, so not able to grasp what version_compare is. 24 | ## Ansible 2.9 removes the version_compare and does not recognize it any longer. 25 | ## As our need is to add python2 only on versions before 2.8, if this fails 26 | ## (due to missing version_compare command), we are fine. 27 | ## We do not cover cases where it fails due to other reasons, but that is a reasonable risk, 28 | ## and that issue will be captured later in the flow. 29 | 30 | - name: when no python(2/3), install python3(Debian) python2(RedHat) for Ansible>=2.8 # Alternativelly, for Ubuntu machines, define var: ansible_python_interpreter=/usr/bin/python3 31 | raw: test -e /usr/bin/python || (apt -y update && apt install -y python3-minimal) || (yum install -y python2 python-simplejson) 32 | register: output 33 | changed_when: output.stdout != "" 34 | tags: always 35 | when: 36 | - ansible_version.full is version('2.8', '>=') or ( ansible_python_interpreter is defined and ansible_python_interpreter == "/usr/bin/python3" ) 37 | # ansible_os_family conds. cannot be used as this is before gathering facts (where ansible is required) 38 | ignore_errors: true 39 | ## reason for ignore_errors: true 40 | ## is similar to the one explained above (complements it) 41 | 42 | - setup: # aka gather_facts 43 | tags: always # required for tags, see ansible issue: #14228 44 | 45 | - name: test min. vars (group_vars/all) are set (ClusterConfiguration and k8s_network_addons_urls) 46 | debug: msg='Make sure min. vars are set in group_vars/all/ (e.g. 
ClusterConfiguration and k8s_network_addons_urls)' 47 | when: 48 | - ClusterConfiguration is not defined 49 | - JoinConfiguration is not defined 50 | failed_when: 51 | - ClusterConfiguration is not defined 52 | - JoinConfiguration is not defined 53 | tags: always # always check if we have vars in place 54 | 55 | - hosts: secondary-masters 56 | become: yes 57 | become_method: sudo 58 | tags: 59 | - master 60 | - secondary_masters 61 | roles: 62 | - { role: tools, task: reset, tags: [ 'reset', 'master_reset' ] } 63 | - { role: tools, task: weave_reset, tags: [ 'reset', 'master_reset', 'network_reset', 'weave', 'weave_reset' ] } 64 | - { role: common, task: all, tags: [ 'common', 'install', 'common_install', 'master_install'] } 65 | 66 | ## master -> install keepalived on masters (relevat if HA) 67 | - hosts: secondary-masters 68 | become: yes 69 | become_method: sudo 70 | any_errors_fatal: yes 71 | tags: 72 | - master 73 | - install 74 | - ha 75 | - master_install 76 | - secondary_masters 77 | roles: 78 | - role: keepalived 79 | tags: [ 'master', 'install', 'master_install', 'ha', 'keepalived'] 80 | when: 81 | - ( groups['masters'] | length ) > 1 82 | - ( custom.networking.masterha_type | default('vip') ) == 'vip' 83 | 84 | - hosts: secondary-masters 85 | become: yes 86 | become_method: sudo 87 | any_errors_fatal: yes 88 | tags: 89 | - master 90 | - install 91 | - ha 92 | - master_install 93 | - secondary_masters 94 | roles: 95 | - { role: non-primary-master, tags: [ 'secondary-masters', 'master', 'install', 'master_install', 'secondary_masters'] } 96 | 97 | ### For fixes like vsphere's bug, we have to reboot after some more fixes... 98 | #https://github.com/vmware/kubernetes/issues/495 99 | - hosts: mustrebootlist 100 | gather_facts: no 101 | become: yes 102 | become_method: sudo 103 | tags: 104 | - mustrebootlist 105 | - vsphere_bug_fix 106 | - vsphere 107 | roles: 108 | - { role: tools, task: reboot, tags: [ 'reboot_minimal' ], when: "ClusterConfiguration.cloudProvider is defined and ClusterConfiguration.cloudProvider == 'vsphere' and allow_restart | default(False) and vsphere_bug_fix is defined and vsphere_bug_fix" } 109 | 110 | ## Generic Sanity 111 | - hosts: secondary-masters 112 | become: yes 113 | become_method: sudo 114 | tags: 115 | - master 116 | - secondary_masters 117 | pre_tasks: 118 | - name: remove temporary mustreboot temporary group 119 | group: 120 | name: mustrebootlist 121 | state: absent 122 | roles: 123 | - { role: tools, task: cluster_sanity, tags: [ 'cluster_sanity', 'sanity' ] } 124 | - { role: tools, task: postinstall_messages, tags: [ 'cluster_sanity', 'sanity' ] } 125 | 126 | ## to reset/add only some (more) nodes: 127 | ## 1. keep in hosts only: 128 | ## - the master 129 | ## - the affected node (all other nodes should not be there) 130 | ## 2. Have the token defined in the group_vars/all 131 | ## 3. 
Run using only this/these tag(s): 132 | ## ansible-playbook -i hosts -v site.yml --tags "node" # same with: ansible-playbook -i hosts -v site.yml --tags "node_reset,node_install,cluster_sanity,cluster_info" 133 | 134 | ## To get cluster info/sanity: 135 | ## ansible-playbook -i hosts -v site.yml --tags "cluster_sanity,cluster_info" 136 | -------------------------------------------------------------------------------- /roles/tools/tasks/reset.yml: -------------------------------------------------------------------------------- 1 | --- 2 | #- hosts: all 3 | # gather_facts: False 4 | # become: yes 5 | # become_method: sudo 6 | # tags: 7 | # - reset 8 | # tasks: 9 | 10 | - block: 11 | 12 | - name: stop keepalived for cleanup activities 13 | systemd: name={{ item }} state=stopped 14 | with_items: 15 | - keepalived 16 | tags: 17 | - kubelet 18 | - uninstall 19 | ignore_errors: true 20 | when: 21 | - groups['masters'] | length > 1 22 | - ( custom.networking.masterha_type | default('vip') ) == 'vip' 23 | 24 | # We had to remove it, as it blocks the flow. It also fetches docker.io images and in some setups there is no access to or fails due to limits on docker hub... 25 | # - name: Reset weave network # if it was used 26 | # shell: /usr/local/bin/weave reset --force 27 | # ignore_errors: true 28 | 29 | - name: remove pods NFS mount leftovers; Note you have to collect them from the remote storage (e.g. vsphere datastore) also 30 | shell: umount -f $(mount | grep '/kubelet/pods/' | grep '/volumes/kubernetes.io~nfs' | awk '{print $3}') 31 | tags: 32 | - umount 33 | - nfs_reset 34 | ignore_errors: true 35 | 36 | - name: Reset cluster (kubeadm reset --force --ignore-preflight-errors=all ) 37 | command: /usr/bin/kubeadm reset --force --ignore-preflight-errors=all 38 | ignore_errors: true 39 | # TODO: if cluster is installed, but kubedm is no longer available on the machine, we will not have a reset of cluster... 
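  # Usage sketch (an assumption, mirroring the tag layout used in all_reset.yml): to reset a
  # single worker without touching the rest of the cluster, something like
  #   ansible-playbook -i hosts all_reset.yml --tags node_reset --limit <node_fqdn>
  # should exercise only this role on that host (<node_fqdn> is a placeholder).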
40 | 41 | - name: Reset cluster (kubeadm reset --force --ignore-preflight-errors=all using --cri-socket loop ) 42 | # command: /usr/bin/kubeadm reset --force --ignore-preflight-errors=all --cri-socket={{ item }} 43 | command: /usr/bin/kubeadm reset --force --ignore-preflight-errors=all --cri-socket={{ InitConfiguration.nodeRegistration.criSocket }} 44 | ignore_errors: true 45 | # with_items: 46 | # - /var/run/dockershim.sock 47 | # - /var/run/crio/crio.sock 48 | # - /var/run/containerd/containerd.sock 49 | # - /var/run/cri-dockerd.sock 50 | 51 | ### Cleaning full /etc/kubernetes/ ; Starting k8s 1.12 behaves better, at some point we will remove this step: 52 | - name: ensure old kubeadm config files were removed 53 | file: state=absent path={{ item }} 54 | with_items: 55 | - /etc/kubernetes/ 56 | #- /etc/kubernetes/kubeadm.conf 57 | #- /etc/kubernetes/kubeadm-master.config 58 | #- /etc/kubernetes/kubeadm-master.conf 59 | #- /etc/kubernetes/cloud-config 60 | 61 | # - name: ensure old /etc/kubernetes/ is removed when full_kube_reinstall is true 62 | # file: state=absent path={{ item }} 63 | # with_items: 64 | # - /etc/kubernetes/ 65 | # #- /var/lib/etcd # there might be cases 66 | # when: full_kube_reinstall is defined and full_kube_reinstall 67 | 68 | - name: ensure old /var/lib/etcd/member is removed 69 | file: state=absent path={{ item }} 70 | with_items: 71 | - /var/lib/etcd/member 72 | when: etcd_clean | default(false) 73 | 74 | - name: systemctl stop kube*.*.slice 75 | shell: 'for i in $(systemctl list-unit-files --no-legend --no-pager -l | grep --color=never -o kube.*\.slice );do echo $i; systemctl stop $i ; done' 76 | tags: 77 | - umount 78 | 79 | - name: Reset cluster (kubeadm reset --force) # starting 1.14 80 | command: /usr/bin/kubeadm reset --force --ignore-preflight-errors=all 81 | ignore_errors: true 82 | # TODO: if cluster is installed, but kubedm is no longer available on the machine, we will not have a reset of cluster... 83 | 84 | - name: stop kubelet and etcd for cleanup activities 85 | systemd: name={{ item }} state=stopped 86 | with_items: 87 | - kubelet 88 | - etcd 89 | tags: 90 | - kubelet 91 | - uninstall 92 | ignore_errors: true 93 | 94 | - name: unhold before reinstall packages 95 | shell: apt-mark unhold {{ item }} 96 | ignore_errors: true 97 | with_items: 98 | - kubeadm 99 | - kubelet 100 | - kubectl 101 | - kubernetes-cni 102 | - cri-tools 103 | when: 104 | - full_kube_reinstall | default (False) 105 | - full_kube_apt_unhold | default (False) 106 | - ansible_os_family == "Debian" 107 | tags: 108 | - kubelet 109 | - uninstall 110 | 111 | - name: Remove before reinstall packages 112 | package: name={{ item }} state=absent 113 | with_items: 114 | - kubeadm 115 | - kubelet 116 | - kubectl 117 | - kubernetes-cni 118 | when: full_kube_reinstall | default (False) #is defined and full_kube_reinstall 119 | tags: 120 | - kubelet 121 | - uninstall 122 | 123 | - name: remove plugins mount leftovers; Note you have to collect them from the remote storage (e.g. vsphere datastore) also 124 | #shell: 'umount $(mount | grep " on /var/lib/kubelet/plugins/kubernetes.io/" | cut -f1 -d" ")' 125 | shell: umount -f $(mount | grep '/kubelet/plugins/kubernetes.io/' | awk '{print $3}') 126 | #shell: 'umount $(mount | grep "/kubelet/plugins/kubernetes.io/" | cut -f1 -d" ")' 127 | tags: 128 | - kubelet 129 | - uninstall 130 | ignore_errors: true 131 | 132 | - name: remove pods mount leftovers; Note you have to collect them from the remote storage (e.g. 
vsphere datastore) also 133 | shell: umount -f $(mount | grep '/kubelet/pods/' | grep '/volumes/kubernetes.io~' | awk '{print $3}') 134 | tags: 135 | - kubelet 136 | - uninstall 137 | ignore_errors: true 138 | 139 | - name: docker network prune -f 140 | shell: 'docker network prune -f' 141 | 142 | #https://github.com/kubernetes/kubernetes/issues/39557 143 | - name: cni0/cbr0 IP alloction issue 144 | shell: 'rm -rf /var/lib/cni/ /var/lib/kubelet/* /etc/cni/ ; ip link delete cni0; ip link delete cbr0 ; ip link delete flannel.1; ip link delete weave' 145 | ignore_errors: true 146 | tags: 147 | - uninstall 148 | 149 | - name: ipvsadm clear 150 | shell: 'ipvsadm --clear' 151 | ignore_errors: true 152 | tags: 153 | - uninstall 154 | 155 | - name: Reset iptables rules # THIS TASK SHOULD BE REMOVED, is not maintained 156 | shell: iptables-save | awk '/^[*]/ { print $1 } /^:[A-Z]+ [^-]/ { print $1 " ACCEPT" ; } /COMMIT/ { print $0; }' | iptables-restore 157 | when: iptables_reset is defined and iptables_reset 158 | ignore_errors: true 159 | tags: 160 | - uninstall 161 | 162 | #- name: restart kubelet for cleanup activities 163 | # systemd: name={{ item }} state=restarted 164 | # with_items: 165 | # - kubelet 166 | # when: ! (full_kube_reinstall is defined and full_kube_reinstall ) 167 | # tags: 168 | # - kubelet 169 | # - uninstall 170 | # ignore_errors: true 171 | 172 | - name: Remove /etc/systemd/system/kubelet.service.d/20-etcd-service-manager.conf if present from HA etcd setup time (in MasterHA) 173 | file: 174 | path: /etc/systemd/system/kubelet.service.d/20-etcd-service-manager.conf 175 | state: absent 176 | 177 | - name: Remove /etc/sysconfig/kubelet if present 178 | file: 179 | path: /etc/sysconfig/kubelet 180 | state: absent 181 | 182 | tags: 183 | - reset 184 | -------------------------------------------------------------------------------- /docs/popular_helm_charts_cli_deploy.md: -------------------------------------------------------------------------------- 1 | Examples of popular helm charts with their relevant params. 2 | Tested in k8s 10, helm 2.8.2, with persistent volumes and proxy. 
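All examples below assume the Helm 2 CLI with Tiller already initialised and the `stable/` repository configured. A minimal setup sketch (the repo URL points at the archived location of the former stable charts and is an assumption, not part of the original test environment):
```
# Assumed prerequisites (hypothetical setup, adjust the URL to your own mirror):
helm repo add stable https://charts.helm.sh/stable   # archived location of the former stable repo
helm repo update
helm version   # client and server should both report ~v2.8.x for the flags used below (--name, delete --purge)
```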
3 | 4 | # Test k8s deployment with: 5 | ## Wordpress 6 | ``` 7 | export K8SMASTER=$(hostname -s) 8 | helm delete --purge wordpress || true 9 | helm install --name wordpress --namespace default \ 10 | --set wordpressUsername=admin,wordpressPassword=password \ 11 | --set persistence.size=200Mi \ 12 | --set mariadb.mariadbRootPassword=secretpassword,mariadb.persistence.size=400Mi \ 13 | --set ingress.enabled=true,ingress.hosts[0].name="wordpress.${K8SMASTER}.k8singress.example.com" \ 14 | stable/wordpress 15 | ``` 16 | 17 | # DBs 18 | ## mysql 19 | ``` 20 | export K8SMASTER=$(hostname -s) 21 | helm delete --purge mysql || true 22 | helm install --namespace default --name mysql \ 23 | --set mysqlRootPassword=secretpassword,mysqlUser=my-user,mysqlPassword=my-password,mysqlDatabase=my-database,persistence.size=400Mi \ 24 | stable/mysql 25 | ``` 26 | 27 | ## MongoDB 28 | ``` 29 | export K8SMASTER=$(hostname -s) 30 | helm delete --purge mongodb || true 31 | helm install --name mongodb --namespace mongodb \ 32 | --set mongodbRootPassword=secretpassword,mongodbUsername=my-user,mongodbPassword=my-password,mongodbDatabase=my-database \ 33 | --set persistence.enabled=True,persistence.size=500Mi \ 34 | stable/mongodb 35 | ``` 36 | 37 | ## PostgreSQL 38 | ``` 39 | export K8SMASTER=$(hostname -s) 40 | helm delete --purge postgresql || true 41 | helm install --name postgresql --namespace default \ 42 | --set postgresUser=my-user,postgresPassword=secretpassword,postgresDatabase=my-database \ 43 | --set persistence.enabled=True,persistence.size=300Mi \ 44 | stable/postgresql 45 | ``` 46 | 47 | # Monitoring 48 | ## Prometheus 49 | ``` 50 | export K8SMASTER=$(hostname -s) 51 | helm delete --purge prometheus || true 52 | helm install --name prometheus --namespace infra \ 53 | --set rbac.create=True \ 54 | --set alertmanager.ingress.enabled=True,alertmanager.ingress.hosts[0]=alertmanager.${K8SMASTER}.k8singress.example.com \ 55 | --set alertmanager.persistentVolume.enabled=true,alertmanager.persistentVolume.size=300Mi \ 56 | --set server.ingress.enabled=True,server.ingress.hosts[0]=prometheus.${K8SMASTER}.k8singress.example.com \ 57 | --set server.persistentVolume.enabled=True,server.persistentVolume.size=400Mi \ 58 | --set pushgateway.ingress.enabled=True,pushgateway.ingress.hosts[0]=pushgateway.${K8SMASTER}.k8singress.example.com \ 59 | stable/prometheus 60 | ``` 61 | 62 | ## Grafana (resource intensive/cron jobs) 63 | ``` 64 | export K8SMASTER=$(hostname -s) 65 | helm delete --purge grafana || true 66 | helm install --name grafana --namespace infra \ 67 | --set adminPassword=my-password \ 68 | --set persistence.enabled=True,persistence.size=200Mi,persistence.accessModes[0]=ReadWriteOnce \ 69 | --set ingress.enabled=True,ingress.hosts[0]=grafana.${K8SMASTER}.k8singress.example.com \ 70 | --set datasources.datasources\\.yaml.apiVersion=1 \ 71 | --set datasources.datasources\\.yaml.datasources[0].name=prometheus \ 72 | --set datasources.datasources\\.yaml.datasources[0].type=prometheus \ 73 | --set datasources.datasources\\.yaml.datasources[0].url="http://prometheus-server.infra.svc.cluster.local" \ 74 | --set datasources.datasources\\.yaml.datasources[0].isDefault=true \ 75 | --set datasources.datasources\\.yaml.datasources[0].access=proxy \ 76 | --set datasources.datasources\\.yaml.datasources[1].name=prometheus_direct \ 77 | --set datasources.datasources\\.yaml.datasources[1].type=prometheus \ 78 | --set datasources.datasources\\.yaml.datasources[1].url="http://prometheus.${K8SMASTER}.k8singress.example.com" 
\ 79 | --set datasources.datasources\\.yaml.datasources[1].isDefault=false \ 80 | --set datasources.datasources\\.yaml.datasources[1].access=direct \ 81 | stable/grafana 82 | ``` 83 | 84 | # Others 85 | ## chartmuseum 86 | TBD: The chart does not provide a way to add proxy curently 87 | ``` 88 | export K8SMASTER=$(hostname -s) 89 | helm delete --purge chartmuseum || true 90 | helm install --name chartmuseum --namespace infra \ 91 | --set persistence.enabled=True,persistence.storageClass="",persistence.size=100Mi \ 92 | --set ingress.enabled=True \ 93 | --set ingress.hosts.chartmuseum\\.${K8SMASTER}\\.k8singress\\.example\\.com[0]="/charts" \ 94 | --set ingress.hosts.chartmuseum\\.${K8SMASTER}\\.k8singress\\.example\\.com[1]="/index.yaml" \ 95 | --set ingress.hosts.chartmuseum\\.${K8SMASTER}\\.k8singress\\.example\\.com[2]="/index.yml" \ 96 | stable/chartmuseum 97 | 98 | #### Optionally, install the binary on the unix side to interact with it: 99 | curl -LO https://s3.amazonaws.com/chartmuseum/release/latest/bin/linux/amd64/chartmuseum && chmod +x ./chartmuseum && mv ./chartmuseum /usr/local/bin 100 | ``` 101 | 102 | ## Monocular 103 | TBD: The chart does not provide a way to add proxy curently 104 | ``` 105 | export K8SMASTER=$(hostname -s) 106 | helm delete --purge monocular || true 107 | helm repo add monocular https://kubernetes-helm.github.io/monocular 108 | helm install --name monocular --namespace infra \ 109 | --set ingress.enabled=True,ingress.hosts[0]="monocular.${K8SMASTER}.k8singress.example.com" \ 110 | --set mongodb.persistence.enabled=True,mongodb.persistence.size=400Mi \ 111 | monocular/monocular 112 | ``` 113 | 114 | # CI/CD 115 | ## Jenkins 116 | ``` 117 | export K8SMASTER=$(hostname -s) 118 | helm delete --purge jenkins || true 119 | helm install --name jenkins --namespace infra \ 120 | --set rbac.install=true \ 121 | --set Master.InstallPlugins[0]="kubernetes:1.5.1" \ 122 | --set Master.InstallPlugins[1]="credentials-binding:1.16" \ 123 | --set Master.InstallPlugins[2]="git:3.8.0" \ 124 | --set Master.InstallPlugins[3]="workflow-job:2.18" \ 125 | --set Master.InstallPlugins[4]="workflow-aggregator:2.5" \ 126 | --set Master.InitContainerEnv[0].name=http_proxy,Master.InitContainerEnv[0].value='http://proxy.corp.example.com:8080' \ 127 | --set Master.InitContainerEnv[1].name=https_proxy,Master.InitContainerEnv[1].value='http://proxy.corp.example.com:8080' \ 128 | --set Master.InitContainerEnv[2].name=no_proxy,Master.InitContainerEnv[2].value='localhost\,.svc\,.local\,.example.com' \ 129 | --set Master.ContainerEnv[0].name=http_proxy,Master.ContainerEnv[0].value='http://proxy.corp.example.com:8080' \ 130 | --set Master.ContainerEnv[1].name=https_proxy,Master.ContainerEnv[1].value='http://proxy.corp.example.com:8080' \ 131 | --set Master.ContainerEnv[2].name=no_proxy,Master.ContainerEnv[2].value='localhost\,.svc\,.local\,.example.com' \ 132 | --set Master.JavaOpts="-Dhttp.proxyHost=proxy.corp.example.com -Dhttp.proxyPort=8080 -Dhttps.proxyHost=proxy.corp.example.com -Dhttps.proxyPort=8080 -Dhttp.nonProxyHosts='localhost|*.example.com|*.local|*.svc' -Dhttps.nonProxyHosts='localhost|*.example.com|*.local|*.svc' " \ 133 | --set Master.ServiceType=ClusterIP \ 134 | --set Master.HostName=jenkins.${K8SMASTER}.k8singress.example.com \ 135 | --set Persistence.Enabled=True \ 136 | --set Persistence.Size=1Gi \ 137 | stable/jenkins 138 | echo "Find admin password is:" 139 | printf $(kubectl get secret --namespace infra jenkins -o jsonpath="{.data.jenkins-admin-password}" | base64 
--decode);echo 140 | ``` 141 | 142 | ## Nexus 143 | TBD: The chart does not provide a way to add proxy at deploy time (as of now) 144 | ``` 145 | export K8SMASTER=$(hostname -s) 146 | helm delete --purge nexus || true 147 | helm install --name nexus --namespace infra \ 148 | --set docker.enabled=True,docker.host=myregistry.${K8SMASTER}.k8singress.example.com,docker.port=5000 \ 149 | --set persistence.enabled=True,persistence.size=1Gi \ 150 | --set service.type=ClusterIP \ 151 | --set ingress.enabled=True,ingress.hosts[0]="nexus.${K8SMASTER}.k8singress.example.com" \ 152 | stable/sonatype-nexus 153 | ``` 154 | 155 | # Notes: 156 | All sizes are at min. 157 | If proxy is not required, remove the relevant lines 158 | -------------------------------------------------------------------------------- /all_install.yml: -------------------------------------------------------------------------------- 1 | --- 2 | ## Preparations 3 | ## Making sure python exists on all nodes, so Ansible will be able to run: 4 | - hosts: all 5 | gather_facts: no 6 | become: yes 7 | become_method: sudo 8 | pre_tasks: 9 | ## It would be best to have ansible already installed on all machines. 10 | ## But if it is not, we'll try to do it: 11 | - name: when no python2, install python2 for Ansible<2.8 (usually required on ubuntu, which defaults to python3) # Alternativelly, for Ubuntu machines, define var: ansible_python_interpreter=/usr/bin/python3 12 | raw: test -e /usr/bin/python || (apt -y update && apt install -y python-minimal) || (yum install -y python2 python-simplejson) 13 | register: output 14 | changed_when: output.stdout != "" 15 | tags: always 16 | when: 17 | - ansible_version.full is version_compare('2.8', '<') 18 | - ( ansible_python_interpreter is not defined or ansible_python_interpreter == "/usr/bin/python" ) 19 | # ansible_os_family conds. cannot be used as this is before gathering facts (where ansible is required) 20 | ignore_errors: true 21 | ## reason for ignore_errors: true 22 | ## "version_compare" was replaced with "version" starting ansible 2.5; 23 | ## CentOS/RHEL 7.x use ansible 2.4, so not able to grasp what version_compare is. 24 | ## Ansible 2.9 removes the version_compare and does not recognize it any longer. 25 | ## As our need is to add python2 only on versions before 2.8, if this fails 26 | ## (due to missing version_compare command), we are fine. 27 | ## We do not cover cases where it fails due to other reasons, but that is a reasonable risk, 28 | ## and that issue will be captured later in the flow. 29 | 30 | - name: when no python(2/3), install python3(Debian) python2(RedHat) for Ansible>=2.8 # Alternativelly, for Ubuntu machines, define var: ansible_python_interpreter=/usr/bin/python3 31 | raw: test -e /usr/bin/python || (apt -y update && apt install -y python3-minimal) || (yum install -y python2 python-simplejson) 32 | register: output 33 | changed_when: output.stdout != "" 34 | tags: always 35 | when: 36 | - ansible_version.full is version('2.8', '>=') or ( ansible_python_interpreter is defined and ansible_python_interpreter == "/usr/bin/python3" ) 37 | # ansible_os_family conds. cannot be used as this is before gathering facts (where ansible is required) 38 | ignore_errors: true 39 | ## reason for ignore_errors: true 40 | ## is similar to the one explained above (complements it) 41 | 42 | - setup: # aka gather_facts 43 | tags: always # required for tags, see ansible issue: #14228 44 | 45 | - name: test min. 
vars (group_vars/all) are set (ClusterConfiguration and k8s_network_addons_urls) 46 | debug: msg='Make sure min. vars are set in group_vars/all/ (e.g. ClusterConfiguration and k8s_network_addons_urls)' 47 | when: 48 | - ClusterConfiguration is not defined 49 | - JoinConfiguration is not defined 50 | failed_when: 51 | - ClusterConfiguration is not defined 52 | - JoinConfiguration is not defined 53 | tags: always # always check if we have vars in place 54 | 55 | ## nodes -> install common part (for all nodes) 56 | - hosts: nodes 57 | become: yes 58 | become_method: sudo 59 | tags: 60 | - node 61 | roles: 62 | - { role: common, task: all, tags: [ 'common', 'install', 'common_install', 'node_install', 'node' ], when: "inventory_hostname not in groups['masters']" } 63 | 64 | ## master -> install common part (for all masters - and sometimes etcd when colocated with masters) 65 | - hosts: masters 66 | become: yes 67 | become_method: sudo 68 | tags: 69 | - master 70 | roles: 71 | - { role: common, task: all, tags: [ 'common', 'install', 'common_install', 'master_install'] } 72 | 73 | ## master -> install keepalived on masters (relevat if HA) 74 | - hosts: masters 75 | become: yes 76 | become_method: sudo 77 | any_errors_fatal: yes 78 | tags: 79 | - master 80 | - install 81 | - ha 82 | - master_install 83 | roles: 84 | - role: keepalived 85 | tags: [ 'master', 'install', 'master_install', 'ha', 'keepalived'] 86 | when: 87 | - ( groups['masters'] | length ) > 1 88 | - ( custom.networking.masterha_type | default('vip') ) == 'vip' 89 | 90 | - hosts: primary-master 91 | name: primary-master (or master in general) - it applies to both ha and non-ha 92 | become: yes 93 | become_method: sudo 94 | any_errors_fatal: yes 95 | tags: 96 | - master 97 | - install 98 | - master_install 99 | - ha 100 | roles: 101 | - { role: primary-master, task: primary, tags: [ 'primary-master', 'master', 'install', 'master_install'] } 102 | 103 | - hosts: secondary-masters 104 | become: yes 105 | become_method: sudo 106 | any_errors_fatal: yes 107 | tags: 108 | - master 109 | - install 110 | - ha 111 | - master_install 112 | roles: 113 | - { role: non-primary-master, tags: [ 'secondary-masters', 'master', 'install', 'master_install', 'secondary_masters'] } 114 | 115 | ## node -> install nodes (kubeadm join, etc) 116 | - hosts: nodes 117 | become: yes 118 | become_method: sudo 119 | any_errors_fatal: yes 120 | tags: 121 | - node 122 | - install 123 | - node_install 124 | roles: 125 | - { role: non-primary-master, tags: [ 'node', 'install', 'node_install'], when: "inventory_hostname not in groups['masters']" } 126 | 127 | ## node -> label nodes (even when master is also a node) 128 | - hosts: nodes 129 | become: yes 130 | become_method: sudo 131 | any_errors_fatal: yes 132 | tags: 133 | - node 134 | - install 135 | - node_install 136 | - label 137 | roles: 138 | - { role: tools, task: labels, tags: [ 'label'] } 139 | 140 | ## Post deploy (network, storage, taints, helm installation, helm charts deploy, any other addons) 141 | - hosts: primary-master 142 | become: yes 143 | become_method: sudo 144 | tags: 145 | - post_deploy 146 | roles: 147 | - { role: post_deploy, task: all, tags: [ 'post_deploy_no_helm' ] } 148 | - { role: storage, task: create_all, tags: [ 'storage', 'rook', 'nfs', 'vsphere' ] } 149 | - { role: helm, task: helm, tags: [ 'helm' ] } 150 | - { role: helm, task: charts_deploy, tags: [ 'helm', 'charts_deploy' ] } 151 | 152 | ### For fixes like vsphere's bug, we have to reboot after some more fixes... 
153 | #https://github.com/vmware/kubernetes/issues/495 154 | - hosts: mustrebootlist 155 | gather_facts: no 156 | become: yes 157 | become_method: sudo 158 | tags: 159 | - mustrebootlist 160 | - vsphere_bug_fix 161 | - vsphere 162 | roles: 163 | - { role: tools, task: reboot, tags: [ 'reboot_minimal' ], when: "ClusterConfiguration.cloudProvider is defined and ClusterConfiguration.cloudProvider == 'vsphere' and allow_restart | default(False) and vsphere_bug_fix is defined and vsphere_bug_fix" } 164 | 165 | ## Generic Sanity 166 | - hosts: masters 167 | become: yes 168 | become_method: sudo 169 | tags: 170 | - master 171 | pre_tasks: 172 | - name: remove temporary mustreboot temporary group 173 | group: 174 | name: mustrebootlist 175 | state: absent 176 | roles: 177 | - { role: tools, task: cluster_sanity, tags: [ 'cluster_sanity', 'sanity' ] } 178 | - { role: tools, task: postinstall_messages, tags: [ 'cluster_sanity', 'sanity' ] } 179 | 180 | ## to reset/add only some (more) nodes: 181 | ## 1. keep in hosts only: 182 | ## - the master 183 | ## - the affected node (all other nodes should not be there) 184 | ## 2. Have the token defined in the group_vars/all 185 | ## 3. Run using only this/these tag(s): 186 | ## ansible-playbook -i hosts -v site.yml --tags "node" # same with: ansible-playbook -i hosts -v site.yml --tags "node_reset,node_install,cluster_sanity,cluster_info" 187 | 188 | ## To get cluster info/sanity: 189 | ## ansible-playbook -i hosts -v site.yml --tags "cluster_sanity,cluster_info" 190 | -------------------------------------------------------------------------------- /Vagrantfile: -------------------------------------------------------------------------------- 1 | # -*- mode: ruby -*- 2 | # vi: set ft=ruby : 3 | 4 | # All Vagrant configuration is done below. The "2" in Vagrant.configure 5 | # configures the configuration version (we support older styles for 6 | # backwards compatibility). Please don't change it unless you know what 7 | # you're doing. 8 | 9 | $instance_name_prefix = "k8s" 10 | $num_instances = 1 # Number of nodes, excluding master which is always created. 11 | #$custom_networking_dnsDomain = ".ap" # put same value like custom.networking.dnsDomain in ansible's group_vars/all, BUT this time WITH THE DOT in front! 12 | #E.g. ".demo.k8s.ap", 13 | # https://www.virtualbox.org/manual/ch08.html#vboxmanage-natnetwork 14 | #def nat(config) 15 | ### Cannot be used, as the rest of vagrant commands fail... 16 | # config.vm.provider "virtualbox" do |v| 17 | # v.customize ["modifyvm", :id, "--nic1", "bridged", "--bridgeadapter", "enp3s0", "--nictype1", "virtio", "--macaddress1", "auto" ] #, "--nat-network2", "mybridgeinterface", "--nictype1", "virtio"] # 82540EM 18 | # v.customize ["modifyvm", :id, "--nic2", "nat", "--nictype2", "virtio"] 19 | # end 20 | #end 21 | 22 | Vagrant.configure(2) do |config| 23 | # The most common configuration options are documented and commented below. 24 | # For a complete reference, please see the online documentation at 25 | # https://docs.vagrantup.com. 
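  # Hedged usage sketch (not in the original file): for a three-worker cluster whose node
  # names should line up with custom.networking.dnsDomain from group_vars/all, one could set
  #   $num_instances = 3
  #   $custom_networking_dnsDomain = ".demo.k8s.ap"
  # near the top of this file before running `vagrant up`.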
26 | 27 | #config.vm.box_check_update = "false" # If there is no internet access to get new updates 28 | 29 | #config.vm.network "public_network", type: "dhcp", bridge: "enp3s0" 30 | #config.vm.network "public_network" #, :bridge => "enp3s0" #, mac: "auto" #, :adapter=>1 #, use_dhcp_assigned_default_route: true 31 | #config.ssh.port=22 32 | #config.vm.network "public_network", type: "dhcp", :bridge => "enp3s0" 33 | #config.vm.usable_port_range = (2000..2500) 34 | #config.vm.boot_timeout = 90 35 | #config.ssh.insert_key = false 36 | #config.ssh.username = "your_user" 37 | #config.ssh.password = "your_password" 38 | 39 | config.vm.provider "virtualbox" do |vb| 40 | vb.gui = false # Set to true to view the window in graphical mode 41 | vb.memory = "6144" #"4096" #"3072" # 6144 42 | vb.cpus = 4 43 | #vb.customize ["storagectl", :id, "--name", "IDE Controller", "--remove"] # Make sure it does not use IDE 44 | #vb.customize ["storagectl", :id, "--name", "SATA Controller", "--add", "sata"] # Make it use SATA: faster and less issues 45 | # optionally add: , "--hostiocache", "on", "--bootable", "on"] # like here: https://www.virtualbox.org/manual/ch08.html#vboxmanage-storagectl 46 | end 47 | 48 | #### CHOOSE DESIRED OS: 49 | #config.vm.box = "centos/7" 50 | #config.vm.box = "centos/atomic-host" # NEVER TESTED 51 | config.vm.box = "ubuntu/xenial64" 52 | 53 | # NODES: 54 | (1..$num_instances).each do |i| 55 | config.vm.define vm_name = "%s-%02d%s" % [$instance_name_prefix, i, $custom_networking_dnsDomain] do |node| 56 | #node.vm.synced_folder ".vagrant", "/vagrant", type: "rsync" #, rsync__exclude: ".local_only" #rsync__include: ".vagrant/" 57 | #node.vm.box = "centos/7" 58 | #node.vm.box = "centos/atomic-host" 59 | node.vm.hostname = vm_name 60 | #node.ssh.host = vm_name 61 | #node.vm.provision "shell", inline: "echo hello from %s" % [node.vm.hostname] 62 | #node.vm.provision "shell" do |s| 63 | #s.path= "dockerize.sh" # no longer required, handled by ansible 64 | #s.args= "node" 65 | #end 66 | node.vm.provision "shell", inline: <<-SHELL 67 | sudo cp -rf ~vagrant/.ssh ~root/ || true # This will allow us to ssh into root with existing vagrant key 68 | sudo cp -rf ~ubuntu/.ssh ~root/ || true # This will allow us to ssh into root with existing vagrant key 69 | #chmod 755 /vagrant/dockerize.sh 70 | #/vagrant/dockerize.sh 71 | SHELL 72 | #File.open("ssh_config", "w+") { |file| file.write("boo" ) } 73 | end 74 | end 75 | 76 | # MASTER: 77 | config.vm.define vm_name = "%s-master%s" % [$instance_name_prefix, $custom_networking_dnsDomain] , primary: true do |k8smaster| 78 | #k8smaster.vm.synced_folder ".vagrant", "/vagrant", type: "rsync" #, rsync__exclude: ".local_only" #rsync__include: ".vagrant/" 79 | #k8smaster.vm.hostname = "#{k8smaster}" 80 | #k8smaster.vm.hostname = "%s" % [ k8smaster ] 81 | k8smaster.vm.hostname = vm_name 82 | #k8smaster.ssh.host = vm_name 83 | #k8smaster.vm.network "forwarded_port", guest: 80, host: 2080, auto_correct: true 84 | #k8smaster.vm.network "forwarded_port", guest: 443, host: 2443, auto_correct: true 85 | 86 | #k8smaster.vm.provision :shell, inline: "echo hello from %s" % [k8smaster.vm.hostname] 87 | #k8smaster.vm.provision "shell" do |s| 88 | #s.path= "dockerize.sh" # no longer required, handled by ansible 89 | #s.args= "master" 90 | #end 91 | 92 | k8smaster.vm.provision "shell", inline: <<-SHELL 93 | sudo cp -rf ~vagrant/.ssh ~root/ || true # This will allow us to ssh into root with existing vagrant key 94 | sudo cp -rf ~ubuntu/.ssh ~root/ || true # This will allow us to 
ssh into root with existing vagrant key 95 | #chmod 755 /vagrant/dockerize.sh 96 | #/vagrant/dockerize.sh 97 | # curl -SL https://github.com/ReSearchITEng/kubeadm-playbook/archive/master.tar.gz | tar xvz # already in /vagrant 98 | SHELL 99 | 100 | end 101 | 102 | # Disable automatic box update checking. If you disable this, then 103 | # boxes will only be checked for updates when the user runs 104 | # `vagrant box outdated`. This is not recommended. 105 | # config.vm.box_check_update = false 106 | 107 | # Create a forwarded port mapping which allows access to a specific port 108 | # within the machine from a port on the host machine. In the example below, 109 | # accessing "localhost:8080" will access port 80 on the guest machine. 110 | # config.vm.network "forwarded_port", guest: 80, host: 8080 111 | 112 | # Create a private network, which allows host-only access to the machine 113 | # using a specific IP. 114 | # config.vm.network "private_network", ip: "192.168.33.10" 115 | 116 | # Create a public network, which generally matches the bridged network. 117 | # Bridged networks make the machine appear as another physical device on 118 | # your network. 119 | # config.vm.network "public_network" 120 | 121 | # Share an additional folder to the guest VM. The first argument is 122 | # the path on the host to the actual folder. The second argument is 123 | # the path on the guest to mount the folder. And the optional third 124 | # argument is a set of non-required options. 125 | # config.vm.synced_folder "../data", "/vagrant_data" 126 | 127 | # Provider-specific configuration so you can fine-tune various 128 | # backing providers for Vagrant. These expose provider-specific options. 129 | # Example for VirtualBox: 130 | # 131 | # config.vm.provider "virtualbox" do |vb| 132 | # # Display the VirtualBox GUI when booting the machine 133 | # vb.gui = true 134 | # 135 | # # Customize the amount of memory on the VM: 136 | # vb.memory = "1024" 137 | # end 138 | # 139 | # View the documentation for the provider you are using for more 140 | # information on available options. 141 | 142 | # Define a Vagrant Push strategy for pushing to Atlas. Other push strategies 143 | # such as FTP and Heroku are also available. See the documentation at 144 | # https://docs.vagrantup.com/v2/push/atlas.html for more information. 145 | # config.push.define "atlas" do |push| 146 | # push.app = "YOUR_ATLAS_USERNAME/YOUR_APPLICATION_NAME" 147 | # end 148 | 149 | # Enable provisioning with a shell script. Additional provisioners such as 150 | # Puppet, Chef, Ansible, Salt, and Docker are also available. Please see the 151 | # documentation for more information about their specific syntax and use. 152 | # config.vm.provision "shell", inline: <<-SHELL 153 | # sudo apt-get update 154 | # sudo apt-get install -y apache2 155 | # SHELL 156 | end 157 | -------------------------------------------------------------------------------- /group_vars/all/storage.yml: -------------------------------------------------------------------------------- 1 | ################## STORAGE ################ 2 | ############################################# 3 | 4 | ## General Storage settings 5 | ## When resetting a previous installation, should it first remove the existing PVCs & PVs (default: false)?
6 | storage: 7 | delete_pvs: false 8 | 9 | ##### STORAGE OPTION: VMWARE VSPHERE Storage # 10 | ############################################## 11 | ##### Note: This requires the cloud provider settings below: 12 | # ClusterConfiguration.cloudProvider: 'vsphere' 13 | 14 | vsphere_storageclass_urls: 15 | - https://github.com/kubernetes/kubernetes/raw/master/cluster/addons/storage-class/vsphere/default.yaml 16 | #- https://raw.githubusercontent.com/kubernetes/kubernetes/master/examples/volumes/vsphere/vsphere-volume-sc-fast.yaml 17 | 18 | #vsphere_bug_fix github.com/vmware/kubernetes/issues/495 # For k8s 11.x 19 | vsphere_bug_fix: False 20 | 21 | ##### 22 | cloud_config_vsphere_specific: 23 | server: "vcenter.corp.example.com" 24 | secret_name: "vsphere-credentials" #Any name would do 25 | secret_namespace: "kube-system" #kube-system is the usual namespace for such details 26 | username: "user@corp.example.com" # move these to your vault 27 | password: "PASSWORD" # move these to your vault 28 | 29 | cloud_config: | 30 | [Global] 31 | ## Vsphere: 32 | ## One must ensure: 33 | ## - all vms have this enabled: ./govc vm.change -e="disk.enableUUID=1" -vm= 34 | ## - all vms are in the same VCenter 35 | ## - the user below has the following roles at vcenter level: 36 | ## Datastore > Allocate space 37 | ## Datastore > Low level file Operations 38 | ## Virtual Machine > Configuration > Add existing disk 39 | ## Virtual Machine > Configuration > Add or remove device 40 | ## Virtual Machine > Configuration > Remove disk 41 | ## Virtual machine > Configuration > Add new disk 42 | ## Virtual Machine > Inventory > Create new 43 | ## Network > Assign network 44 | ## Resource > Assign virtual machine to resource pool 45 | ## Profile-driven storage -> Profile-driven storage view 46 | insecure-flag = 1 47 | secret-name = "{{ cloud_config_vsphere_specific.secret_name }}" 48 | secret-namespace = "{{ cloud_config_vsphere_specific.secret_namespace }}" 49 | 50 | [VirtualCenter "{{ cloud_config_vsphere_specific.server }}"] 51 | port = 443 52 | datacenters = DC01 53 | 54 | [Workspace] 55 | server = "{{ cloud_config_vsphere_specific.server }}" 56 | datacenter = DC01 57 | default-datastore = DS01 58 | folder = kubernetes # for VRA usually folder name is: VRM 59 | ## Working dir is necessary when your machines are under a directory (and all have to be under the same one) 60 | ##./govc vm.info -vm.dns=machine01 | grep Path #and remove the machine name (last string) 61 | 62 | ## Setup of per machine vm-uuid is usually not required, and it's determined automatically. 63 | #cat /sys/class/dmi/id/product_serial and format like: "4237558d-2231-78b9-e07e-e9028e7cf4a5" 64 | #or: ./govc vm.info -vm.dns=machine01 | grep UUID #(well formated also) 65 | #machine01: vm-uuid="4215e1de-26df-21ec-c79e-2105fe3f9ad1" 66 | #machine02: vm-uuid="4215f1e4-6abd-cff1-1a4c-71ec169d7b11" 67 | [Disk] 68 | #scsicontrollertype = lsilogic-sas 69 | scsicontrollertype = pvscsi 70 | 71 | ##### 72 | 73 | ##### STORAGE OPTION: Self Created NFS ### 74 | ########################################## 75 | ## Creates a nfs server on the master and exports the below path from the master to all cluster 76 | nfs_k8s: #https://github.com/kubernetes/kubernetes/blob/master/examples/volumes/nfs/provisioner/nfs-server-gce-pv.yaml 77 | #https://github.com/kubernetes-incubator/nfs-provisioner 78 | #enabled: "true" 79 | enabled: False 80 | provisioner: nfs.k8s 81 | # Path on the master node: 82 | host_path: /storage/nfs 83 | is_default_class: 'true' # case sensitive! 
Also: only one class can be default. Note that vsphere thin is also trying to be set as default, choose which one you want as default 84 | wipe: true # When set to true, on every reset the files under host_path will be wiped !!! 85 | 86 | ##### STORAGE OPTION: Rook (ceph) ######## 87 | ########################################## 88 | ## Rook - Ceph Distributed Software Storage 89 | ## As per spec section of: https://github.com/rook/rook/blob/master/demo/kubernetes/rook-cluster.yaml 90 | 91 | ## NOTE: rook/ceph is moved to the chart version instead! BELOW is not up to date! 92 | rook: 93 | enabled: false 94 | os_packages: 95 | - jq 96 | reset: 97 | storage_delete: true 98 | ## OLD Installation type, using url. Now we use the helm chart which wraps it. 99 | #operator_url: 100 | # https://github.com/rook/rook/raw/master/demo/kubernetes/rook-operator.yaml 101 | client_tools_url: 102 | - https://github.com/rook/rook/raw/master/demo/kubernetes/rook-client.yaml 103 | - https://github.com/rook/rook/raw/master/demo/kubernetes/rook-tools.yaml 104 | sharedfs: 105 | enabled: false 106 | fs: 107 | - { name: "sharedfs", replication: 2 } #ceph osd pool set sharedfs-data size 2 && ceph osd pool set sharedfs-metadata size 2 108 | allowed_consumer_namespaces: #E.g.: kubectl get secret rook-admin -n rook -o json | jq '.metadata.namespace = "kube-system"' | kubectl apply -f - # as per: https://github.com/rook/rook/blob/master/Documentation/k8s-filesystem.md 109 | - "kube-system" 110 | - "default" 111 | cluster_spec: # as per: https://github.com/rook/rook/blob/master/demo/kubernetes/rook-cluster.yaml and https://github.com/rook/rook/blob/master/Documentation/cluster-tpr.md 112 | versionTag: master-latest 113 | dataDirHostPath: /storage/rook 114 | storage: # cluster level storage configuration and selection 115 | useAllNodes: true 116 | useAllDevices: false 117 | deviceFilter: 118 | metadataDevice: 119 | location: 120 | storeConfig: 121 | storeType: filestore 122 | databaseSizeMB: 1024 # this value can be removed for environments with normal sized disks (100 GB or larger) 123 | journalSizeMB: 1024 # this value can be removed for environments with normal sized disks (20 GB or larger) 124 | ## Individual nodes and their config can be specified as well, but 'useAllNodes' above must be set to false. Then, only the named 125 | ## nodes below will be used as storage resources. Each node's 'name' field should match their 'kubernetes.io/hostname' label. 126 | # nodes: 127 | # - name: "172.17.4.101" 128 | # directories: # specific directories to use for storage can be specified for each node 129 | # - path: "/rook/storage-dir" 130 | # - name: "172.17.4.201" 131 | # devices: # specific devices to use for storage can be specified for each node 132 | # - name: "sdb" 133 | # - name: "sdc" 134 | # storeConfig: # configuration can be specified at the node level which overrides the cluster level config 135 | # storeType: bluestore 136 | # - name: "172.17.4.301" 137 | # deviceFilter: "^sd." 138 | 139 | ## ADVANCED rook options: 140 | rbd: 141 | enabled: true 142 | pool_spec: # as per: https://github.com/rook/rook/blob/master/demo/kubernetes/rook-storageclass.yaml and https://github.com/rook/rook/blob/master/Documentation/pool-tpr.md 143 | replication: 144 | size: 1 145 | ## For an erasure-coded pool, comment out the replication size above and uncomment the following settings. 146 | ## Make sure you have enough OSDs to support the replica size or erasure code chunks.
147 | #erasureCode: 148 | # codingChunks: 2 149 | # dataChunks: 2 150 | 151 | storageclass_parameters: # as per: https://github.com/rook/rook/blob/master/demo/kubernetes/rook-storageclass.yaml 152 | pool: replicapool 153 | ## Specify the Rook cluster from which to create volumes. If not specified, it will use `rook` as the namespace and name of the cluster. 154 | # clusterName: rook 155 | # clusterNamespace: rook 156 | 157 | ##ceph_conf: as per https://github.com/rook/rook/blob/master/Documentation/advanced-configuration.md 158 | #ceph_conf: | 159 | # [global] 160 | # osd crush update on start = false 161 | # osd pool default size = 2 162 | 163 | monitoring: # as per: https://github.com/rook/rook/blob/master/Documentation/k8s-monitoring.md 164 | enabled: true 165 | 166 | ##### 167 | 168 | -------------------------------------------------------------------------------- /group_vars/all/network.yml: -------------------------------------------------------------------------------- 1 | ## HA 2 | CLUSTER_NAME: demok8s # used only for defining the clusterConfiguration and joinConfiguration k8s config, as well as the below dnsDomain and masterha_fqdn 3 | 4 | ## ensure you have the DNS set for wildcard, and pointing all the traffic to the master or similar setup 5 | custom: 6 | networking: 7 | dnsDomain: "{{ CLUSTER_NAME }}.{{ CORP_DNS_DOMAIN | default ('corp.example.com') }}" # For MasterHA, if you have dns, put the desired cluster domain here. If no DNS change is possible on your side, and you want MasterHA, fix the below 2 values accordingly 8 | 9 | ###### 10 | ## masterha_* params are required when you have MasterHA (meaning when your inventory has a non-empty "secondary-masters" section) 11 | ## Your setup can either use a LoadBalancer (usually a hw one), 12 | ## or use a VIP address which keepalived will manage (move the address from one master to another as needed) 13 | 14 | ## Decide on one of the 2 possible masterha_types: 15 | # "vip" #Choose VIP and you'll have keepalived installed and configured for the masterha_ip below (default) 16 | # "lb" #Choose lb when you have a MasterHA LB which load-balances across all your masters, on the api port (default 6443) 17 | # # Make sure your LB is set up to forward requests to a specific master ONLY when the api port /healthz on that host returns status 200 18 | #masterha_type: "vip" # or "lb" 19 | masterha_ip: "192.0.0.171" #| default('') }}" # Important when you have MasterHA; # IP of either your LB or the VIP to be used. 20 | ## masterha_fqdn is usually the dns name of masterha_ip above. (We cannot get it automatically in ansible...) 21 | ## This value is important in order to set apiServerCertSANs in the certs correctly 22 | masterha_fqdn: "master-{{ CLUSTER_NAME }}.{{ CORP_DNS_DOMAIN | default ('corp.example.com') }}" # Important when you have MasterHA, in order to set apiServerCertSANs correctly 23 | #masterha_fqdn: "{{ lookup('dig', masterha_ip, 'qtype=PTR') }}" # but requires some pip modules on host... 24 | 25 | #masterha_bindPort: 6443 #default is 6443; We recommend to keep it 6443. 26 | ### end of masterha topic 27 | 28 | ## When masterha_type is set to "vip", keepalived is deployed automatically. Options are to deploy it via linux package (rpm/deb) or using a docker image. 29 | ## if you move from one type to another, please make sure you manually remove the previous setup 30 | ## E.g. moving from package to docker, manually do: systemctl stop keepalived; systemctl disable keepalived 31 | ## E.g.
moving from docker to package, manually do: docker rm -f keepalived 32 | ## Can be either 'docker' or 'package' or 'provided' (when already installed outside of this playbook; this playbook will generate the configuration and check script) 33 | masterha_vip_keepalived_deploy_type: docker 34 | masterha_vip_keepalived_docker_image: osixia/keepalived:2.0.17 # 2.0.17+; older versions do not have curl 35 | 36 | ## The right way is to always define machines with FQDN in the inventory file (hosts file) 37 | ## using an http proxy for getting to the internet (outside of the env) works fine just by setting nodes with fqdn in the inventory, without the below settings 38 | ## Use the below fqdn functionality only if really required (strong understanding of risks)! 39 | ## Also do not mix (some fqdn some short name). 40 | fqdn: # decide where to force the use of fqdn for the non-masterha master and for nodes. When set to false, it will use the name as defined in the inventory file 41 | always: false # makes all the below true 42 | master: true # when true, actions like wait/join will be done against the dns name instead of the IP 43 | node: false # when true, the join command will have --node-name set to fqdn. When false, k8s will set it based on how the node machine answers to the hostname command 44 | 45 | ############## THE BELOW SECTION IS NO LONGER RELEVANT, as NETWORK comes via HELM CHARTS (e.g. tigera-operator from calico) 46 | #Define network for K8S services 47 | SERVICE_NETWORK_CIDR: 10.96.0.0/12 48 | 49 | ## Select pod Network. One may add more simply by adding the deployment url and the pod network cidr it needs 50 | ## This section is obsolete. By default calico is installed using its helm charts (see addons.yaml) 51 | #podNetwork: 'calico' 52 | #'flannel' 53 | #'weavenet' 54 | #'calico' 55 | 56 | # flannel: 57 | # - podSubnet: 10.244.0.0/16 58 | # - urls: 59 | # - https://raw.githubusercontent.com/coreos/flannel/master/Documentation/kube-flannel.yml 60 | # 61 | # calico: 62 | # - podSubnet: 192.168.0.0/16 63 | # - urls: 64 | # - https://docs.projectcalico.org/v3.2/getting-started/kubernetes/installation/hosted/etcd.yaml 65 | # - https://docs.projectcalico.org/v3.2/getting-started/kubernetes/installation/rbac.yaml 66 | # - https://docs.projectcalico.org/v3.2/getting-started/kubernetes/installation/hosted/calico.yaml 67 | # 68 | # weavenet: 69 | # - podSubnet: 10.32.0.0/12 70 | # - urls: 71 | # #- "https://cloud.weave.works/k8s/net?k8s-version={{ClusterConfiguration.kubernetesVersion}}&env.IPALLOC_RANGE={{POD_NETWORK_CIDR | default ('10.32.0.0/12') }}" 72 | # - "https://cloud.weave.works/k8s/net?k8s-version={{ClusterConfiguration.kubernetesVersion}}&env.IPALLOC_RANGE='10.32.0.0/12'" 73 | 74 | POD_NETWORK_CIDR: 10.244.0.0/16 # Exactly this one is required when the Flannel network is used. It can also be used for calico, which autodetects the range. 75 | #POD_NETWORK_CIDR: '192.168.0.0/16' # Calico is able to autodetect, this should never be required. 76 | #POD_NETWORK_CIDR: '10.32.0.0/12' # Exactly this one is required when the Weave network is used (with defaults). If you use other network solutions, this entry can be commented out.
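## Illustrative sketch (kept commented out, not consumed by the playbook): how the two CIDRs above
## typically land in the kubeadm config. The exact structure lives in group_vars/all/ClusterConfiguration.yml
## (not shown here), so treat the field layout below as an assumption based on the standard kubeadm
## ClusterConfiguration networking schema:
# ClusterConfiguration:
#   networking:
#     serviceSubnet: "{{ SERVICE_NETWORK_CIDR }}"   # e.g. 10.96.0.0/12
#     podSubnet: "{{ POD_NETWORK_CIDR }}"           # e.g. 10.244.0.0/16 (flannel) or 192.168.0.0/16 (calico)
#     dnsDomain: "{{ custom.networking.dnsDomain }}"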
77 | 78 | ##### 79 | ## NETWORK 80 | ## usually, it's not possible to have more than one network solution (but projects like "Multus" exist) 81 | ## options: https://kubernetes.io/docs/admin/addons/ 82 | ## Usually choices are: flannel, weavenet, calico 83 | 84 | ## We have moved the networking deploy to the helm charts part (addons.yaml) 85 | ## Should you want to use an overlay network that does not have a helm chart, uncomment k8s_network_addons_urls along with one of its options: 86 | 87 | #k8s_network_addons_urls: 88 | 89 | ## CALICO # For Calico one has to also ensure the above setting ClusterConfiguration.networking.podSubnet is set to 192.168.0.0/16 ) 90 | ## new 2020: as per: https://docs.projectcalico.org/getting-started/kubernetes/self-managed-onprem/onpremises#install-calico-with-kubernetes-api-datastore-50-nodes-or-less 91 | ## "If you are using a different pod CIDR with kubeadm, no changes are required - Calico will automatically detect the CIDR based on the running configuration." 92 | #- https://docs.projectcalico.org/manifests/calico.yaml 93 | # - https://docs.projectcalico.org/v3.2/getting-started/kubernetes/installation/hosted/etcd.yaml 94 | # - https://docs.projectcalico.org/v3.2/getting-started/kubernetes/installation/rbac.yaml 95 | # - https://docs.projectcalico.org/v3.2/getting-started/kubernetes/installation/hosted/calico.yaml 96 | ## Other Calico version (newer, reusing the etcd of k8s, but with other limitations, use with care): 97 | # - https://docs.projectcalico.org/v3.2/getting-started/kubernetes/installation/hosted/kubernetes-datastore/calico-networking/1.7/calico.yaml 98 | ## OLDER_CALICO: 99 | # - https://docs.projectcalico.org/v2.6/getting-started/kubernetes/installation/hosted/kubeadm/1.6/calico.yaml # versions are 2.4,2.5,2.6 100 | 101 | 102 | ## OR 103 | 104 | ## Flannel: (for Flannel one has to also ensure the above setting ClusterConfiguration.networking.podSubnet is set to 10.244.0.0/16 ) 105 | ##- https://raw.githubusercontent.com/coreos/flannel/master/Documentation/k8s-manifests/kube-flannel-rbac.yml 106 | #- https://raw.githubusercontent.com/coreos/flannel/v0.9.1/Documentation/kube-flannel.yml # For the latest, replace the version with master 107 | #- https://raw.githubusercontent.com/coreos/flannel/v0.10.0/Documentation/kube-flannel.yml 108 | # flannel for 1.12 (fixes tolerations, and the fix is not in v0.10.0) 109 | #- https://raw.githubusercontent.com/coreos/flannel/master/Documentation/kube-flannel.yml 110 | 111 | # OR 112 | 113 | ## Weave: #https://www.weave.works/docs/net/latest/kubernetes/kube-addon/ 114 | #- https://cloud.weave.works/k8s/net?k8s-version={{ClusterConfiguration.kubernetesVersion}}&{{POD_NETWORK_CIDR | default ('env.IPALLOC_RANGE=10.32.0.0/12') }} 115 | #- "https://cloud.weave.works/k8s/net?k8s-version={{ClusterConfiguration.kubernetesVersion}}&env.IPALLOC_RANGE={{POD_NETWORK_CIDR | default ('10.32.0.0/12') }}" 116 | 117 | # OR 118 | ## kube-router 119 | #- https://raw.githubusercontent.com/cloudnativelabs/kube-router/master/daemonset/kube-router-all-service-daemonset.yaml 120 | ##### 121 | 122 | -------------------------------------------------------------------------------- /roles/storage/tasks/rook.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # - hosts: master 3 | # become: yes 4 | # become_method: sudo 5 | # tags: 6 | # - rook 7 | # tasks: 8 | 9 | ## rook common: 10 | - block: 11 | - set_fact: 12 | env_kc: '{{ proxy_env |default({}) | combine ({"PATH" :
"/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin" }) | combine ({"KUBECONFIG" :"/etc/kubernetes/admin.conf"}) }}' 13 | tags: 14 | - always 15 | 16 | - name: k8s cluster pre-installation sanity - check if all current k8s pods are in Running status 17 | environment: 18 | KUBECONFIG: /etc/kubernetes/admin.conf 19 | shell: "kubectl get --all-namespaces pods --no-headers | grep -v -w 'Running' || true " 20 | register: command_result 21 | tags: 22 | - sanity 23 | until: command_result.stdout == "" 24 | retries: 30 25 | delay: 3 26 | changed_when: false 27 | 28 | - name: ensure jq third party exists on the machine (can come from from epel, pip, etc) 29 | shell: jq --version 30 | changed_when: false 31 | 32 | - name: install rook operator (using rook.operator_url) 33 | environment: '{{env_kc}}' 34 | command: /usr/bin/kubectl apply -f {{ item }} 35 | when: rook.operator_url is defined 36 | with_items: 37 | - "{{ rook.operator_url | default ('') }}" 38 | 39 | - name: git clone rook (till charts.rook.io is created) 40 | environment: '{{env_kc}}' 41 | git: 42 | repo: 'https://github.com/rook/rook.git' 43 | dest: /tmp/rook 44 | force: yes 45 | depth: 1 46 | version: master 47 | recursive: no 48 | when: rook.operator_url is defined 49 | 50 | - name: deploy rook operator (using helm chart) - prefered method 51 | environment: '{{env_kc}}' 52 | #command: 'helm install {{ item.repo }} --namespace {{ item.namespace | default("default") }} --name {{ item.name }} {{ item.options | default ("") }}' 53 | #command: 'helm install rook/rook-operator --namespace rook --name rook-operator --set image.pullPolicy=Always ' 54 | command: 'helm install /tmp/rook/demo/helm/rook-operator/ --namespace rook --name rook-operator --set image.pullPolicy=Always ' 55 | when: rook.operator_url is not defined 56 | 57 | - name: rook operator pod sanity 58 | environment: 59 | KUBECONFIG: /etc/kubernetes/admin.conf 60 | shell: "kubectl get --all-namespaces pods --no-headers | grep -v -w 'Running' || true " 61 | register: command_result 62 | tags: 63 | - sanity 64 | until: command_result.stdout == "" 65 | retries: 30 66 | delay: 3 67 | changed_when: false 68 | 69 | - name: wait for rook.io/, Kind=Cluster to be created 70 | environment: 71 | KUBECONFIG: /etc/kubernetes/admin.conf 72 | shell: kubectl get thirdpartyresources --no-headers | grep rook.io | grep -i cluster 73 | register: command_result 74 | tags: 75 | - sanity 76 | until: command_result.stdout != "" 77 | retries: 30 78 | delay: 3 79 | changed_when: false 80 | 81 | # This is still required... 
82 | - name: Wait few more seconds for rook.io/, Kind=Cluster to be created 83 | pause: seconds=0 84 | changed_when: false 85 | 86 | - name: prepare rook-cluster.yml file 87 | template: 88 | src: rook-cluster.j2 89 | dest: /tmp/rook-cluster.yml 90 | force: yes 91 | tags: 92 | - rook-cluster 93 | 94 | - name: install rook cluster and create rook namespace 95 | environment: '{{env_kc}}' 96 | command: /usr/bin/kubectl apply -f {{ item }} 97 | with_items: 98 | - /tmp/rook-cluster.yml 99 | tags: 100 | - rook-cluster 101 | 102 | - name: rook cluster deploy sanity - wait for all installed pods to become Running 103 | environment: 104 | KUBECONFIG: /etc/kubernetes/admin.conf 105 | shell: "kubectl get --all-namespaces pods --no-headers | grep -v -w 'Running' || true " 106 | register: command_result 107 | tags: 108 | - sanity 109 | - rook 110 | until: command_result.stdout == "" 111 | retries: 30 112 | delay: 3 113 | changed_when: false 114 | 115 | - name: prepare rook-ceph_conf-ConfigMap.yml file 116 | template: 117 | src: rook_ceph_conf.j2 118 | dest: /tmp/rook_ceph_conf.yml 119 | force: yes 120 | when: rook.ceph_conf is defined 121 | 122 | - name: install rook ceph_conf config map - namespace rook should be available 123 | environment: '{{env_kc}}' 124 | command: /usr/bin/kubectl apply -f {{ item }} 125 | when: rook.ceph_conf is defined 126 | with_items: 127 | - /tmp/rook_ceph_conf.yml 128 | 129 | - name: rook-tools allowed_consumer_namespaces secret injection 130 | environment: 131 | KUBECONFIG: /etc/kubernetes/admin.conf 132 | shell: kubectl get secret rook-admin -n rook -o json | jq '.metadata.namespace = "{{ item }}"' | kubectl apply -f - 133 | when: rook.allowed_consumer_namespaces is defined 134 | with_items: "{{ rook.allowed_consumer_namespaces }}" 135 | 136 | - name: install rook client and tools - aka client_tools_url 137 | environment: '{{env_kc}}' 138 | command: /usr/bin/kubectl apply -f {{ item }} 139 | with_items: "{{ rook.client_tools_url | default ('') }}" 140 | 141 | - name: rook client tools deploy sanity - wait for all installed pods to become Running 142 | environment: 143 | KUBECONFIG: /etc/kubernetes/admin.conf 144 | shell: "kubectl get --all-namespaces pods --no-headers | grep -v -w 'Running' || true " 145 | register: command_result 146 | when: rook.client_tools_url is defined 147 | tags: 148 | - sanity 149 | until: command_result.stdout == "" 150 | retries: 30 151 | delay: 3 152 | 153 | - name: rook cluster deploy full sanity via rook client command rook node ls 154 | environment: 155 | KUBECONFIG: /etc/kubernetes/admin.conf 156 | #kubectl exec -it rook-tools -- bash -c "rook node ls >> /tmp/status.txt" 157 | shell: "kubectl -n rook exec rook-tools -it /usr/bin/rook node ls | tail -n +2 | grep -v ' OK ' || true " 158 | register: command_result 159 | tags: 160 | - sanity 161 | until: command_result.stdout == "" 162 | retries: 30 163 | delay: 3 164 | changed_when: false 165 | when: rook is defined and rook.enabled 166 | tags: 167 | - rook 168 | 169 | ## rook sharedfs: 170 | - block: 171 | - name: rook-client create rook sharedfs 172 | environment: 173 | KUBECONFIG: /etc/kubernetes/admin.conf 174 | shell: kubectl -n rook exec rook-tools -- rook filesystem create --name {{ item.name | default("sharedfs") }} 175 | with_items: "{{ rook.sharedfs.fs | default ('') }}" 176 | 177 | - name: rook-tools set replication(redundancy) for sharedfs data 178 | environment: 179 | KUBECONFIG: /etc/kubernetes/admin.conf 180 | shell: kubectl -n rook exec rook-tools -- ceph osd pool set {{ 
item.name | default("sharedfs") }}-data size {{ item.replication | default (2) }} 181 | with_items: '{{ rook.sharedfs.fs | default("sharedfs") }}' 182 | 183 | - name: rook-tools set replication(redundancy) for sharedfs metadata 184 | environment: 185 | KUBECONFIG: /etc/kubernetes/admin.conf 186 | shell: kubectl -n rook exec rook-tools -- ceph osd pool set {{ item.name | default("sharedfs") }}-metadata size {{ item.replication | default (2) }} 187 | with_items: "{{ rook.sharedfs.fs | default('sharedfs') }}" 188 | 189 | - name: wait for rook.io/, Kind=Pool to be created 190 | environment: 191 | KUBECONFIG: /etc/kubernetes/admin.conf 192 | shell: kubectl get thirdpartyresources --no-headers | grep rook.io | grep -i pool 193 | register: command_result 194 | tags: 195 | - sanity 196 | until: command_result.stdout != "" 197 | retries: 30 198 | delay: 3 199 | changed_when: false 200 | when: rook is defined and rook.enabled and rook.sharedfs is defined and rook.sharedfs.enabled 201 | tags: 202 | - rook 203 | - rook_sharedfs 204 | 205 | ## rook rbd: 206 | - block: 207 | - name: prepare rook-pool.yml file 208 | template: 209 | src: rook-pool.j2 210 | dest: /tmp/rook-pool.yml 211 | force: yes 212 | tags: 213 | - rbd 214 | when: rook.rbd is defined and rook.rbd.enabled 215 | 216 | - name: install rook rook-pool.yml file 217 | environment: '{{env_kc}}' 218 | command: /usr/bin/kubectl apply -f {{ item }} 219 | with_items: 220 | - /tmp/rook-pool.yml 221 | tags: 222 | - rbd 223 | when: rook.rbd is defined and rook.rbd.enabled 224 | 225 | - name: prepare rook-storageclass.yml file 226 | template: 227 | src: rook-storageclass.j2 228 | dest: /tmp/rook-storageclass.yml 229 | force: yes 230 | tags: 231 | - rbd 232 | when: rook.rbd is defined and rook.rbd.enabled 233 | 234 | - name: install rook rook-storageclass.yml file 235 | environment: '{{env_kc}}' 236 | command: /usr/bin/kubectl apply -f {{ item }} 237 | when: rook.rbd is defined and rook.rbd.enabled 238 | with_items: 239 | - /tmp/rook-storageclass.yml 240 | tags: 241 | - rbd 242 | 243 | # Block ends. Its condition was: 244 | when: rook is defined and rook.enabled and rook.rbd is defined and rook.rbd.enabled 245 | tags: 246 | - rook 247 | - rook_rbd 248 | 249 | 250 | 251 | 252 | -------------------------------------------------------------------------------- /site.yml: -------------------------------------------------------------------------------- 1 | --- 2 | ## Preparations 3 | ## Making sure python exists on all nodes, so Ansible will be able to run; make sure min vars are defined 4 | - hosts: all 5 | gather_facts: true 6 | become: true 7 | become_method: sudo 8 | pre_tasks: 9 | ## It would be best to have ansible already installed on all machines. 10 | ## But if it is not, we'll try to do it: 11 | - name: when no python2, install python2 for Ansible<2.8 (usually required on ubuntu, which defaults to python3) # Alternativelly, for Ubuntu machines, define var: ansible_python_interpreter=/usr/bin/python3 12 | ansible.builtin.raw: test -e /usr/bin/python || (apt -y update && apt install -y python-minimal) || (yum install -y python2 python-simplejson) 13 | register: output 14 | changed_when: output.stdout != "" 15 | tags: always 16 | when: 17 | - ansible_version.full is version_compare('2.8', '<') 18 | - ( ansible_python_interpreter is not defined or ansible_python_interpreter == "/usr/bin/python" ) 19 | # ansible_os_family conds. 
cannot be used as this is before gathering facts (where ansible is required) 20 | ignore_errors: true 21 | ## reason for ignore_errors: true 22 | ## "version_compare" was replaced with "version" starting ansible 2.5; 23 | ## CentOS/RHEL 7.x use ansible 2.4, so not able to grasp what version_compare is. 24 | ## Ansible 2.9 removes the version_compare and does not recognize it any longer. 25 | ## As our need is to add python2 only on versions before 2.8, if this fails 26 | ## (due to missing version_compare command), we are fine. 27 | ## We do not cover cases where it fails due to other reasons, but that is a reasonable risk, 28 | ## and that issue will be captured later in the flow. 29 | 30 | - name: when no python(2/3), install python3(Debian) python2(RedHat) for Ansible>=2.8 # Alternativelly, for Ubuntu machines, define var: ansible_python_interpreter=/usr/bin/python3 31 | ansible.builtin.raw: test -e /usr/bin/python3 || (apt -y update && apt install -y python3-minimal) || (yum install -y python3 python-simplejson) 32 | register: output 33 | changed_when: output.stdout != "" 34 | tags: always 35 | when: 36 | - ansible_version.full is version('2.8', '>=') or ( ansible_python_interpreter is defined and ansible_python_interpreter == "/usr/bin/python3" ) 37 | # ansible_os_family conds. cannot be used as this is before gathering facts (where ansible is required) 38 | ignore_errors: true 39 | ## reason for ignore_errors: true 40 | ## is similar to the one explained above (complements it) 41 | 42 | - name: Make sure proxy_env map is defined, even if not required; must be a map, e.g. empty map 43 | ansible.builtin.set_fact: 44 | proxy_env: '{{ proxy_env |default({}) }}' 45 | cacheable: yes 46 | tags: always 47 | when: proxy_env is not defined 48 | 49 | - ansible.builtin.setup: # aka gather_facts 50 | tags: always # required for tags, see ansible issue: #14228 51 | 52 | - name: test min. vars (group_vars/all) are set, like ClusterConfiguration (and k8s_network_addons_urls if netw is not via helm chart) 53 | ansible.builtin.debug: msg='Make sure min. 
vars (group_vars/all) are set, like ClusterConfiguration (and k8s_network_addons_urls if netw is not via helm chart)' 54 | when: 55 | - ClusterConfiguration is not defined 56 | - JoinConfiguration is not defined 57 | failed_when: 58 | - ClusterConfiguration is not defined 59 | - JoinConfiguration is not defined 60 | tags: always # always check if we have vars in place 61 | 62 | ## proper reset of any previous cluster (if any) 63 | - hosts: primary-master 64 | become: true 65 | become_method: sudo 66 | tags: 67 | - reset 68 | - master 69 | roles: 70 | #- { role: helm, task: helm_reset, tags: [ 'reset', 'helm_reset' ] } # in helm3 is no longer required 71 | - { role: storage, task: remove_pvs, tags: [ 'reset', 'storage_reset', 'pvs_reset' ] } 72 | - { role: storage, task: nfs_reset, tags: [ 'reset', 'storage_reset', 'nfs_reset' ] } 73 | - { role: storage, task: rook_reset, tags: [ 'reset', 'storage_reset', 'rook_reset' ] } 74 | - { role: tools, task: reset_drain, tags: [ 'reset', 'node_reset', 'drain', 'node_drain' ] } #done on master, affecting nodes 75 | 76 | ## nodes -> reset and install common part (for all nodes) 77 | - hosts: nodes 78 | become: true 79 | become_method: sudo 80 | tags: 81 | - node 82 | roles: 83 | - { role: tools, task: reset, tags: [ 'reset', 'node_reset' ], when: "inventory_hostname not in groups['masters']" } 84 | - { role: tools, task: weave_reset, tags: [ 'reset', 'node_reset', 'network_reset', 'weave_reset', 'weave' ], when: "inventory_hostname not in groups['masters']" } 85 | - { role: common, task: all, tags: [ 'common', 'install', 'common_install', 'node_install', 'node' ], when: "inventory_hostname not in groups['masters']" } 86 | 87 | ## master -> reset and install common part (for all masters - and sometimes etcd when colocated with masters) 88 | - hosts: masters 89 | become: true 90 | become_method: sudo 91 | tags: 92 | - master 93 | roles: 94 | - { role: tools, task: reset, tags: [ 'reset', 'master_reset' ] } 95 | - { role: tools, task: weave_reset, tags: [ 'reset', 'master_reset', 'network_reset', 'weave', 'weave_reset' ] } 96 | - { role: common, task: all, tags: [ 'common', 'install', 'common_install', 'master_install'] } 97 | 98 | ## master -> install keepalived on masters (relevat if HA) 99 | - hosts: masters 100 | become: true 101 | become_method: sudo 102 | any_errors_fatal: true 103 | tags: 104 | - master 105 | - install 106 | - ha 107 | - master_install 108 | roles: 109 | - role: keepalived 110 | tags: [ 'master', 'install', 'master_install', 'ha', 'keepalived'] 111 | when: 112 | - ( groups['masters'] | length ) > 1 113 | - ( custom.networking.masterha_type | default('vip') ) == 'vip' 114 | 115 | - hosts: primary-master 116 | name: primary-master (or master in general) - it applies to both ha and non-ha 117 | become: true 118 | become_method: sudo 119 | any_errors_fatal: true 120 | tags: 121 | - master 122 | - install 123 | - master_install 124 | - ha 125 | roles: 126 | - { role: primary-master, task: primary, tags: [ 'primary-master', 'master', 'install', 'master_install'] } 127 | 128 | ## secondary-masters -> install secondary masters 129 | - hosts: secondary-masters 130 | become: true 131 | become_method: sudo 132 | any_errors_fatal: true 133 | tags: 134 | - master 135 | - install 136 | - ha 137 | - master_install 138 | roles: 139 | - { role: non-primary-master, tags: [ 'secondary-masters', 'master', 'install', 'master_install', 'secondary_masters'] } 140 | 141 | ## node -> install nodes (kubeadm join, etc) 142 | - hosts: nodes 143 | become: 
true 144 | become_method: sudo 145 | any_errors_fatal: true 146 | tags: 147 | - node 148 | - install 149 | - node_install 150 | roles: 151 | - { role: non-primary-master, tags: [ 'node', 'install', 'node_install'], when: "inventory_hostname not in groups['masters']" } 152 | 153 | ## node -> label nodes (even when master is also a node) 154 | - hosts: nodes 155 | become: true 156 | become_method: sudo 157 | any_errors_fatal: true 158 | tags: 159 | - node 160 | - install 161 | - node_install 162 | - label 163 | roles: 164 | - { role: tools, task: labels, tags: [ 'label'] } 165 | 166 | ## Post deploy (network, storage, taints, helm installation, helm charts deploy, any other addons) 167 | - hosts: primary-master 168 | become: true 169 | become_method: sudo 170 | tags: 171 | - post_deploy 172 | roles: 173 | - { role: tools, task: labels, tags: [ 'label'] } 174 | - { role: post_deploy, task: all, tags: [ 'post_deploy_no_helm' ] } 175 | - { role: storage, task: create_all, tags: [ 'storage', 'rook', 'nfs', 'vsphere' ] } 176 | - { role: helm, task: helm, tags: [ 'helm' ] } 177 | - { role: helm, task: charts_deploy, tags: [ 'helm', 'charts_deploy' ] } 178 | 179 | ### For fixes like vsphere's bug, we have to reboot after some more fixes... 180 | #https://github.com/vmware/kubernetes/issues/495 181 | - hosts: mustrebootlist 182 | gather_facts: false 183 | become: true 184 | become_method: sudo 185 | tags: 186 | - mustrebootlist 187 | - vsphere_bug_fix 188 | - vsphere 189 | roles: 190 | - { role: tools, task: reboot, tags: [ 'reboot_minimal' ], when: "ClusterConfiguration.cloudProvider is defined and ClusterConfiguration.cloudProvider == 'vsphere' and allow_restart | default(False) and vsphere_bug_fix is defined and vsphere_bug_fix" } 191 | 192 | ## Generic Sanity 193 | - hosts: masters 194 | become: true 195 | become_method: sudo 196 | tags: 197 | - master 198 | pre_tasks: 199 | - name: remove temporary mustreboot temporary group 200 | group: 201 | name: mustrebootlist 202 | state: absent 203 | roles: 204 | - { role: tools, task: cluster_sanity, tags: [ 'cluster_sanity', 'sanity' ] } 205 | - { role: tools, task: postinstall_messages, tags: [ 'cluster_sanity', 'sanity' ] } 206 | 207 | ## to reset/add only some (more) nodes: 208 | ## 1. keep in hosts only: 209 | ## - the master 210 | ## - the affected node (all other nodes should not be there) 211 | ## 2. Have the token defined in the group_vars/all 212 | ## 3. 
Run using only this/these tag(s): 213 | ## ansible-playbook -i hosts -v site.yml --tags "node" # same with: ansible-playbook -i hosts -v site.yml --tags "node_reset,node_install,cluster_sanity,cluster_info" 214 | 215 | ## To get cluster info/sanity: 216 | ## ansible-playbook -i hosts -v site.yml --tags "cluster_sanity,cluster_info" 217 | -------------------------------------------------------------------------------- /roles/common/tasks/install_k8s_packages.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: find requested k8s major minor vers 3 | set_fact: 4 | k8s_major: "{{ KUBERNETES_VERSION.split('.').0 }}" 5 | k8s_minor: "{{ KUBERNETES_VERSION.split('.').1 }}" 6 | ### - "KUBERNETES_VERSION is version_compare ('1.27', 'lt')" 7 | # Debian based (includes Ubuntu) prerequisites for using apt-get from ansible: 8 | - name: Install apt-transport-https 9 | package: name={{ item }} state={{ package_state | default ('present') }} 10 | environment: '{{ proxy_env | default ({}) }}' 11 | with_items: 12 | - apt-transport-https 13 | when: ansible_os_family == "Debian" 14 | 15 | # Create deb/yum repos for kubernetes packages (kube*, cni, etc.) 16 | - block: 17 | # Debian based (includes Ubuntu) 18 | - apt_key: 19 | url: "{{ PKGS_K8S_IO_CORE }}:/stable:/v{{k8s_major}}.{{k8s_minor}}/deb/Release.key" 20 | state: present 21 | environment: '{{ proxy_env | default ({}) }}' 22 | when: ansible_os_family == "Debian" 23 | 24 | - apt_repository: 25 | repo: "deb {{ PKGS_K8S_IO_CORE }}:/stable:/v{{k8s_major}}.{{k8s_minor}}/deb/ /" 26 | state: present 27 | #filename: 'kubernetes.list' 28 | #if filename not defined, looks filename generated like: pkgs_k8s_io_core_stable_v1_29_deb.list 29 | update_cache: yes 30 | environment: '{{ proxy_env | default ({}) }}' 31 | when: ansible_os_family == "Debian" 32 | 33 | # RedHat based (includes CentOS, RHEL, Fedora, Oracle, etc.) 
34 | - name: Create kubernetes yum repository 35 | yum_repository: 36 | name: kubernetes 37 | description: Kubernetes 38 | baseurl: "{{ PKGS_K8S_IO_CORE }}:/stable:/v{{k8s_major}}.{{k8s_minor}}/rpm/" 39 | #http://yum.kubernetes.io/repos/kubernetes-el7-x86_64 40 | gpgcheck: 0 # to allow internal repos also 41 | when: ansible_os_family == "RedHat" # and HOST_ARCH == "amd64" 42 | 43 | - name: add proxy for the repo 44 | ini_file: 45 | dest: /etc/yum.repos.d/kubernetes.repo 46 | section: "{{item}}" 47 | option: proxy 48 | value: "{{proxy_env.https_proxy | default ('') }}" 49 | with_items: [ 'kubernetes' ] 50 | when: 51 | - proxy_env is defined 52 | - proxy_env.https_proxy is defined 53 | #- proxy_env.https_proxy | length > 0 54 | - ansible_os_family == "RedHat" 55 | when: kubernetes_repo_create | default('true') 56 | 57 | - name: Clean yum metadata 58 | command: yum clean all 59 | args: 60 | warn: no 61 | when: ansible_os_family == "RedHat" and package_state is defined and package_state == "latest" 62 | 63 | - name: apt-get clean metadata 64 | command: apt-get clean ; apt-file purge 65 | args: 66 | warn: no 67 | when: ansible_os_family == "Debian" and package_state is defined and package_state == "latest" 68 | 69 | # End OS dependent repo setup 70 | 71 | ### socat 72 | - name: Ansible check /usr/bin/socat exists 73 | stat: 74 | path: /usr/bin/socat 75 | register: statsocat 76 | 77 | - name: Install socat from centos/rhel/ubuntu repo 78 | package: name={{ item }} state={{ package_state | default ('present') }} 79 | #environment: '{{ proxy_env | default ({}) }}' 80 | with_items: 81 | - socat 82 | when: statsocat.stat.exists is not defined or statsocat.stat.exists == False 83 | 84 | ### tc (iptables-tc) 85 | - name: Ansible check /usr/sbin/tc exists 86 | stat: 87 | path: /usr/sbin/tc 88 | register: stattc 89 | 90 | - name: Install tc/iproute-tc from centos/rhel/ubuntu repo 91 | package: name={{ item }} state={{ package_state | default ('present') }} 92 | #environment: '{{ proxy_env | default ({}) }}' 93 | with_items: 94 | - iproute-tc 95 | when: 96 | - stattc.stat.exists is not defined or stattc.stat.exists == False 97 | - ansible_os_family == "RedHat" 98 | 99 | - name: Install tc/iproute2 Ubuntu/Debian 100 | package: name={{ item }} state={{ package_state | default ('present') }} 101 | #environment: '{{ proxy_env | default ({}) }}' 102 | with_items: 103 | - iproute2 104 | when: 105 | - stattc.stat.exists is not defined or stattc.stat.exists == False 106 | - ansible_os_family == "Debian" 107 | 108 | ### ipset 109 | - name: Ansible check /usr/sbin/ipset exists 110 | stat: 111 | # ubuntu 18.04 /sbin/ipset, but we skip checking it there to simplify code. 
Keeping only ubuntu 20.04 and rhel based 112 | path: /usr/sbin/ipset 113 | register: statipset 114 | 115 | - name: Install ipset from centos/rhel/ubuntu repo 116 | package: name={{ item }} state={{ package_state | default ('present') }} 117 | #environment: '{{ proxy_env | default ({}) }}' 118 | with_items: 119 | - ipset 120 | when: statipset.stat.exists is not defined or statipset.stat.exists == False 121 | 122 | ### unhold (debian) 123 | - name: unhold before upgrade/install packages (when on debian) 124 | shell: apt-mark unhold {{ item }} 125 | ignore_errors: true 126 | with_items: 127 | - kubeadm 128 | - kubelet 129 | - kubectl 130 | - kubernetes-cni 131 | - cri-tools 132 | - containernetworking-plugins 133 | when: 134 | - full_kube_apt_unhold | default (False) 135 | - ansible_os_family == "Debian" 136 | - kubelet_version is defined and kubelet_version!='present' 137 | - kubectl_version is defined and kubectl_version!='present' 138 | - kubeadm_version is defined and kubeadm_version!='present' 139 | 140 | - name: make sure there is no package containernetworking-plugins as it conflicts with kubernetes-cni 141 | package: name={{ item }} state=absent 142 | with_items: 143 | - containernetworking-plugins 144 | 145 | ### kubelet 146 | - name: Install kubelet when kubelet_version is not defined 147 | package: name={{ item }} state={{ package_state | default ('present') }} 148 | #environment: '{{ proxy_env | default ({}) }}' 149 | with_items: 150 | - kubelet 151 | when: kubelet_version is not defined 152 | 153 | - name: Install kubelet when Debian and kubelet_version is defined 154 | package: name="{{ item }}={{kubelet_version | regex_replace('v')}}*" state=present force=yes 155 | #environment: '{{ proxy_env | default ({}) }}' 156 | with_items: 157 | - kubelet 158 | when: kubelet_version is defined and ( kubelet_version!='present' or kubelet_version!='latest' ) and ansible_os_family == "Debian" 159 | 160 | - name: Install kubelet when RedHat and kubelet_version is defined 161 | package: name="{{ item }}-{{kubelet_version | regex_replace('v')}}" state=present allow_downgrade=yes 162 | #environment: '{{ proxy_env | default ({}) }}' 163 | with_items: 164 | - kubelet 165 | when: kubelet_version is defined and ( kubelet_version!='present' or kubelet_version!='latest' ) and ansible_os_family == "RedHat" 166 | 167 | - name: Install kubelet when kubelet_version is latest 168 | package: name={{ item }} state={{kubelet_version}} 169 | #environment: '{{ proxy_env | default ({}) }}' 170 | with_items: 171 | - kubelet 172 | when: kubelet_version is defined and ( kubelet_version=='present' or kubelet_version=='latest' ) 173 | 174 | ############## 175 | ### kubectl 176 | - name: Install kubectl when kubectl_version not defined 177 | package: name={{ item }} state={{ package_state | default ('present') }} 178 | #environment: '{{ proxy_env | default ({}) }}' 179 | with_items: 180 | - kubectl 181 | when: kubectl_version is not defined 182 | 183 | - name: Install kubectl when Debian and when kubectl_version is defined # ansible bug 29705 184 | package: name="{{ item }}={{kubectl_version | regex_replace('v')}}*" state=present force=yes 185 | #environment: '{{ proxy_env | default ({}) }}' 186 | with_items: 187 | - kubectl 188 | when: kubectl_version is defined and ( kubectl_version!='present' or kubectl_version!='latest' ) and ansible_os_family == "Debian" 189 | 190 | - name: Install kubectl when RedHat and when kubectl_version is defined 191 | package: name="{{ item }}-{{kubectl_version | regex_replace('v')}}" 
state=present allow_downgrade=yes 192 | #environment: '{{ proxy_env | default ({}) }}' 193 | with_items: 194 | - kubectl 195 | when: kubectl_version is defined and ( kubectl_version!='present' or kubectl_version!='latest' ) and ansible_os_family == "RedHat" 196 | 197 | - name: Install kubectl when kubectl_version is latest 198 | package: name={{ item }} state={{kubectl_version}} 199 | #environment: '{{ proxy_env | default ({}) }}' 200 | with_items: 201 | - kubectl 202 | when: kubectl_version is defined and ( kubectl_version=='present' or kubectl_version=='latest' ) 203 | 204 | ############# 205 | ### kubeadm 206 | - name: Install kubeadm when kubeadm_version is not defined 207 | package: name={{ item }} state={{ package_state | default ('present') }} 208 | #environment: '{{ proxy_env | default ({}) }}' 209 | with_items: 210 | - kubeadm 211 | when: kubeadm_version is not defined 212 | 213 | - name: Install kubeadm when Debian and kubeadm_version is defined # ansible bug 29705 214 | package: name="{{ item }}={{kubeadm_version | regex_replace('v')}}*" state=present force=yes 215 | #environment: '{{ proxy_env | default ({}) }}' 216 | with_items: 217 | - kubeadm 218 | when: kubeadm_version is defined and ( kubeadm_version!='present' or kubeadm_version!='latest' ) and ansible_os_family == "Debian" 219 | 220 | - name: Install kubeadm when RedHat and kubeadm_version is defined 221 | package: name="{{ item }}-{{kubeadm_version | regex_replace('v')}}" state=present allow_downgrade=yes 222 | #environment: '{{ proxy_env | default ({}) }}' 223 | with_items: 224 | - kubeadm 225 | when: kubeadm_version is defined and ( kubeadm_version!='present' or kubeadm_version!='latest' ) and ansible_os_family == "RedHat" 226 | 227 | - name: Install kubeadm when kubeadm_version is latest 228 | package: name={{ item }} state={{kubeadm_version}} 229 | #environment: '{{ proxy_env | default ({}) }}' 230 | with_items: 231 | - kubeadm 232 | when: kubeadm_version is defined and ( kubeadm_version=='present' or kubeadm_version=='latest' ) 233 | 234 | #- name: Install packages 235 | # package: name={{ item }} state={{ package_state | default ('present') }} 236 | # environment: '{{ proxy_env | default ({}) }}' 237 | # with_items: 238 | #- kubernetes-cni # already installed by kubelet anyway 239 | #- docker # for RH, but could be installed manually, so no checks here. 240 | #- kubeadm 241 | #- docker.io # for ubuntu 242 | 243 | 244 | --------------------------------------------------------------------------------
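## Illustrative usage note (an assumption, not taken from the files above): the kube* package versions
## installed by roles/common/tasks/install_k8s_packages.yml are driven by KUBERNETES_VERSION (whose
## major.minor selects the PKGS_K8S_IO_CORE package repo) and by the optional kubelet_version /
## kubectl_version / kubeadm_version variables, typically defined in group_vars/all. One possible way to
## pin them for a single run, sketched here only as an example invocation:
## ansible-playbook -i hosts -v site.yml -e kubeadm_version=1.29.3 -e kubelet_version=1.29.3 -e kubectl_version=1.29.3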