├── .gitignore ├── LICENSE ├── NGINX-to-pods-check ├── README.md ├── check.sh └── example-deployment.yml ├── README.md ├── adjust-downstream-webhook ├── README.md └── adjust-downstream-webhook.sh ├── bad-ingress-scanner ├── Dockerfile ├── README.md ├── bad-ingress.yaml ├── deployment.yaml └── run.sh ├── change-nodetemplate-owner ├── Dockerfile ├── README.md └── change-nodetemplate-owner.sh ├── charts ├── index.yaml ├── rancher-0.0.1.tgz ├── robots.txt ├── systems-info │ ├── .helmignore │ ├── Chart.yaml │ ├── questions.yml │ ├── templates │ │ ├── _helpers.tpl │ │ ├── clusterrolebinding.yaml │ │ ├── deployment.yaml │ │ ├── namespace.yaml │ │ ├── secret.yaml │ │ └── serviceaccount.yaml │ └── values.yaml └── systems-information │ ├── Chart.yaml │ ├── questions.yml │ ├── templates │ ├── clusterrolebinding.yaml │ ├── deployment.yaml │ ├── namespace.yaml │ ├── secret.yaml │ └── serviceaccount.yaml │ └── values.yaml ├── cleanup-etcd-part-files ├── README.md ├── alt-s3-sync.yaml └── delete-part-files.yaml ├── cleanup-evicted-pods ├── README.md ├── deploy.yaml └── serviceaccount.yaml ├── collection └── rancher │ ├── v1.6 │ └── logs-collector │ │ ├── README.md │ │ └── rancher16_logs_collector.sh │ └── v2.x │ ├── RBAC-role-collector │ ├── README.md │ └── role-dump.sh │ ├── logs-collector │ ├── README.md │ ├── collection-details.md │ └── rancher2_logs_collector.sh │ ├── profile-collector │ ├── README.md │ └── continuous_profiling.sh │ ├── rancher-pod-collector │ ├── README.md │ └── rancher-pod-collector.sh │ ├── supportability-review │ ├── README.md │ ├── cluster-collector.sh │ ├── collect.sh │ ├── collection-details.md │ ├── nodes-collector.sh │ └── security-policies.md │ ├── systems-information-v2 │ ├── README.md │ └── deploy.yaml │ ├── systems-information │ ├── Dockerfile │ ├── README.md │ ├── run.sh │ └── systems_summary.sh │ └── windows-log-collector │ ├── README.md │ └── win-log-collect.ps1 ├── eks-upgrade-using-api ├── README.md ├── common.sh ├── demo.gif └── eks-support.sh ├── eks-upgrade-using-kubectl ├── README.md ├── common.sh └── eks-support.sh ├── extended-rancher-2-cleanup ├── README.md └── extended-cleanup-rancher2.sh ├── files └── curl-format.txt ├── fleet-delete-cluster-registration ├── README.md └── delete_old_resources.sh ├── fleet-secrets-bro-patch ├── README.md └── patch_gitrepo_secrets.sh ├── how-to-retrieve-kubeconfig-from-custom-cluster ├── README.md └── rke-node-kubeconfig.sh ├── instant-fio-master ├── README.md └── instant-fio-master.sh ├── kubecert ├── README.md ├── base64 ├── jq-linux64 └── kubecert.sh ├── longhorn └── PlaceHolder.md ├── migrate-vsphere-clusters ├── README.md └── migrate-vsphere-clusters.sh ├── rancher-cleanup └── README.md ├── rancher-crd └── enumerate-resources │ ├── README.md │ └── rancher-resource-enumerator.sh ├── rancher-metadata-syncer ├── Dockerfile ├── README.md ├── apache.conf ├── deployment-configmap.yaml ├── deployment-proxy.yaml ├── download.sh └── run.sh ├── reverse-rke-state-migrations ├── README.md └── reverse-rke-state-migrations.sh ├── rotate-tokens ├── README.md └── rotate-tokens.sh ├── swiss-army-knife ├── README.md ├── admin-tools.yaml ├── overlaytest.sh └── overlaytest.yaml ├── troubleshooting-scripts ├── README.md ├── determine-leader │ └── rancher2_determine_leader.sh ├── etcd │ ├── README.md │ └── check-endpoints.sh ├── kube-apiserver │ ├── check_apiserver-to-etcd.sh │ ├── check_endpoints.sh │ └── responsiveness.sh └── kube-scheduler │ └── find-leader.sh ├── windows-access-control-lists └── README.md └── windows-agent-strict-verify 
├── README.md └── update-node.ps1 /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | -------------------------------------------------------------------------------- /NGINX-to-pods-check/README.md: -------------------------------------------------------------------------------- 1 | # NGINX-to-pods-check 2 | This script is designed to walk through all the ingresses in a cluster and test that it can curl the backend pods from the NGINX pods. This is mainly done to verify the overlay network is working along with checking the overall configuration. 3 | 4 | ## Run script 5 | ``` 6 | curl https://raw.githubusercontent.com/rancherlabs/support-tools/master/NGINX-to-pods-check/check.sh | bash 7 | ``` 8 | 9 | ## Example output 10 | 11 | ### Broken pod 12 | 13 | ``` 14 | bash ./check.sh -F Table 15 | #################################################### 16 | Pod: webserver-bad-85cf9ccdf8-8v4mh 17 | PodIP: 10.42.0.252 18 | Port: 80 19 | Endpoint: ingress-1d8af467b8b7c9682fda18c8d5053db7 20 | Ingress: test-bad 21 | Ingress Pod: nginx-ingress-controller-b2s2d 22 | Node: a1ubphylbp01 23 | Status: Fail! 24 | #################################################### 25 | ``` 26 | 27 | ``` 28 | bash ./check.sh -F Inline 29 | Checking Pod webserver-bad-8v4mh PodIP 10.42.0.252 on Port 80 in endpoint ingress-bad for ingress test-bad from nginx-ingress-controller-b2s2d on node a1ubphylbp01 NOK 30 | ``` 31 | 32 | ### Working pod 33 | 34 | ``` 35 | bash ./check.sh -F Table 36 | #################################################### 37 | Pod: webserver-bad-85cf9ccdf8-8v4mh 38 | PodIP: 10.42.0.252 39 | Port: 80 40 | Endpoint: ingress-1d8af467b8b7c9682fda18c8d5053db7 41 | Ingress: test-bad 42 | Ingress Pod: nginx-ingress-controller-b2s2d 43 | Node: a1ubphylbp01 44 | Status: Pass! 45 | #################################################### 46 | ``` 47 | 48 | ``` 49 | bash ./check.sh -F Inline 50 | Checking Pod webserver-good-65644cffd4-gbpkj PodIP 10.42.0.251 on Port 80 in endpoint ingress-good for ingress test-good from nginx-ingress-controller-b2s2d on node a1ubphylbp01 OK 51 | ``` 52 | 53 | ## Testing 54 | 55 | The following commands will deploy two workloads and ingresses. One that is working with a web server that is responding on port 80. And the other will have the webserver disabled, so it will fail to connect. 56 | 57 | ``` 58 | kubectl apply -f https://raw.githubusercontent.com/rancherlabs/support-tools/master/NGINX-to-pods-check/example-deployment.yml 59 | ``` 60 | -------------------------------------------------------------------------------- /NGINX-to-pods-check/check.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | 4 | usage() 5 | { 6 | cat << EOF 7 | usage: $0 options 8 | OPTIONS: 9 | -h Show this message 10 | -F Format Default: Table 11 | EOF 12 | } 13 | 14 | VERBOSE= 15 | while getopts .h:F:v. OPTION 16 | do 17 | case $OPTION in 18 | h) 19 | usage 20 | exit 1 21 | ;; 22 | F) 23 | FORMAT=$OPTARG 24 | ;; 25 | ?) 26 | usage 27 | exit 28 | ;; 29 | esac 30 | done 31 | 32 | if [[ -z $FORMAT ]] 33 | then 34 | FORMAT="Table" 35 | fi 36 | 37 | if [[ ! "$FORMAT" == "Table" ]] && [[ ! 
"$FORMAT" == "Inline" ]] 38 | then 39 | echo "Invalid Option for flag -F" 40 | exit 1 41 | fi 42 | 43 | 44 | kubectl get namespace -o custom-columns=NAMESPACE:.metadata.name --no-headers | while read namespace 45 | do 46 | kubectl get ingress -n "$namespace" -o custom-columns=ingress:.metadata.name --no-headers | while read ingress 47 | do 48 | kubectl get ingress $ingress -n $namespace -o yaml | grep 'service:' -A1 | awk '{print $2}' | sort | uniq | awk 'NF {p=1} p' | while read servicename 49 | do 50 | PORT="$(kubectl get endpoints "$servicename" -n "$namespace" -o yaml | grep 'port:' | awk '{print $2}'| head -n 1)" 51 | if [[ "$PORT" == 'port:' ]] 52 | then 53 | PORT="80" 54 | fi 55 | kubectl get endpoints "$servicename" -n "$namespace" -o yaml | grep '\- ip:' | awk '{print $3}' | while read endpointpodip 56 | do 57 | kubectl -n ingress-nginx get pods -l app=ingress-nginx -o custom-columns=POD:.metadata.name,NODE:.spec.nodeName,IP:.status.podIP --no-headers | while read ingresspod nodename podip 58 | do 59 | PODNAME="$(kubectl get pods -n $namespace -o custom-columns=POD:.metadata.name,IP:.status.podIP --no-headers | grep "$endpointpodip" | awk '{print $1}' | tr -d ' ')" 60 | if ! kubectl -n ingress-nginx exec $ingresspod -- curl -o /dev/null --connect-timeout 5 -s -q http://${endpointpodip}:${PORT} &> /dev/null 61 | then 62 | if [[ "$FORMAT" == "Inline" ]] 63 | then 64 | tput setaf 7; echo -n "Checking Pod $PODNAME PodIP $endpointpodip on Port $PORT in endpoint $servicename for ingress $ingress from $ingresspod on node $nodename "; tput setaf 1; echo "NOK"; tput sgr0 65 | fi 66 | if [[ "$FORMAT" == "Table" ]] 67 | then 68 | echo "####################################################" 69 | echo "Pod: $PODNAME" 70 | echo "PodIP: $endpointpodip" 71 | echo "Port: $PORT" 72 | echo "Endpoint: $servicename" 73 | echo "Ingress: $ingress" 74 | echo "Ingress Pod: $ingresspod" 75 | echo "Node: $nodename" 76 | tput setaf 1;echo "Status: Fail!"; tput sgr0 77 | echo "####################################################" 78 | fi 79 | else 80 | if [[ "$FORMAT" == "Inline" ]] 81 | then 82 | tput setaf 7; echo -n "Checking Pod $PODNAME PodIP $endpointpodip on Port $PORT in endpoint $servicename for ingress $ingress from $ingresspod on node $nodename "; tput setaf 2; echo "OK"; tput sgr0 83 | fi 84 | if [[ "$FORMAT" == "Table" ]] 85 | then 86 | echo "####################################################" 87 | echo "Pod: $PODNAME" 88 | echo "PodIP: $endpointpodip" 89 | echo "Port: $PORT" 90 | echo "Endpoint: $servicename" 91 | echo "Ingress: $ingress" 92 | echo "Ingress Pod: $ingresspod" 93 | echo "Node: $nodename" 94 | tput setaf 2;echo "Status: Pass!"; tput sgr0 95 | echo "####################################################" 96 | fi 97 | fi 98 | done 99 | done 100 | done 101 | done 102 | done 103 | 104 | -------------------------------------------------------------------------------- /NGINX-to-pods-check/example-deployment.yml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: apps/v1 3 | kind: Deployment 4 | metadata: 5 | labels: 6 | app: webserver-good 7 | name: webserver-good 8 | spec: 9 | selector: 10 | matchLabels: 11 | app: webserver-good 12 | template: 13 | metadata: 14 | labels: 15 | app: webserver-good 16 | spec: 17 | containers: 18 | - image: httpd 19 | name: webserver-good 20 | 21 | --- 22 | apiVersion: apps/v1 23 | kind: Deployment 24 | metadata: 25 | labels: 26 | app: webserver-bad 27 | name: webserver-bad 28 | spec: 29 | selector: 30 | 
matchLabels: 31 | app: webserver-bad 32 | template: 33 | metadata: 34 | labels: 35 | app: webserver-bad 36 | spec: 37 | containers: 38 | - args: 39 | - while true; do sleep 100000; done; 40 | command: 41 | - /bin/sh 42 | - -c 43 | image: httpd 44 | imagePullPolicy: Always 45 | name: webserver-bad 46 | 47 | --- 48 | apiVersion: v1 49 | kind: Service 50 | metadata: 51 | labels: 52 | app: webserver-good 53 | name: webserver-good 54 | spec: 55 | ports: 56 | - name: "80" 57 | port: 80 58 | targetPort: 80 59 | selector: 60 | app: webserver-good 61 | 62 | --- 63 | apiVersion: v1 64 | kind: Service 65 | metadata: 66 | labels: 67 | app: webserver-bad 68 | name: webserver-bad 69 | spec: 70 | ports: 71 | - name: "80" 72 | port: 80 73 | targetPort: 80 74 | selector: 75 | app: webserver-bad 76 | 77 | --- 78 | apiVersion: networking.k8s.io/v1 79 | kind: Ingress 80 | metadata: 81 | name: webserver-good 82 | spec: 83 | rules: 84 | - host: webserver-good.local 85 | http: 86 | paths: 87 | - backend: 88 | service: 89 | name: webserver-good 90 | port: 91 | number: 80 92 | path: / 93 | pathType: ImplementationSpecific 94 | --- 95 | apiVersion: networking.k8s.io/v1 96 | kind: Ingress 97 | metadata: 98 | name: webserver-bad 99 | spec: 100 | rules: 101 | - host: webserver-bad.local 102 | http: 103 | paths: 104 | - backend: 105 | service: 106 | name: webserver-bad 107 | port: 108 | number: 80 109 | path: / 110 | pathType: ImplementationSpecific 111 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # support-tools 2 | 3 | This repository contains Rancher support-tools to assist with investigating and troubleshooting issues with Rancher clusters, as well as other maintenance tasks. 4 | 5 | ## Caution: 6 | 7 | This repository contains scripts that can cause harm if used without the guidance of Rancher Support. We advise reaching out to Rancher Support before executing any of these scripts. Failure to reach out could incur production downtime. 8 | 9 | The repository consists of the following directories of tools: 10 | - collection: non-mutating, non-destructive scripts for the purpose of collecting information/logs from a cluster or node. 11 | - files: common files used in conjunction with troubleshooting commands. 12 | -------------------------------------------------------------------------------- /adjust-downstream-webhook/README.md: -------------------------------------------------------------------------------- 1 | # Adjust downstream webhook 2 | This script adjusts the version of the rancher-webhook release in downstream clusters. 3 | It decides what to do with the webhook deployment in each downstream cluster based on Rancher server version. 4 | 5 | ## Background 6 | The `rancher-webhook` chart is deployed in downstream clusters beginning with Rancher v2.7.2. 7 | On a rollback from a version >=2.7.2 to a version <2.7.2, the webhook will stay in the downstream clusters. 8 | Since each version of the webhook is one-to-one compatible with a specific version of Rancher, this can result in unexpected behavior. 9 | 10 | ## Usage 11 | 12 | ```bash 13 | ## Create a token through the UI. The token should have no scope and be made for a user who is a global admin. 14 | read -s RANCHER_TOKEN && export RANCHER_TOKEN 15 | ## The server URL for Rancher - you can get this value in the "server-url" setting. You can find it by going to Global Settings => Settings => server-url. 
The example format should be: https://rancher-test.home 16 | read -s RANCHER_URL && export RANCHER_URL 17 | bash adjust-downstream-webhook.sh 18 | ``` 19 | For Rancher setups using self-signed certificates, you can specify `--insecure-skip-tls-verify` to force the script to 20 | ignore TLS certificate verification. Note that this option is insecure, and should be avoided for production setups. 21 | 22 | ## Notes 23 | This script should be run after rolling back Rancher to the desired version 24 | (for example, when going from v2.7.2 to v2.7.0, only run this script after v2.7.0 is running). 25 | -------------------------------------------------------------------------------- /adjust-downstream-webhook/adjust-downstream-webhook.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | usage() { 4 | cat << EOF 5 | usage: bash adjust-downstream-webhook.sh [--insecure-skip-tls-verify] 6 | 7 | This script adjusts the rancher-webhook chart release in all clusters managed by Rancher (excluding the local cluster). 8 | Depending on the version of Rancher, it either deletes the downstream webhook release, adjusts its version and restarts, or does nothing. 9 | Requires kubectl and helm to be installed and available on \$PATH. 10 | Requires rancher-charts helm repo. If you don't have it, please add: helm repo add rancher-charts https://charts.rancher.io && helm repo update 11 | 12 | RANCHER_URL without a trailing slash must be set with the server URL of Rancher. 13 | RANCHER_TOKEN must be set with an admin token generated with no scope. 14 | To ignore TLS verification, set --insecure-skip-tls-verify. 15 | 16 | Users also need to ensure they have the rancher-charts repo in the local Helm index. 17 | EOF 18 | } 19 | 20 | if [ "$1" == "-h" ]; then 21 | usage 22 | exit 0 23 | fi 24 | 25 | delete_webhook() { 26 | cluster="$1" 27 | current_chart=$(helm list -n cattle-system -l name=rancher-webhook | tail -1 | cut -f 6) 28 | echo "Deleting $current_chart from cluster $cluster." 29 | helm uninstall rancher-webhook -n cattle-system 30 | } 31 | 32 | replace_webhook() { 33 | cluster="$1" 34 | new_version="$2" 35 | 36 | echo "Updating the agent to make it remember the min version $new_version of rancher-webhook, so that it can deploy it when needed in the future in cluster $cluster." 37 | kubectl set env -n cattle-system deployment/cattle-cluster-agent CATTLE_RANCHER_WEBHOOK_MIN_VERSION="$new_version" 38 | 39 | helm get values -n cattle-system rancher-webhook -o yaml > current_values.yaml 40 | echo "Re-installing rancher-webhook to use $new_version in cluster $cluster." 
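# Note: "helm upgrade --install" also covers the case where the release is not currently installed, and the values captured above carry any user-supplied overrides across the version change.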
41 | helm upgrade --install rancher-webhook rancher-charts/rancher-webhook -n cattle-system --version "$new_version" --values current_values.yaml 42 | rm -f current_values.yaml 43 | } 44 | 45 | adjust_webhook() { 46 | cluster="$1" 47 | rancher_version="$2" 48 | 49 | if [[ "$rancher_version" =~ 2\.6\.13 ]]; then 50 | replace_webhook "$cluster" 1.0.9+up0.2.10 51 | elif [[ "$rancher_version" =~ 2\.6\.[0-9]$ ]] || [[ "$rancher_version" =~ 2\.6\.1[0-2]$ ]]; then 52 | delete_webhook "$cluster" 53 | elif [[ "$rancher_version" =~ 2\.7\.[0-1]$ ]]; then 54 | delete_webhook "$cluster" 55 | elif [[ "$rancher_version" =~ 2\.7\.2 ]]; then 56 | replace_webhook "$cluster" 2.0.2+up0.3.2 57 | elif [[ "$rancher_version" =~ 2\.7\.3 ]]; then 58 | replace_webhook "$cluster" 2.0.3+up0.3.3 59 | elif [[ "$rancher_version" =~ 2\.7\.4 ]]; then 60 | replace_webhook "$cluster" 2.0.4+up0.3.4 61 | elif [[ "$rancher_version" =~ 2\.[7-9]\..* ]]; then 62 | # This matches anything else above 2.7, including 2.8.x and 2.9.x. 63 | echo "No need to delete rancher-webhook, given Rancher version $rancher_version." 64 | echo "Ensuring CATTLE_RANCHER_WEBHOOK_MIN_VERSION is set to an empty string." 65 | kubectl set env -n cattle-system deployment/cattle-cluster-agent CATTLE_RANCHER_WEBHOOK_MIN_VERSION='' 66 | else 67 | echo "Nothing to do, given Rancher version $rancher_version." 68 | fi 69 | } 70 | 71 | if [ -n "$DEBUG" ] 72 | then 73 | set -x 74 | fi 75 | 76 | if [[ -z "$RANCHER_TOKEN" || -z "$RANCHER_URL" ]] 77 | then 78 | echo "Required environment variables aren't properly set." 79 | usage 80 | exit 1 81 | fi 82 | 83 | kubeconfig=" 84 | apiVersion: v1 85 | kind: Config 86 | clusters: 87 | - name: \"local\" 88 | cluster: 89 | server: \"$RANCHER_URL\" 90 | 91 | users: 92 | - name: \"local\" 93 | user: 94 | token: \"$RANCHER_TOKEN\" 95 | 96 | 97 | contexts: 98 | - name: \"local\" 99 | context: 100 | user: \"local\" 101 | cluster: \"local\" 102 | 103 | current-context: \"local\" 104 | " 105 | 106 | echo "$kubeconfig" >> .temp_kubeconfig.yaml 107 | # helm will complain if these are group/world readable 108 | chmod g-r .temp_kubeconfig.yaml 109 | chmod o-r .temp_kubeconfig.yaml 110 | export KUBECONFIG="$(pwd)/.temp_kubeconfig.yaml" 111 | 112 | if [[ "$1" == "--insecure-skip-tls-verify" ]] 113 | then 114 | kubectl config set clusters.local.insecure-skip-tls-verify true 115 | fi 116 | 117 | rancher_version=$(kubectl get setting server-version -o jsonpath='{.value}') 118 | if [[ -z "$rancher_version" ]]; then 119 | echo 'Failed to look up Rancher version.' 120 | exit 1 121 | fi 122 | 123 | clusters=$(kubectl get clusters.management.cattle.io -o jsonpath="{.items[*].metadata.name}") 124 | for cluster in $clusters 125 | do 126 | if [ "$cluster" == "local" ] 127 | then 128 | echo "Skipping deleting rancher-webhook in the local cluster." 
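# The local cluster hosts Rancher itself and is intentionally excluded; only downstream clusters are adjusted by this script.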
129 | continue 130 | fi 131 | kubectl config set clusters.local.server "$RANCHER_URL/k8s/clusters/$cluster" 132 | adjust_webhook "$cluster" "$rancher_version" 133 | done 134 | 135 | rm .temp_kubeconfig.yaml 136 | -------------------------------------------------------------------------------- /bad-ingress-scanner/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:20.04 2 | MAINTAINER Matthew Mattox 3 | 4 | ENV DEBIAN_FRONTEND=noninteractive 5 | 6 | RUN apt-get update && apt-get install -yq --no-install-recommends \ 7 | apt-utils \ 8 | curl \ 9 | && apt-get clean && rm -rf /var/lib/apt/lists/* 10 | 11 | ## Install kubectl 12 | RUN curl -kLO "https://storage.googleapis.com/kubernetes-release/release/$(curl -ks https://storage.googleapis.com/kubernetes-release/release/stable.txt)/bin/linux/amd64/kubectl" && \ 13 | chmod u+x kubectl && \ 14 | mv kubectl /usr/local/bin/kubectl 15 | 16 | COPY *.sh /root/ 17 | RUN chmod +x /root/*.sh 18 | CMD /root/run.sh 19 | -------------------------------------------------------------------------------- /bad-ingress-scanner/README.md: -------------------------------------------------------------------------------- 1 | [![Docker Pulls](https://img.shields.io/docker/pulls/cube8021/bad-ingress-scanner.svg)](https://hub.docker.com/r/rancher/bad-ingress-scanner) 2 | [![Build Status](https://drone-publish.rancher.io/api/badges/rancherlabs/support-tools/status.svg)](https://drone-publish.rancher.io/rancherlabs/support-tools) 3 | 4 | # Bad ingress scanner 5 | This tool is designed to scan for misbehaving ingresses, for example an ingress that references a non-existent SSL certificate, or an ingress with an empty or missing backend service. 6 | 7 | ## Running report - remotely 8 | ```bash 9 | wget -O ingress-scanner.sh https://raw.githubusercontent.com/rancherlabs/support-tools/master/bad-ingress-scanner/run.sh 10 | chmod +x ./ingress-scanner.sh 11 | ./ingress-scanner.sh 12 | ``` 13 | 14 | ## Running report - in-cluster 15 | ```bash 16 | kubectl -n ingress-nginx delete job ingress-scanner 17 | kubectl apply -f deployment.yaml 18 | kubectl -n ingress-nginx logs -l app=ingress-scanner 19 | ``` 20 | 21 | ## Example output 22 | ```bash 23 | Pod: nginx-ingress-controller-r8kkz 24 | #################################################################### 25 | Found bad endpoints. 26 | default/ingress-75f627ce3d0ccd29dd268e0ab2b37008 27 | default/test-01-example-com 28 | default/test-02-example-com 29 | #################################################################### 30 | Found bad certs. 31 | default/test-01-example-com 32 | default/test-02-example-com 33 | ``` 34 | 35 | ## Removing 36 | ```bash 37 | kubectl delete -f deployment.yaml 38 | ``` 39 | 40 | ## Deploying test ingress rules 41 | Note: These rules are designed to be broken/invalid and are deployed to the default namespace. 
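The test ingresses reference a backend service and a TLS secret that intentionally do not exist. After applying them with the `kubectl apply` command below, a quick check along these lines (a minimal sketch; object names taken from `bad-ingress.yaml`) should confirm that the referenced objects are missing:
```bash
# Both lookups are expected to report "NotFound" while the test rules are in place.
kubectl -n default get service test-01-example-com test-02-example-com
kubectl -n default get secret test-02-example-com
```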
42 | ```bash 43 | kubectl apply -f bad-ingress.yaml 44 | ``` 45 | 46 | ## Removing test ingress rules 47 | ```bash 48 | kubectl delete -f bad-ingress.yaml 49 | ``` 50 | -------------------------------------------------------------------------------- /bad-ingress-scanner/bad-ingress.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: networking.k8s.io/v1 3 | kind: Ingress 4 | metadata: 5 | name: test-01 6 | spec: 7 | rules: 8 | - host: test-01.example.com 9 | http: 10 | paths: 11 | - backend: 12 | service: 13 | name: test-01-example-com 14 | port: 15 | number: 80 16 | path: / 17 | pathType: Prefix 18 | --- 19 | apiVersion: networking.k8s.io/v1 20 | kind: Ingress 21 | metadata: 22 | name: test-02 23 | spec: 24 | rules: 25 | - host: test-02.example.com 26 | http: 27 | paths: 28 | - backend: 29 | service: 30 | name: test-02-example-com 31 | port: 32 | number: 80 33 | path: / 34 | pathType: Prefix 35 | tls: 36 | - hosts: 37 | - test-02.example.com 38 | secretName: test-02-example-com 39 | --- 40 | apiVersion: networking.k8s.io/v1 41 | kind: Ingress 42 | metadata: 43 | name: test-02-dup 44 | spec: 45 | rules: 46 | - host: test-02.example.com 47 | http: 48 | paths: 49 | - backend: 50 | service: 51 | name: test-02-example-com 52 | port: 53 | number: 80 54 | path: / 55 | pathType: Prefix 56 | tls: 57 | - hosts: 58 | - test-02.example.com 59 | secretName: test-02-example-com 60 | -------------------------------------------------------------------------------- /bad-ingress-scanner/deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ServiceAccount 3 | metadata: 4 | name: ingress-scanner 5 | namespace: ingress-nginx 6 | --- 7 | kind: ClusterRole 8 | apiVersion: rbac.authorization.k8s.io/v1 9 | metadata: 10 | name: ingress-scanner 11 | namespace: ingress-nginx 12 | rules: 13 | - apiGroups: 14 | - "" 15 | resources: 16 | - pods 17 | - pods/log 18 | verbs: 19 | - get 20 | - list 21 | - apiGroups: 22 | - networking.k8s.io 23 | resources: 24 | - ingresses 25 | verbs: 26 | - list 27 | - watch 28 | - get 29 | --- 30 | apiVersion: rbac.authorization.k8s.io/v1 31 | kind: ClusterRoleBinding 32 | metadata: 33 | name: ingress-scanner 34 | roleRef: 35 | apiGroup: rbac.authorization.k8s.io 36 | kind: ClusterRole 37 | name: ingress-scanner 38 | subjects: 39 | - kind: ServiceAccount 40 | name: ingress-scanner 41 | namespace: ingress-nginx 42 | --- 43 | apiVersion: batch/v1 44 | kind: Job 45 | metadata: 46 | name: ingress-scanner 47 | namespace: ingress-nginx 48 | spec: 49 | backoffLimit: 10 50 | completions: 1 51 | parallelism: 1 52 | template: 53 | metadata: 54 | labels: 55 | app: ingress-scanner 56 | job-name: ingress-scanner 57 | spec: 58 | affinity: 59 | nodeAffinity: 60 | requiredDuringSchedulingIgnoredDuringExecution: 61 | nodeSelectorTerms: 62 | - matchExpressions: 63 | - key: beta.kubernetes.io/os 64 | operator: NotIn 65 | values: 66 | - windows 67 | - key: node-role.kubernetes.io/worker 68 | operator: Exists 69 | containers: 70 | - image: rancher/bad-ingress-scanner:latest 71 | imagePullPolicy: IfNotPresent 72 | name: ingress-scanner 73 | restartPolicy: Never 74 | serviceAccount: ingress-scanner 75 | serviceAccountName: ingress-scanner 76 | tolerations: 77 | - effect: NoExecute 78 | operator: Exists 79 | - effect: NoSchedule 80 | operator: Exists 81 | -------------------------------------------------------------------------------- /bad-ingress-scanner/run.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | rm -f ./bad-endpoints.list ./bad-certs.list # start with clean lists so results from every ingress controller pod can be appended below 3 | echo "####################################################################" 4 | echo "Scanning ingress controllers..." 5 | for ingressPod in `kubectl -n ingress-nginx get pods -l app=ingress-nginx -o name | awk -F'/' '{print $2}'` 6 | do 7 | echo "Pod: $ingressPod" 8 | kubectl -n ingress-nginx logs "$ingressPod" | grep 'Error obtaining Endpoints for Service' | awk -F '"' '{print $2}' >> ./bad-endpoints.list 9 | kubectl -n ingress-nginx logs "$ingressPod" | grep 'Error getting SSL certificate' | awk -F '"' '{print $2}' >> ./bad-certs.list 10 | done 11 | echo "####################################################################" 12 | echo "Sorting and removing duplicates from lists..." 13 | cat ./bad-endpoints.list | sort | uniq > ./bad-endpoints.list2 14 | mv ./bad-endpoints.list2 ./bad-endpoints.list 15 | cat ./bad-certs.list | sort | uniq > ./bad-certs.list2 16 | mv ./bad-certs.list2 ./bad-certs.list 17 | 18 | if [[ ! -z `cat ./bad-endpoints.list` ]] 19 | then 20 | echo "####################################################################" 21 | echo "Found bad endpoints." 22 | cat ./bad-endpoints.list 23 | else 24 | echo "####################################################################" 25 | echo "No bad endpoints found." 26 | fi 27 | 28 | if [[ ! -z `cat ./bad-certs.list` ]] 29 | then 30 | echo "####################################################################" 31 | echo "Found bad certs." 32 | cat ./bad-certs.list 33 | else 34 | echo "####################################################################" 35 | echo "No bad certs found." 36 | fi 37 | -------------------------------------------------------------------------------- /change-nodetemplate-owner/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:20.04 2 | MAINTAINER patrick0057 3 | ENV TERM xterm 4 | RUN apt-get update && apt-get install -y apt-transport-https curl gnupg2 && \ 5 | curl -s https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add - && \ 6 | echo "deb https://apt.kubernetes.io/ kubernetes-xenial main" | tee -a /etc/apt/sources.list.d/kubernetes.list && \ 7 | apt-get update && \ 8 | apt-get install -y kubectl jq && \ 9 | rm -rf /var/lib/apt/lists/* && \ 10 | mkdir /root/.kube/ 11 | COPY change-nodetemplate-owner.sh /usr/bin/ 12 | WORKDIR /root 13 | RUN chmod +x /usr/bin/change-nodetemplate-owner.sh 14 | ENTRYPOINT ["/usr/bin/change-nodetemplate-owner.sh"] 15 | CMD [] 16 | -------------------------------------------------------------------------------- /change-nodetemplate-owner/README.md: -------------------------------------------------------------------------------- 1 | ## Update 2 | Note: As of Rancher v2.3.3 this should no longer be necessary. 3 | https://github.com/rancher/rancher/issues/12186 4 | 5 | ## Change node template owner 6 | This script will change your node template owner in Rancher 2.x. You can run this script as a Docker image or directly as a bash script. You'll need the cluster ID and the user ID you want to change the ownership to. 7 | 1. To obtain the cluster ID in the Rancher user interface, navigate to Global> "Your Cluster Name"> then grab the cluster ID from your address bar. An example URL and the cluster ID derived from it are listed below. 8 | * Example URL: `https:///c/c-48x9z/monitoring` 9 | * Derived cluster ID from the above URL: **c-48x9z** 10 | 2. 
Now we need the user ID of the user to become the new node template owner, navigate to Global> Users> to find the ID. 11 | 3. To run the script using a docker image, make sure your $KUBECONFIG is set to the full path of your Rancher local cluster kube config then run the following command. 12 | 13 | ```bash 14 | docker run -ti -v $KUBECONFIG:/root/.kube/config patrick0057/change-nodetemplate-owner -c -n 15 | ``` 16 | 4. To run the script directly, just download change-nodetemplate-owner.sh, make sure your $KUBECONFIG or ~/.kube/config is pointing to the correct Rancher local cluster then run the following command: 17 | 18 | ```bash 19 | curl -LO https://github.com/rancherlabs/support-tools/raw/master/change-nodetemplate-owner/change-nodetemplate-owner.sh 20 | ./change-nodetemplate-owner.sh -c -n 21 | ``` 22 | ## Assign a node template to a cluster's node pool. 23 | Assign a node template to a cluster's node pool. This is useful for situations where the original owner of a cluster has been deleted which also deletes their node templates. To use this task successfully it is recommended that you create a new node template in the UI before 24 | using it. Make sure the node template matches the original ones as closely as possible. You will be shown options to choose from and 25 | prompted for confirmation. 26 | 27 | Run script with docker image 28 | 29 | ```bash 30 | docker run -ti -v $KUBECONFIG:/root/.kube/config patrick0057/change-nodetemplate-owner -t changenodetemplate -c 31 | ``` 32 | Run script from bash command line: 33 | 34 | ```bash 35 | curl -LO https://github.com/rancherlabs/support-tools/raw/master/change-nodetemplate-owner/change-nodetemplate-owner.sh 36 | ./change-nodetemplate-owner.sh -t changenodetemplate -c 37 | ``` 38 | -------------------------------------------------------------------------------- /change-nodetemplate-owner/change-nodetemplate-owner.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | newowner='' 3 | clusterid='' 4 | red=$(tput setaf 1) 5 | green=$(tput setaf 2) 6 | reset=$(tput sgr0) 7 | function helpmenu () { 8 | echo "Change the owner of all node templates in a cluster: 9 | ${green}change-nodetemplate-owner.sh -c -n ${reset} 10 | 11 | Assign a nodetemplate to a cluster's nodepool. This is useful 12 | for situations where the original owner of a cluster has been deleted 13 | which also deletes their nodetemplates. To use this task successfully 14 | it is recommended that you create a new nodetemplate in the UI before 15 | using it. Make sure the node template matches the original ones as 16 | closely as possible. You will be shown options to choose from and 17 | prompted for confirmation. 18 | ${green}change-nodetemplate-owner.sh -t changenodetemplate -c ${reset} 19 | " 20 | exit 1 21 | } 22 | while getopts "hc:n:t:" opt; do 23 | case ${opt} in 24 | h) # process option h 25 | helpmenu 26 | ;; 27 | c) # process option c 28 | clusterid=$OPTARG 29 | ;; 30 | n) # process option n 31 | newowner=$OPTARG 32 | ;; 33 | t) # process option t 34 | task=$OPTARG 35 | ;; 36 | \?) 37 | helpmenu 38 | exit 1 39 | ;; 40 | esac 41 | done 42 | #shift $((OPTIND -1)) 43 | if [[ -z "$task" ]] && [ -z "$clusterid" ]; then 44 | helpmenu 45 | exit 1 46 | fi 47 | if ! hash kubectl 2>/dev/null; then 48 | echo "!!!kubectl was not found!!!" 
49 | echo "!!!download and install with:" 50 | echo "Linux users:" 51 | echo "curl -LO https://storage.googleapis.com/kubernetes-release/release/$(curl -s https://storage.googleapis.com/kubernetes-release/release/stable.txt)/bin/linux/amd64/kubectl" 52 | echo "chmod +x ./kubectl" 53 | echo "mv ./kubectl /bin/kubectl" 54 | echo "!!!" 55 | echo "Mac users:" 56 | echo "brew install kubernetes-cli" 57 | exit 1 58 | fi 59 | if ! hash jq 2>/dev/null; then 60 | echo '!!!jq was not found!!!' 61 | echo "!!!download and install with:" 62 | echo "Linux users:" 63 | echo "curl -L -O https://github.com/stedolan/jq/releases/download/jq-1.6/jq-linux64" 64 | echo "chmod +x jq-linux64" 65 | echo "mv jq-linux64 /bin/jq" 66 | echo "!!!" 67 | echo "Mac users:" 68 | echo "brew install jq" 69 | echo "brew link jq" 70 | exit 1 71 | fi 72 | if ! hash sed 2>/dev/null; then 73 | echo '!!!sed was not found!!!' 74 | exit 1 75 | fi 76 | if [ ! -f ~/.kube/config ] && [ -z "$KUBECONFIG" ]; then 77 | echo "${red}~/.kube/config does not exist and \$KUBECONFIG is not set!${reset} " 78 | exit 1 79 | fi 80 | function yesno () { 81 | shopt -s nocasematch 82 | response='' 83 | i=0 84 | while [[ ${response} != 'y' ]] && [[ ${response} != 'n' ]] 85 | do 86 | i=$((i+1)) 87 | if [ $i -gt 10 ]; then 88 | echo "Script is destined to loop forever, aborting! Make sure your docker run command has -ti then try again." 89 | exit 1 90 | fi 91 | printf '(y/n): ' 92 | read -n1 response 93 | echo 94 | done 95 | shopt -u nocasematch 96 | } 97 | echo 98 | kubectl get node 99 | echo 100 | 101 | if [ "$task" = '' ]; then 102 | if [[ -z "$clusterid" ]] || [[ -z "$newowner" ]]; 103 | then 104 | helpmenu 105 | exit 1 106 | fi 107 | echo -e "${green}Cluster: $clusterid${reset}" 108 | echo -e "${green}New Owner: $newowner${reset}" 109 | for nodepoolid in $(kubectl -n $clusterid get nodepool --no-headers -o=custom-columns=NAME:.metadata.name); do 110 | nodetemplateid=$(kubectl -n $clusterid get nodepool $nodepoolid -o json | jq -r .spec.nodeTemplateName | cut -d : -f 2) 111 | oldowner=$(kubectl -n $clusterid get nodepool $nodepoolid -o json | jq -r .spec.nodeTemplateName | cut -d : -f 1) 112 | echo -e "${red}creating new nodetemplate under $newowner's namespace${reset}" 113 | kubectl -n $oldowner get nodetemplate $nodetemplateid -o yaml | sed 's/'$oldowner'/'$newowner'/g' | kubectl apply --namespace=$newowner -f - 114 | echo -e "${red}patching $nodepoolid old owner: $oldowner new owner: $newowner${reset}" 115 | kubectl -n $clusterid patch nodepool $nodepoolid -p '{"spec":{"nodeTemplateName": "'$newowner:$nodetemplateid'"}}' --type=merge 116 | done 117 | echo 118 | echo 119 | echo -e "${green}We're all done! 
If see you kubectl complaining about duplicate nodetemplates, this is safe to ignore.${reset}" 120 | fi 121 | 122 | if [ "$task" = 'changenodetemplate' ]; then 123 | if [ -z "$clusterid" ] 124 | then 125 | helpmenu 126 | exit 1 127 | fi 128 | for nodepoolid in $(kubectl -n $clusterid get nodepool --no-headers -o=custom-columns=NAME:.metadata.name); do 129 | nodetemplateid=$(kubectl -n $clusterid get nodepool $nodepoolid -o json | jq -r .spec.nodeTemplateName | cut -d : -f 2) 130 | hostnameprefix=$(kubectl -n $clusterid get nodepool $nodepoolid -o json | jq -r .spec.hostnamePrefix | cut -d : -f 2) 131 | oldowner=$(kubectl -n $clusterid get nodepool $nodepoolid -o json | jq -r .spec.nodeTemplateName | cut -d : -f 1) 132 | echo "${green}-----------------------------------------------------------------------${reset}" 133 | echo "${green}Name prefix: ${hostnameprefix}${reset}" 134 | echo "${green}Nodepool ID: ${nodepoolid}${reset}" 135 | echo "${green}Owner ID: ${oldowner}${reset}" 136 | echo "${green}Nodetemplate ID: ${nodetemplateid}${reset}" 137 | echo "Would you like to change the node template for nodepool called ${hostnameprefix}?" 138 | 139 | yesno 140 | if [ ${response} == 'y' ] 141 | then 142 | echo "nodetemplate ID's available for selection: " 143 | echo "${green}-${reset}" 144 | IFS=$'\n' 145 | echo "${green}name: ID${reset}" 146 | for nt_namespace_name in $(kubectl get nodetemplate --all-namespaces -o=custom-columns=NAMESPACE:.metadata.namespace,NAME:.metadata.name --no-headers); do 147 | nodetemplateid1=$(echo ${nt_namespace_name} | sed -e's/ */ /g' | cut -d" " -f 2) 148 | oldowner1=$(echo ${nt_namespace_name} | sed -e's/ */ /g' | cut -d" " -f 1) 149 | nodetemplateid_displayname1=$(kubectl -n $oldowner1 get nodetemplate $nodetemplateid1 -o json | jq -r .spec.displayName | cut -d : -f 2) 150 | echo "${green}${nodetemplateid_displayname1}: ${nodetemplateid1}${reset}" 151 | done 152 | IFS=$' ' 153 | echo "${green}-${reset}" 154 | echo "What should the new nodetemplate ID be?" 155 | read new_nodetemplateid 156 | echo "I have ${new_nodetemplateid}, should I proceed?" 
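# Ask for a final confirmation; the nodepool is only patched with the chosen template ID after an explicit 'y'.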
157 | yesno 158 | if [ ${response} == 'y' ] 159 | then 160 | echo "${green}OK making changes${reset}" 161 | echo -e "${red}patching $nodepoolid old template ID: $nodetemplateid new template ID: ${new_nodetemplateid}${reset}" 162 | kubectl -n $clusterid patch nodepool $nodepoolid -p '{"spec":{"nodeTemplateName": "'$oldowner:${new_nodetemplateid}'"}}' --type=merge 163 | else 164 | echo "${green}No changes made, moving on.${reset}" 165 | fi 166 | fi 167 | 168 | done 169 | echo "${green}-----------------------------------------------------------------------${reset}" 170 | fi 171 | -------------------------------------------------------------------------------- /charts/index.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | entries: 3 | rancher: 4 | - apiVersion: v1 5 | appVersion: 0.0.1 6 | created: "2021-03-04T11:39:06.0872245-06:00" 7 | description: Rancher Systems Info 8 | digest: b2d217c3c7ab839bb11151cdd1d99ab8920f0241dcda74764e3932969f17fe0c 9 | home: https://rancher.com 10 | icon: https://github.com/rancher/ui/blob/master/public/assets/images/logos/welcome-cow.svg 11 | keywords: 12 | - rancher 13 | - support 14 | maintainers: 15 | - email: charts@rancher.com 16 | name: Rancher Labs 17 | name: rancher 18 | sources: 19 | - https://github.com/rancherlabs/systems-info 20 | urls: 21 | - https://rancherlabs.github.io/systems-info/charts/rancher-0.0.1.tgz 22 | version: 0.0.1 23 | generated: "2021-03-04T11:39:06.0847725-06:00" 24 | -------------------------------------------------------------------------------- /charts/rancher-0.0.1.tgz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rancherlabs/support-tools/2fa26cd1c13cc329f5553f88adfe693ee978848e/charts/rancher-0.0.1.tgz -------------------------------------------------------------------------------- /charts/robots.txt: -------------------------------------------------------------------------------- 1 | "User-Agent: *nDisallow: /" 2 | -------------------------------------------------------------------------------- /charts/systems-info/.helmignore: -------------------------------------------------------------------------------- 1 | # Patterns to ignore when building packages. 2 | # This supports shell glob matching, relative path matching, and 3 | # negation (prefixed with !). Only one pattern per line. 
4 | .DS_Store 5 | # Common VCS dirs 6 | .git/ 7 | .gitignore 8 | .bzr/ 9 | .bzrignore 10 | .hg/ 11 | .hgignore 12 | .svn/ 13 | # Common backup files 14 | *.swp 15 | *.bak 16 | *.tmp 17 | *.orig 18 | *~ 19 | # Various IDEs 20 | .project 21 | .idea/ 22 | *.tmproj 23 | .vscode/ 24 | -------------------------------------------------------------------------------- /charts/systems-info/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | name: rancher 3 | description: Rancher Systems Info 4 | version: 0.0.1 5 | appVersion: 0.0.1 6 | home: https://rancher.com 7 | icon: https://github.com/rancher/ui/blob/master/public/assets/images/logos/welcome-cow.svg 8 | keywords: 9 | - rancher 10 | - support 11 | sources: 12 | - https://github.com/rancherlabs/systems-info 13 | maintainers: 14 | - name: Rancher Labs 15 | email: charts@rancher.com 16 | -------------------------------------------------------------------------------- /charts/systems-info/questions.yml: -------------------------------------------------------------------------------- 1 | labels: 2 | io.cattle.role: project # options are cluster/project 3 | categories: 4 | - Support 5 | Name: systems-info 6 | Namespace: systems-info 7 | questions: 8 | - variable: defaultImage 9 | default: true 10 | description: "Use default Docker image" 11 | label: Use Default Image 12 | type: boolean 13 | show_subquestion_if: false 14 | group: "Container Images" 15 | subquestions: 16 | - variable: image.repository 17 | default: "docker.io/rancher/systems-info" 18 | description: "Docker image repository" 19 | type: string 20 | label: Image Repository 21 | - variable: image.tag 22 | default: "v0.0.1" 23 | description: "Docker image tag" 24 | type: string 25 | label: Image Tag 26 | - variable: schedule 27 | required: true 28 | default: '0 0 * * 1' 29 | description: "Backup schedule in crontab format" 30 | type: string 31 | label: "Backup schedule" 32 | - variable: rancher_name 33 | default: "Rancher" 34 | description: "Your human readable for this install" 35 | type: string 36 | label: Rancher Name 37 | required: true 38 | group: "General Settings" 39 | - variable: to_address 40 | default: "" 41 | description: "Email address for sending report to" 42 | type: string 43 | label: To address 44 | required: true 45 | group: "General Settings" 46 | - variable: send_to_support 47 | default: "true" 48 | description: "Would you like this report sent to Rancher Support?" 
49 | type: string 50 | label: Send report to Rancher Support 51 | required: false 52 | group: "General Settings" 53 | - variable: smtp_host 54 | default: "" 55 | description: "SMTP server hostname" 56 | type: string 57 | label: SMTP Server 58 | required: true 59 | group: "Mail Server Settings" 60 | - variable: smtp_port 61 | default: "587" 62 | description: "SMTP server port" 63 | type: string 64 | label: SMTP port 65 | required: true 66 | group: "Mail Server Settings" 67 | - variable: smtp_user 68 | default: "" 69 | description: "SMTP username" 70 | type: string 71 | label: SMTP username 72 | required: true 73 | group: "Mail Server Settings" 74 | - variable: smtp_pass 75 | default: "" 76 | description: "SMTP password" 77 | type: password 78 | label: SMTP password 79 | required: true 80 | group: "Mail Server Settings" 81 | - variable: from_address 82 | default: "" 83 | description: "From address" 84 | type: string 85 | label: From address 86 | required: true 87 | group: "Mail Server Settings" 88 | -------------------------------------------------------------------------------- /charts/systems-info/templates/_helpers.tpl: -------------------------------------------------------------------------------- 1 | {{/* vim: set filetype=mustache: */}} 2 | {{/* 3 | Expand the name of the chart. 4 | */}} 5 | {{- define "systems-information.name" -}} 6 | {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} 7 | {{- end -}} 8 | 9 | {{/* 10 | Create a default fully qualified app name. 11 | We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). 12 | If release name contains chart name it will be used as a full name. 13 | */}} 14 | {{- define "systems-information.fullname" -}} 15 | {{- if .Values.fullnameOverride -}} 16 | {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}} 17 | {{- else -}} 18 | {{- $name := default .Chart.Name .Values.nameOverride -}} 19 | {{- if contains $name .Release.Name -}} 20 | {{- .Release.Name | trunc 63 | trimSuffix "-" -}} 21 | {{- else -}} 22 | {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} 23 | {{- end -}} 24 | {{- end -}} 25 | {{- end -}} 26 | 27 | {{/* 28 | Create chart name and version as used by the chart label. 29 | */}} 30 | {{- define "systems-information.chart" -}} 31 | {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}} 32 | {{- end -}} 33 | 34 | {{/* 35 | Common labels 36 | */}} 37 | {{- define "systems-information.labels" -}} 38 | helm.sh/chart: {{ include "systems-information.chart" . }} 39 | {{ include "systems-information.selectorLabels" . }} 40 | {{- if .Chart.AppVersion }} 41 | app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} 42 | {{- end }} 43 | app.kubernetes.io/managed-by: {{ .Release.Service }} 44 | {{- end -}} 45 | 46 | {{/* 47 | Selector labels 48 | */}} 49 | {{- define "systems-information.selectorLabels" -}} 50 | app.kubernetes.io/name: {{ include "systems-information.name" . }} 51 | app.kubernetes.io/instance: {{ .Release.Name }} 52 | {{- end -}} 53 | 54 | {{/* 55 | Create the name of the service account to use 56 | */}} 57 | {{- define "systems-information.serviceAccountName" -}} 58 | {{- if .Values.serviceAccount.create -}} 59 | {{ default (include "systems-information.fullname" .) 
.Values.serviceAccount.name }} 60 | {{- else -}} 61 | {{ default "default" .Values.serviceAccount.name }} 62 | {{- end -}} 63 | {{- end -}} 64 | -------------------------------------------------------------------------------- /charts/systems-info/templates/clusterrolebinding.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRoleBinding 3 | metadata: 4 | name: systems-info 5 | roleRef: 6 | apiGroup: rbac.authorization.k8s.io 7 | kind: ClusterRole 8 | name: cluster-admin 9 | subjects: 10 | - kind: ServiceAccount 11 | name: systems-info 12 | namespace: systems-info 13 | -------------------------------------------------------------------------------- /charts/systems-info/templates/deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: batch/v1beta1 2 | kind: CronJob 3 | metadata: 4 | name: systems-info 5 | spec: 6 | schedule: {{ .Values.schedule | quote }} 7 | jobTemplate: 8 | spec: 9 | template: 10 | spec: 11 | containers: 12 | - env: 13 | - name: from_address 14 | value: {{ .Values.from_address | quote }} 15 | - name: rancher_name 16 | value: {{ .Values.rancher_name | quote }} 17 | - name: send_to_support 18 | value: {{ .Values.send_to_support | quote }} 19 | - name: smtp_host 20 | value: {{ .Values.smtp_host | quote }} 21 | - name: smtp_port 22 | value: {{ .Values.smtp_port | quote }} 23 | - name: to_address 24 | value: {{ .Values.to_address | quote }} 25 | - name: smtp_user 26 | valueFrom: 27 | secretKeyRef: 28 | key: smtp_user 29 | name: mail-config 30 | optional: false 31 | - name: smtp_pass 32 | valueFrom: 33 | secretKeyRef: 34 | key: smtp_pass 35 | name: mail-config 36 | optional: false 37 | image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" 38 | imagePullPolicy: {{ .Values.image.pullPolicy }} 39 | name: system-information 40 | dnsPolicy: ClusterFirst 41 | restartPolicy: Never 42 | schedulerName: default-scheduler 43 | serviceAccountName: systems-info 44 | -------------------------------------------------------------------------------- /charts/systems-info/templates/namespace.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Namespace 3 | metadata: 4 | name: systems-info 5 | -------------------------------------------------------------------------------- /charts/systems-info/templates/secret.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Secret 3 | metadata: 4 | name: mail-config 5 | namespace: systems-info 6 | type: "Opaque" 7 | data: 8 | smtp_user: {{ default "" .Values.smtp_user | b64enc | quote }} 9 | smtp_pass: {{ default "" .Values.smtp_pass | b64enc | quote }} 10 | -------------------------------------------------------------------------------- /charts/systems-info/templates/serviceaccount.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ServiceAccount 3 | metadata: 4 | name: systems-info 5 | namespace: systems-info 6 | -------------------------------------------------------------------------------- /charts/systems-info/values.yaml: -------------------------------------------------------------------------------- 1 | replicaCount: 1 2 | 3 | image: 4 | repository: docker.io/rancher/systems-info 5 | tag: v0.0.1 6 | pullPolicy: IfNotPresent 7 | 8 | imagePullSecrets: [] 9 | 10 | Name: systems-info 11 | Namespace: 
systems-info 12 | 13 | rbac: 14 | create: true 15 | clusterAdminRole: true 16 | 17 | serviceAccount: 18 | create: true 19 | name: systems-info 20 | 21 | resources: 22 | limits: 23 | cpu: 100m 24 | memory: 100Mi 25 | requests: 26 | cpu: 100m 27 | memory: 100Mi 28 | 29 | secretName: "mail-config" 30 | smtp_user: "" 31 | smtp_pass: "" 32 | 33 | rancher_name: "" 34 | schedule: "0 0 * * 1" 35 | smtp_host: "" 36 | smtp_port: "587" 37 | to_address: "" 38 | from_address: "" 39 | send_to_support: "true" 40 | -------------------------------------------------------------------------------- /charts/systems-information/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | name: systems-info 3 | description: Rancher Systems Info 4 | 5 | # A chart can be either an 'application' or a 'library' chart. 6 | # 7 | # Application charts are a collection of templates that can be packaged into versioned archives 8 | # to be deployed. 9 | # 10 | # Library charts provide useful utilities or functions for the chart developer. They're included as 11 | # a dependency of application charts to inject those utilities and functions into the rendering 12 | # pipeline. Library charts do not define any templates and therefore cannot be deployed. 13 | type: application 14 | 15 | # This is the chart version. This version number should be incremented each time you make changes 16 | # to the chart and its templates, including the app version. 17 | version: 0.0.1 18 | 19 | # This is the version number of the application being deployed. This version number should be 20 | # incremented each time you make changes to the application. 21 | appVersion: 0.0.1 22 | 23 | keywords: 24 | - rancher 25 | - support 26 | home: https://support.rancher.com 27 | sources: 28 | - https://github.com/rancherlabs/systems-info 29 | maintainers: 30 | - name: mattmattox 31 | email: matt.mattox@suse.com 32 | icon: https://rancher.com/img/brand-guidelines/assets/logos/png/color/rancher-logo-stacked-color.png 33 | -------------------------------------------------------------------------------- /charts/systems-information/questions.yml: -------------------------------------------------------------------------------- 1 | labels: 2 | io.cattle.role: project # options are cluster/project 3 | categories: 4 | - Support 5 | Name: systems-info 6 | Namespace: systems-info 7 | questions: 8 | - variable: defaultImage 9 | default: true 10 | description: "Use default Docker image" 11 | label: Use Default Image 12 | type: boolean 13 | show_subquestion_if: false 14 | group: "Container Images" 15 | subquestions: 16 | - variable: image.repository 17 | default: "docker.io/rancher/systems-info" 18 | description: "Docker image repository" 19 | type: string 20 | label: Image Repository 21 | - variable: image.tag 22 | default: "v0.0.1" 23 | description: "Docker image tag" 24 | type: string 25 | label: Image Tag 26 | - variable: schedule 27 | required: true 28 | default: '0 0 * * 1' 29 | description: "Backup schedule in crontab format" 30 | type: string 31 | label: "Backup schedule" 32 | - variable: rancher_name 33 | default: "Rancher" 34 | description: "Your human readable for this install" 35 | type: string 36 | label: Rancher Name 37 | required: true 38 | group: "General Settings" 39 | - variable: to_address 40 | default: "" 41 | description: "Email address for sending report to" 42 | type: string 43 | label: To address 44 | required: true 45 | group: "General Settings" 46 | - variable: send_to_support 47 | default: 
"true" 48 | description: "Would you like this report sent to Rancher Support?" 49 | type: string 50 | label: Send report to Rancher Support 51 | required: false 52 | group: "General Settings" 53 | - variable: smtp_host 54 | default: "" 55 | description: "SMTP server hostname" 56 | type: string 57 | label: SMTP Server 58 | required: true 59 | group: "Mail Server Settings" 60 | - variable: smtp_port 61 | default: "587" 62 | description: "SMTP server port" 63 | type: string 64 | label: SMTP port 65 | required: true 66 | group: "Mail Server Settings" 67 | - variable: smtp_user 68 | default: "" 69 | description: "SMTP username" 70 | type: string 71 | label: SMTP username 72 | required: true 73 | group: "Mail Server Settings" 74 | - variable: smtp_pass 75 | default: "" 76 | description: "SMTP password" 77 | type: password 78 | label: SMTP password 79 | required: true 80 | group: "Mail Server Settings" 81 | - variable: from_address 82 | default: "" 83 | description: "From address" 84 | type: string 85 | label: From address 86 | required: true 87 | group: "Mail Server Settings" 88 | -------------------------------------------------------------------------------- /charts/systems-information/templates/clusterrolebinding.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRoleBinding 3 | metadata: 4 | name: systems-info 5 | roleRef: 6 | apiGroup: rbac.authorization.k8s.io 7 | kind: ClusterRole 8 | name: cluster-admin 9 | subjects: 10 | - kind: ServiceAccount 11 | name: systems-info 12 | namespace: systems-info 13 | -------------------------------------------------------------------------------- /charts/systems-information/templates/deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: batch/v1beta1 2 | kind: CronJob 3 | metadata: 4 | name: systems-info 5 | spec: 6 | schedule: {{ .Values.schedule | quote }} 7 | jobTemplate: 8 | spec: 9 | template: 10 | spec: 11 | containers: 12 | - env: 13 | - name: from_address 14 | value: {{ .Values.from_address | quote }} 15 | - name: rancher_name 16 | value: {{ .Values.rancher_name | quote }} 17 | - name: send_to_support 18 | value: {{ .Values.send_to_support | quote }} 19 | - name: smtp_host 20 | value: {{ .Values.smtp_host | quote }} 21 | - name: smtp_port 22 | value: {{ .Values.smtp_port | quote }} 23 | - name: to_address 24 | value: {{ .Values.to_address | quote }} 25 | - name: smtp_user 26 | valueFrom: 27 | secretKeyRef: 28 | key: smtp_user 29 | name: mail-config 30 | optional: false 31 | - name: smtp_pass 32 | valueFrom: 33 | secretKeyRef: 34 | key: smtp_pass 35 | name: mail-config 36 | optional: false 37 | image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" 38 | imagePullPolicy: {{ .Values.image.pullPolicy }} 39 | name: system-information 40 | dnsPolicy: ClusterFirst 41 | restartPolicy: Never 42 | schedulerName: default-scheduler 43 | serviceAccountName: systems-info 44 | -------------------------------------------------------------------------------- /charts/systems-information/templates/namespace.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Namespace 3 | metadata: 4 | name: systems-info 5 | -------------------------------------------------------------------------------- /charts/systems-information/templates/secret.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 
| kind: Secret 3 | metadata: 4 | name: mail-config 5 | namespace: systems-info 6 | type: "Opaque" 7 | data: 8 | smtp_user: {{ default "" .Values.smtp_user | b64enc | quote }} 9 | smtp_pass: {{ default "" .Values.smtp_pass | b64enc | quote }} 10 | -------------------------------------------------------------------------------- /charts/systems-information/templates/serviceaccount.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ServiceAccount 3 | metadata: 4 | name: systems-info 5 | namespace: systems-info 6 | -------------------------------------------------------------------------------- /charts/systems-information/values.yaml: -------------------------------------------------------------------------------- 1 | replicaCount: 1 2 | 3 | image: 4 | repository: docker.io/rancher/systems-info 5 | tag: v0.0.1 6 | pullPolicy: IfNotPresent 7 | 8 | imagePullSecrets: [] 9 | 10 | Name: systems-info 11 | Namespace: systems-info 12 | 13 | rbac: 14 | create: true 15 | clusterAdminRole: true 16 | 17 | serviceAccount: 18 | create: true 19 | name: systems-info 20 | 21 | resources: 22 | limits: 23 | cpu: 100m 24 | memory: 100Mi 25 | requests: 26 | cpu: 100m 27 | memory: 100Mi 28 | 29 | secretName: "mail-config" 30 | smtp_user: "" 31 | smtp_pass: "" 32 | 33 | rancher_name: "" 34 | schedule: "0 0 * * 1" 35 | smtp_host: "" 36 | smtp_port: "587" 37 | to_address: "" 38 | from_address: "" 39 | send_to_support: "true" 40 | -------------------------------------------------------------------------------- /cleanup-etcd-part-files/README.md: -------------------------------------------------------------------------------- 1 | # Workaround ETCD Snapshots Part Files Issue 2 | To workaround issue [gh-30662](https://github.com/rancher/rancher/issues/30662) please select one of the following deployment options. 3 | 4 | ## Option A - cleanup file temp files 5 | This script runs on each etcd node in a while true loop every 5 minutes looking for leftover part files. If it finds part files older than 15 minutes, it will delete them. This is to prevent deleting a part file that is currently in-use. 6 | 7 | ### Changes to restore process 8 | None, the restore process is unchanged. 9 | 10 | ### Installation 11 | ``` 12 | kubectl apply -f delete-part-files.yaml 13 | ``` 14 | 15 | ## Option B - alternative s3 snapshots 16 | This script replaces the recurring snapshot functionality in RKE with a Kubernetes job that runs every 12 hours. 
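For reference, the snapshot cadence and retention come from the arguments passed to `rke-etcd-backup` in `alt-s3-sync.yaml` (shown below); the annotations here are added for clarity:
```
- --s3-backup=true
- --creation=12h    # take a snapshot every 12 hours
- --retention=72h   # keep snapshots for 72 hours
```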
17 | 18 | ### Changes to restore process 19 | - You will need to manually take a new snapshot 20 | - Download the snapshot from S3 on all etcd nodes 21 | - Rename the old snapshot to the new snapshot filename 22 | - Restore the S3 snapshot in Rancher UI by selecting the new snapshot name 23 | 24 | ### Installation 25 | - Disable recurring snapshots in Rancher/RKE 26 | - At a minimum, `alt-s3-sync.yaml` must be modified (remember to base64 the values) to reflect the s3 details 27 | ``` 28 | kubectl apply -f alt-s3-sync.yaml 29 | ``` 30 | -------------------------------------------------------------------------------- /cleanup-etcd-part-files/alt-s3-sync.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: Secret 4 | metadata: 5 | labels: 6 | app: alt-etcd-s3-sync 7 | name: etcd-s3 8 | namespace: kube-system 9 | type: Opaque 10 | data: 11 | # all values should be base64 encoded (ie: echo -n '123456' | base64) 12 | S3_ACCESS_KEY: 13 | S3_BUCKET_NAME: 14 | S3_BUCKET_REGION: 15 | S3_ENDPOINT: 16 | S3_FOLDER: 17 | S3_SECRET_KEY: 18 | --- 19 | apiVersion: apps/v1 20 | kind: Deployment 21 | metadata: 22 | labels: 23 | app: rolling-etcd-snapshots 24 | name: rolling-etcd-snapshots 25 | namespace: kube-system 26 | spec: 27 | replicas: 1 28 | selector: 29 | matchLabels: 30 | app: rolling-etcd-snapshots 31 | template: 32 | metadata: 33 | labels: 34 | app: rolling-etcd-snapshots 35 | spec: 36 | affinity: 37 | nodeAffinity: 38 | requiredDuringSchedulingIgnoredDuringExecution: 39 | nodeSelectorTerms: 40 | - matchExpressions: 41 | - key: beta.kubernetes.io/os 42 | operator: NotIn 43 | values: 44 | - windows 45 | - key: node-role.kubernetes.io/etcd 46 | operator: In 47 | values: 48 | - "true" 49 | containers: 50 | - args: 51 | - /opt/rke-tools/rke-etcd-backup 52 | - etcd-backup 53 | - save 54 | - --cacert 55 | - /etc/kubernetes/ssl/kube-ca.pem 56 | - --cert 57 | - /etc/kubernetes/ssl/kube-node.pem 58 | - --key 59 | - /etc/kubernetes/ssl/kube-node-key.pem 60 | - --s3-backup=true 61 | - --creation=12h 62 | - --retention=72h 63 | envFrom: 64 | - secretRef: 65 | name: etcd-s3 66 | optional: false 67 | image: rancher/rke-tools:v0.1.66 68 | imagePullPolicy: IfNotPresent 69 | name: rolling-etcd-snapshots 70 | volumeMounts: 71 | - mountPath: /backup 72 | name: rke-tools 73 | - mountPath: /etc/kubernetes 74 | name: k8s-certs 75 | hostNetwork: true 76 | tolerations: 77 | - effect: NoExecute 78 | key: node-role.kubernetes.io/etcd 79 | operator: Equal 80 | value: "true" 81 | volumes: 82 | - hostPath: 83 | path: /opt/rke/etcd-snapshots 84 | type: "" 85 | name: rke-tools 86 | - hostPath: 87 | path: /etc/kubernetes 88 | type: "" 89 | name: k8s-certs 90 | -------------------------------------------------------------------------------- /cleanup-etcd-part-files/delete-part-files.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: DaemonSet 3 | metadata: 4 | name: cleanup-etcd 5 | namespace: kube-system 6 | labels: 7 | app: cleanup-etcd 8 | spec: 9 | selector: 10 | matchLabels: 11 | app: cleanup-etcd 12 | template: 13 | metadata: 14 | labels: 15 | app: cleanup-etcd 16 | spec: 17 | affinity: 18 | nodeAffinity: 19 | requiredDuringSchedulingIgnoredDuringExecution: 20 | nodeSelectorTerms: 21 | - matchExpressions: 22 | - key: beta.kubernetes.io/os 23 | operator: NotIn 24 | values: 25 | - windows 26 | - key: node-role.kubernetes.io/etcd 27 | operator: In 28 | values: 29 | - "true" 30 | 
containers: 31 | - args: 32 | - while true; 33 | do 34 | echo "Before cleanup"; 35 | ls -lh /opt/rke/etcd-snapshots/; 36 | find /opt/rke/etcd-snapshots/ -mindepth 1 -type f -name *.* -mmin +15 | grep -v '.zip' | xargs rm -f; 37 | echo "Post cleanup"; 38 | ls -lh /opt/rke/etcd-snapshots/; 39 | echo "Sleeping..."; 40 | sleep 360; 41 | done; 42 | command: 43 | - /bin/sh 44 | - -c 45 | image: busybox 46 | name: cleanup-etcd 47 | volumeMounts: 48 | - mountPath: /opt/rke 49 | name: rke 50 | tolerations: 51 | - effect: NoExecute 52 | key: node-role.kubernetes.io/etcd 53 | operator: Equal 54 | value: "true" 55 | volumes: 56 | - hostPath: 57 | path: /opt/rke 58 | type: "" 59 | name: rke 60 | -------------------------------------------------------------------------------- /cleanup-evicted-pods/README.md: -------------------------------------------------------------------------------- 1 | # Cleanup evicted pods left behind after disk pressure 2 | When a node starts to evict pods under disk pressure, the evicted pods are left behind. All the resources like volumes, IP, containers, etc will be cleaned up and delete. But the pod object will be left behind in "evicted" status. Per upstream this is [intentional](https://github.com/kubernetes/kubernetes/issues/54525#issuecomment-340035375) 3 | 4 | ## Workaround 5 | 6 | ### Manual cleanup 7 | NOTE: This script is designed to work on Linux machines. 8 | ```bash 9 | kubectl get pods --all-namespaces -ojson | jq -r '.items[] | select(.status.reason!=null) | select(.status.reason | contains("Evicted")) | .metadata.name + " " + .metadata.namespace' | xargs -n2 -l bash -c 'kubectl delete pods $0 --namespace=$1' 10 | ``` 11 | 12 | ### Automatic cleanup 13 | This is a cronjob that runs every 30 mins inside the cluster that will find and remove any pods with the status of "Evicted." 14 | 15 | ```bash 16 | kubectl apply -f deploy.yaml 17 | ``` 18 | 19 | NOTE: This YAML uses the image `rancherlabs/swiss-army-knife`. 20 | -------------------------------------------------------------------------------- /cleanup-evicted-pods/deploy.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: batch/v1 2 | kind: CronJob 3 | metadata: 4 | name: cleanup-evicted-pods 5 | namespace: kube-system 6 | labels: 7 | app: cleanup-evicted-pods 8 | spec: 9 | schedule: "*/30 * * * *" 10 | concurrencyPolicy: Forbid 11 | jobTemplate: 12 | spec: 13 | template: 14 | spec: 15 | containers: 16 | - name: cleanup-evicted-pods 17 | image: rancherlabs/swiss-army-knife 18 | imagePullPolicy: IfNotPresent 19 | command: ["sh", "-c", "kubectl get pods --all-namespaces --field-selector 'status.phase==Failed' -o json | kubectl delete -f -"] 20 | restartPolicy: OnFailure 21 | serviceAccount: rke-job-deployer 22 | serviceAccountName: rke-job-deployer 23 | -------------------------------------------------------------------------------- /cleanup-evicted-pods/serviceaccount.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ServiceAccount 3 | metadata: 4 | name: rke-job-deployer 5 | namespace: kube-system 6 | -------------------------------------------------------------------------------- /collection/rancher/v1.6/logs-collector/README.md: -------------------------------------------------------------------------------- 1 | # rancher-logs-collector 2 | 3 | The script needs to be downloaded and run directly on the host using the `root` user or using `sudo`. 
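If the host has internet access, the script can be fetched directly from this repository before following the steps below; a sketch, with the URL assumed from this repository's layout:

```bash
# Download the Rancher v1.6 log collector (URL assumed from the repo layout)
curl -OLs https://raw.githubusercontent.com/rancherlabs/support-tools/master/collection/rancher/v1.6/logs-collector/rancher16_logs_collector.sh
```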
4 | 5 | ## How to use 6 | 7 | * Download the script and save as: `rancher16_logs_collector.sh` 8 | * Make sure the script is executable: `chmod +x rancher16_logs_collector.sh` 9 | * Run the script: `./rancher16_logs_collector.sh` 10 | -------------------------------------------------------------------------------- /collection/rancher/v1.6/logs-collector/rancher16_logs_collector.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #Check if we're running as root. 3 | if [[ $EUID -ne 0 ]]; then 4 | echo "This script must be run as root" 5 | exit 1 6 | fi 7 | 8 | # Create temp directory 9 | TMPDIR=$(mktemp -d) 10 | 11 | #Set TIMEOUT in seconds for select commands 12 | TIMEOUT=60 13 | 14 | function timeout_start_msg() { 15 | TIMEOUT_CMD=$1 16 | TIMEOUT_EXCEEDED_MSG="$TIMEOUT_CMD command timed out, killing process to prevent hanging." 17 | echo "Executing $TIMEOUT_CMD with a timeout of $TIMEOUT seconds." 18 | } 19 | function timeout_done_msg() { 20 | echo "Execution of $TIMEOUT_CMD has finished." 21 | echo 22 | } 23 | function timeout_cmd() { 24 | WPID=$!; sleep $TIMEOUT && if kill -0 $WPID > /dev/null 2>&1; then echo $TIMEOUT_EXCEEDED_MSG; kill $WPID &> /dev/null; fi & KPID=$!; wait $WPID 25 | } 26 | 27 | # System info 28 | mkdir -p $TMPDIR/systeminfo 29 | hostname > $TMPDIR/systeminfo/hostname 2>&1 30 | hostname -f > $TMPDIR/systeminfo/hostnamefqdn 2>&1 31 | cat /etc/hosts > $TMPDIR/systeminfo/etchosts 2>&1 32 | cat /etc/resolv.conf > $TMPDIR/systeminfo/etcresolvconf 2>&1 33 | date > $TMPDIR/systeminfo/date 2>&1 34 | free -m > $TMPDIR/systeminfo/freem 2>&1 35 | uptime > $TMPDIR/systeminfo/uptime 2>&1 36 | dmesg -T > $TMPDIR/systeminfo/dmesg 2>&1 37 | df -h > $TMPDIR/systeminfo/dfh 2>&1 38 | if df -i >/dev/null 2>&1; then 39 | df -i > $TMPDIR/systeminfo/dfi 2>&1 40 | fi 41 | lsmod > $TMPDIR/systeminfo/lsmod 2>&1 42 | mount > $TMPDIR/systeminfo/mount 2>&1 43 | ps aux > $TMPDIR/systeminfo/psaux 2>&1 44 | 45 | timeout_start_msg "lsof" 46 | lsof -Pn >$TMPDIR/systeminfo/lsof 2>&1 & timeout_cmd 47 | timeout_done_msg 48 | 49 | if $(command -v sysctl >/dev/null 2>&1); then 50 | sysctl -a > $TMPDIR/systeminfo/sysctla 2>/dev/null 51 | fi 52 | # OS: Ubuntu 53 | if $(command -v ufw >/dev/null 2>&1); then 54 | ufw status > $TMPDIR/systeminfo/ubuntu-ufw 2>&1 55 | fi 56 | if $(command -v apparmor_status >/dev/null 2>&1); then 57 | apparmor_status > $TMPDIR/systeminfo/ubuntu-apparmorstatus 2>&1 58 | fi 59 | # OS: RHEL 60 | if [ -f /etc/redhat-release ]; then 61 | systemctl status NetworkManager > $TMPDIR/systeminfo/rhel-statusnetworkmanager 2>&1 62 | systemctl status firewalld > $TMPDIR/systeminfo/rhel-statusfirewalld 2>&1 63 | if $(command -v getenforce >/dev/null 2>&1); then 64 | getenforce > $TMPDIR/systeminfo/rhel-getenforce 2>&1 65 | fi 66 | fi 67 | 68 | # Docker 69 | mkdir -p $TMPDIR/docker 70 | timeout_start_msg "docker info" 71 | docker info >$TMPDIR/docker/dockerinfo 2>&1 & timeout_cmd 72 | timeout_done_msg 73 | 74 | timeout_start_msg "docker ps -a" 75 | docker ps -a >$TMPDIR/docker/dockerpsa 2>&1 76 | timeout_done_msg 77 | 78 | timeout_start_msg "docker stats" 79 | docker stats -a --no-stream >$TMPDIR/docker/dockerstats 2>&1 & timeout_cmd 80 | timeout_done_msg 81 | 82 | if [ -f /etc/docker/daemon.json ]; then 83 | cat /etc/docker/daemon.json > $TMPDIR/docker/etcdockerdaemon.json 84 | fi 85 | 86 | # Networking 87 | mkdir -p $TMPDIR/networking 88 | iptables-save > $TMPDIR/networking/iptablessave 2>&1 89 | cat /proc/net/xfrm_stat > 
$TMPDIR/networking/procnetxfrmstat 2>&1 90 | if $(command -v ip >/dev/null 2>&1); then 91 | ip addr show > $TMPDIR/networking/ipaddrshow 2>&1 92 | ip route > $TMPDIR/networking/iproute 2>&1 93 | fi 94 | if $(command -v ifconfig >/dev/null 2>&1); then 95 | ifconfig -a > $TMPDIR/networking/ifconfiga 96 | fi 97 | 98 | # System logging 99 | mkdir -p $TMPDIR/systemlogs 100 | cp /var/log/syslog* /var/log/messages* /var/log/kern* /var/log/docker* /var/log/system-docker* /var/log/audit/* $TMPDIR/systemlogs 2>/dev/null 101 | 102 | # Rancher logging 103 | # Discover any server or agent running 104 | mkdir -p $TMPDIR/rancher/containerinspect 105 | mkdir -p $TMPDIR/rancher/containerlogs 106 | RANCHERSERVERS=$(docker ps -a | grep -E "rancher/server:|rancher/server |rancher/enterprise:|rancher/enterprise " | awk '{ print $1 }') 107 | RANCHERAGENTS=$(docker ps -a | grep -E "rancher/agent:|rancher/agent " | awk '{ print $1 }') 108 | 109 | for RANCHERSERVER in $RANCHERSERVERS; do 110 | docker inspect $RANCHERSERVER > $TMPDIR/rancher/containerinspect/server-$RANCHERSERVER 2>&1 111 | docker logs -t $RANCHERSERVER > $TMPDIR/rancher/containerlogs/server-$RANCHERSERVER 2>&1 112 | for LOGFILE in $(docker exec $RANCHERSERVER ls -1 /var/lib/cattle/logs 2>/dev/null); do 113 | mkdir -p $TMPDIR/rancher/cattlelogs/ 114 | docker cp $RANCHERSERVER:/var/lib/cattle/logs/$LOGFILE $TMPDIR/rancher/cattlelogs/$LOGFILE-$RANCHERSERVER 115 | done 116 | done 117 | 118 | for RANCHERAGENT in $RANCHERAGENTS; do 119 | docker inspect $RANCHERAGENT > $TMPDIR/rancher/containerinspect/agent-$RANCHERAGENT 2>&1 120 | docker logs -t $RANCHERAGENT > $TMPDIR/rancher/containerlogs/agent-$RANCHERAGENT 2>&1 121 | done 122 | 123 | # Infastructure/System stack containers 124 | for INFRACONTAINER in $(docker ps -a --filter label=io.rancher.container.system=true --format "{{.Names}}"); do 125 | mkdir -p $TMPDIR/infrastacks/containerlogs 126 | mkdir -p $TMPDIR/infrastacks/containerinspect 127 | docker inspect $INFRACONTAINER > $TMPDIR/infrastacks/containerinspect/$INFRACONTAINER 2>&1 128 | docker logs -t $INFRACONTAINER > $TMPDIR/infrastacks/containerlogs/$INFRACONTAINER 2>&1 129 | done 130 | 131 | # IPsec 132 | IPSECROUTERS=$(docker ps --filter label=io.rancher.stack_service.name=ipsec/ipsec/router --format "{{.Names}}") 133 | for IPSECROUTER in "${IPSECROUTERS[@]}"; do 134 | mkdir -p $TMPDIR/ipsec 135 | docker exec $IPSECROUTER bash -cx "swanctl --list-conns && swanctl --list-sas && ip -s xfrm state && ip -s xfrm policy && cat /proc/net/xfrm_stat && sysctl -a" > $TMPDIR/ipsec/ipsec.info.${IPSECROUTER}.log 2>&1 136 | done 137 | 138 | # Networkmanager 139 | NETWORKMANAGERS=$(docker ps --filter label=io.rancher.stack_service.name=network-services/network-manager --format "{{.Names}}") 140 | for NETWORKMANAGER in "${NETWORKMANAGERS[@]}"; do 141 | mkdir -p $TMPDIR/networkmanager 142 | docker exec $NETWORKMANAGER bash -cx "ip link && ip addr && ip neighbor && ip route && conntrack -L && iptables-save && sysctl -a && cat /etc/resolv.conf && uname -a" > $TMPDIR/networkmanager/nm.network.info.${NETWORKMANAGER}.log 2>&1 143 | done 144 | 145 | # System pods 146 | SYSTEMNAMESPACES=(kube-system) 147 | for SYSTEMNAMESPACE in "${SYSTEMNAMESPACES[@]}"; do 148 | CONTAINERS=$(docker ps -a --filter name=$SYSTEMNAMESPACE --format "{{.Names}}") 149 | for CONTAINER in $CONTAINERS; do 150 | mkdir -p $TMPDIR/k8s/podlogs 151 | mkdir -p $TMPDIR/k8s/podinspect 152 | docker inspect $CONTAINER > $TMPDIR/k8s/podinspect/$CONTAINER 2>&1 153 | docker logs -t $CONTAINER > 
$TMPDIR/k8s/podlogs/$CONTAINER 2>&1 154 | done 155 | done 156 | 157 | # etcd 158 | ETCDCONTAINERS=$(docker ps --filter label=io.rancher.stack_service.name=kubernetes/etcd --format "{{.Names}}") 159 | for ETCDCONTAINER in $ETCDCONTAINERS; do 160 | mkdir -p $TMPDIR/etcd 161 | docker exec $ETCDCONTAINER etcdctl cluster-health > $TMPDIR/etcd/cluster-health-${ETCDCONTAINER} 2>&1 162 | find $(docker inspect $ETCDCONTAINER --format '{{ range .Mounts }}{{ if eq .Destination "/pdata" }}{{ .Source }}{{ end }}{{ end }}') -type f -exec ls -la {} \; > $TMPDIR/etcd/findetcddata 2>&1 163 | done 164 | 165 | FILENAME="$(hostname)-$(date +'%Y-%m-%d_%H_%M_%S').tar" 166 | tar cf /tmp/$FILENAME -C ${TMPDIR}/ . 167 | 168 | if $(command -v gzip >/dev/null 2>&1); then 169 | gzip /tmp/${FILENAME} 170 | FILENAME="${FILENAME}.gz" 171 | fi 172 | 173 | echo "Created /tmp/${FILENAME}" 174 | echo "You can now remove ${TMPDIR}" 175 | -------------------------------------------------------------------------------- /collection/rancher/v2.x/RBAC-role-collector/README.md: -------------------------------------------------------------------------------- 1 | # Rancher 2.x RBAC role collector 2 | 3 | This project was created to collect RBAC roles in a Kubernetes cluster to assist with troubleshooting. 4 | 5 | ## Usage 6 | 7 | 1. Download the script to a location from where you can run `kubectl` against the intended cluster, and save it as: `role-dump.sh` 8 | `curl -OLs https://raw.githubusercontent.com/rancherlabs/support-tools/master/collection/rancher/v2.x/RBAC-role-collector/role-dump.sh` 9 | 2. Set the kubectl context to the cluster where you see the issue you are investigating. You will likely want to run this against the Rancher local cluster as well as the downstream cluster where you see the issues. 10 | 3. Run the script: `bash ./role-dump.sh` 11 | 12 | ### What is collected 13 | 14 | The output will contain: 15 | 16 | - JSON files for each role type (in the following list) containing all the roles in the cluster 17 | - A listing (`rolebindings.list`) of all the rolebindings ordered by type 18 | - A tar.gz file that can be provided to support; an uncompressed directory will remain with all the data gathered for your inspection 19 | 20 | Having this information and a list of the user IDs of any users affected by the issue can help in troubleshooting. 21 | 22 | #### CRDs collected: 23 | 24 | ``` 25 | clusterroletemplatebindings 26 | globalrolebindings 27 | globalroles 28 | projectroletemplatebindings 29 | roletemplates.management.cattle.io 30 | roletemplatebindings 31 | clusterrolebindings 32 | clusterroles 33 | roletemplates.rancher.cattle.io 34 | rolebindings 35 | roles 36 | ``` -------------------------------------------------------------------------------- /collection/rancher/v2.x/RBAC-role-collector/role-dump.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | # Troubleshooting Bash settings in case of unexpected failure 4 | # set -o errexit # Set to exit on error.
Do not enable this unless running against upstream Rancher cluster 5 | # set -o xtrace # Set to output every line Bash runs as it runs the script 6 | 7 | # Unset variables used in the script to be safe 8 | unset crd cluster wd dir file role i 9 | 10 | _declare_variables () { 11 | # Role types to collect 12 | crd=(\ 13 | clusterroletemplatebindings \ 14 | globalrolebindings \ 15 | globalroles \ 16 | projectroletemplatebindings \ 17 | roletemplates.management.cattle.io \ 18 | roletemplatebindings \ 19 | clusterrolebindings \ 20 | clusterroles \ 21 | roletemplates.rancher.cattle.io \ 22 | rolebindings \ 23 | roles 24 | ) 25 | 26 | # Store filename friendly cluster name 27 | cluster=$(_slugify "$(kubectl config current-context)") # 28 | 29 | # Working directory 30 | wd="$cluster"_role-bindings_$(date +"%Y-%m-%d_%H_%M_%S") 31 | } 32 | 33 | 34 | # Slugify strings (replace any special characters with `-`) 35 | _slugify () { 36 | echo "$1" | 37 | iconv -t ascii//TRANSLIT | 38 | sed -r s/[^a-zA-Z0-9]+/-/g | 39 | sed -r s/^-+\|-+$//g | 40 | tr A-Z a-z 41 | } 42 | 43 | # Generate a list (`rolebindings.list`) of all the role bindings and template bindings in the cluster 44 | _list_rolebindings () { 45 | for i in ${crd[*]} ; do 46 | echo "Listing $i" 47 | printf "\n\n# $i\n" >> "$wd"/rolebindings.list 48 | kubectl get $i -A >> "$wd"/rolebindings.list 49 | done 50 | } 51 | 52 | # Generate a JSON per role type containing all the rolebindings 53 | _get_rolebindings () { 54 | for i in ${crd[*]} ; do 55 | echo "Getting $i JSON" 56 | file=$(_slugify "$i") 57 | kubectl get "$i" -A -o json > "$wd"/"$file".json 58 | done 59 | } 60 | 61 | # Archive and compress the report 62 | _tarball_wd () { 63 | echo "Compressing $wd" 64 | tar -czvf "$wd".tar.gz "$wd" 65 | } 66 | 67 | 68 | # Runs all the things 69 | main () { 70 | _declare_variables 71 | # Create working directory 72 | if [[ ! -e "$wd" ]]; then 73 | mkdir "$wd" 74 | fi 75 | _list_rolebindings >& "$wd"/list.log 76 | _get_rolebindings >& "$wd"/rolebindings.log 77 | _tarball_wd 78 | } 79 | 80 | # ACTUALLY run all the things 81 | main 82 | -------------------------------------------------------------------------------- /collection/rancher/v2.x/logs-collector/README.md: -------------------------------------------------------------------------------- 1 | # Rancher v2.x logs-collector 2 | 3 | This logs collector project was created to collect logs from Linux Kubernetes nodes. It is designed to be used in the following environments for troubleshooting support cases: 4 | - [RKE2 clusters](https://docs.rke2.io/) 5 | - [RKE1 clusters](https://rancher.com/docs/rke/latest/en/) 6 | - [K3s clusters](https://docs.k3s.io/) 7 | - [Custom clusters](https://docs.ranchermanager.rancher.io/pages-for-subheaders/use-existing-nodes) 8 | - [Infrastructure provider clusters](https://ranchermanager.docs.rancher.com/how-to-guides/new-user-guides/launch-kubernetes-with-rancher/use-new-nodes-in-an-infra-provider) 9 | - [Kubeadm clusters](https://kubernetes.io/docs/reference/setup-tools/kubeadm/) 10 | 11 | > Note: This script may not collect all necessary information when run on nodes in a [Hosted Kubernetes Provider](https://ranchermanager.docs.rancher.com/how-to-guides/new-user-guides/kubernetes-clusters-in-rancher-setup/set-up-clusters-from-hosted-kubernetes-providers) cluster. 12 | 13 | ## Usage 14 | 15 | The script needs to be downloaded and run directly on the node, using the `root` user or `sudo`. 
16 | 17 | Output will be written to `/tmp` as a tar.gz archive named `-.tar.gz`, the default output directory can be changed with the `-d` flag. 18 | 19 | ### Download and run the script 20 | * Save the script as: `rancher2_logs_collector.sh` 21 | 22 | Using `wget`: 23 | ```bash 24 | wget https://raw.githubusercontent.com/rancherlabs/support-tools/master/collection/rancher/v2.x/logs-collector/rancher2_logs_collector.sh 25 | ``` 26 | Using `curl`: 27 | ```bash 28 | curl -OLs https://raw.githubusercontent.com/rancherlabs/support-tools/master/collection/rancher/v2.x/logs-collector/rancher2_logs_collector.sh 29 | ``` 30 | 31 | * Run the script: 32 | ```bash 33 | sudo bash rancher2_logs_collector.sh 34 | ``` 35 | 36 | ### Optional: Download and run the script in one command 37 | ```bash 38 | curl -Ls rnch.io/rancher2_logs | sudo bash 39 | ``` 40 | > Note: This command requires `curl` to be installed, and internet access from the node. 41 | 42 | ## Flags 43 | 44 | ``` 45 | Rancher 2.x logs-collector 46 | Usage: rancher2_logs_collector.sh [ -d -s -e -r -p -f ] 47 | 48 | All flags are optional 49 | 50 | -c Custom data-dir for RKE2 (ex: -c /opt/rke2) 51 | -d Output directory for temporary storage and .tar.gz archive (ex: -d /var/tmp) 52 | -s Start day of journald and docker log collection, # of days relative to the current day (ex: -s 7) 53 | -e End day of journald and docker log collection, # of days relative to the current day (ex: -e 5) 54 | -S Start date of journald and docker log collection. (ex: -S 2022-12-05) 55 | -E End date of journald and docker log collection. (ex: -E 2022-12-07) 56 | -r Override k8s distribution if not automatically detected (rke|k3s|rke2|kubeadm) 57 | -p When supplied runs with the default nice/ionice priorities, otherwise use the lowest priorities 58 | -f Force log collection if the minimum space isn't available 59 | -o Obfuscate IP addresses and hostnames 60 | ``` 61 | 62 | ## Scope of collection 63 | 64 | Collection includes the following areas, the logs collector is designed to gather necessary diagnostic information while respecting privacy and security concerns. A detailed list is maintained in [collection-details.md](./collection-details.md). 65 | 66 | - Related OS logs and configuration: 67 | - Network configuration - interfaces, iptables 68 | - Disk configuration - devices, filesystems, utilization 69 | - Performance - resource usage, tuning 70 | - OS release and logs - versions, messages/syslog 71 | - Related Kubernetes object output, kubectl commands, and pod logs 72 | - Related CRD objects 73 | - Output from kubectl for troubleshooting 74 | - Pod logs from related namespaces 75 | 76 | The scope of collection is intentionally limited to avoid sensitive data, use minimal resources and disk space, and focus on the core areas needed for troubleshooting. 77 | 78 | IP addresses and hostnames are collected and can assist with troubleshooting, however these can be obfuscated when adding the `-o` flag for the log collection script. 79 | 80 | Note, if additional verbosity, debug, or audit logging is enabled for the related Kubernetes and OS components, these logs can be included and may contain sensitive output. 
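For instance, a collection that writes the archive to `/var/tmp`, limits journald and docker logs to a fixed date range, and obfuscates IP addresses and hostnames could be invoked as follows (the dates and output directory are illustrative only):

```bash
# Illustrative only: bounded date range, custom output directory, obfuscated IPs/hostnames
sudo bash rancher2_logs_collector.sh -d /var/tmp -S 2022-12-05 -E 2022-12-07 -o
```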
-------------------------------------------------------------------------------- /collection/rancher/v2.x/logs-collector/collection-details.md: -------------------------------------------------------------------------------- 1 | # Collection Details 2 | 3 | ## Overview 4 | This document provides transparency about the output collected when running the logs collector script. The collection is designed to gather necessary troubleshooting information while respecting privacy and security concerns 5 | 6 | Where possible output from the collection is sanitized, however we recommend you check a log collection and remove or edit any sensitive data 7 | 8 | ### Node-level collection 9 | 10 | Output that is collected only from the node where the logs collector script is run 11 | 12 | #### Operating System 13 | - General OS configuration, for example: the hostname, resources, process list, service list, packages, limits and tunables 14 | - Networking, iptables, netstat, interfaces, CNI configuration 15 | - Journalctl output for related services if available, a list of services is listed in [the `JOURNALD_LOGS` variable](https://github.com/rancherlabs/support-tools/blob/master/collection/rancher/v2.x/logs-collector/rancher2_logs_collector.sh#L12) 16 | - OS logs from /var/logs, a list of log files is listed in [the `VAR_LOG_FILES` variable](https://github.com/rancherlabs/support-tools/blob/master/collection/rancher/v2.x/logs-collector/rancher2_logs_collector.sh#L15) 17 | 18 | #### Kubernetes 19 | - Distribution logs, for example rke2 and k3s agent/server journalctl logs 20 | - Distribution configuration, rke2 and k3s configuration files, static pod manifests 21 | - Container runtime logs and configuration, containerd or docker 22 | 23 | ### Cluster-level collection 24 | 25 | Output that is collected from the cluster 26 | 27 | Note, pod logs from other nodes and additional kubectl output can only be collected when running on a control plane/server node 28 | 29 | #### Kubernetes 30 | - Kubernetes control plane and worker component configuration and logs, for example: kubelet etcd, kube-apiserver 31 | - Kubernetes pod logs from related namespaces, a list of namespaces is listed in [the `SYSTEM_NAMESPACE` variable](https://github.com/rancherlabs/support-tools/blob/master/collection/rancher/v2.x/logs-collector/rancher2_logs_collector.sh#L6) located in the script 32 | - Directory listings, for example: rke2 manifests directory, SSL certificates, etcd snapshots 33 | 34 | #### Kubectl output 35 | - Kubectl list of nodes, pods, services, RBAC roles, persistent volumes, events, ingress and deployments 36 | - Cluster provisioning CRD objects -------------------------------------------------------------------------------- /collection/rancher/v2.x/profile-collector/README.md: -------------------------------------------------------------------------------- 1 | # Rancher v2.x profiles-collector 2 | 3 | This profiles collector project was created to collect: 4 | - [Golang profiles](https://github.com/pkg/profile) for [Rancher Manager](https://github.com/rancher/rancher/), Rancher Cluster Agent, Fleet Controller and Fleet Agent 5 | - Rancher debug or trace logs when collecting Rancher profiles 6 | - Rancher audit logs when available 7 | - Events from the cattle-system namespace 8 | - metrics with kubectl top from pods and nodes 9 | - Rancher metrics exposed on /metrics 10 | 11 | ## Usage 12 | 13 | The script needs to be downloaded and run with a kubeconfig file for the Rancher Management (local) cluster, or a 
downstream cluster where cattle-cluster-agent pods are running 14 | 15 | ### Download and run the script 16 | - Save the script as: `continuous_profiling.sh` 17 | 18 | Using `wget`: 19 | ```bash 20 | wget https://raw.githubusercontent.com/rancherlabs/support-tools/master/collection/rancher/v2.x/profile-collector/continuous_profiling.sh 21 | ``` 22 | Using `curl`: 23 | ```bash 24 | curl -OLs https://raw.githubusercontent.com/rancherlabs/support-tools/master/collection/rancher/v2.x/profile-collector/continuous_profiling.sh 25 | ``` 26 | 27 | - Run the script: 28 | ```bash 29 | bash continuous_profiling.sh 30 | ``` 31 | The script will run until it receives a SIGKILL (Ctrl-C) 32 | A tarball will be generated at the same folder where the script is running. Please share that file with Rancher support. 33 | 34 | ## Flags 35 | 36 | ``` 37 | Rancher 2.x profile-collector 38 | Usage: profile-collector.sh [-a rancher -p goroutine,heap ] 39 | 40 | All flags are optional 41 | 42 | -a Application, rancher, cattle-cluster-agent, fleet-controller, fleet-agent 43 | -p Profiles to be collected (comma separated): goroutine,heap,threadcreate,block,mutex,profile 44 | -s Sleep time between loops in seconds 45 | -t Time of CPU profile collections 46 | -l Log level of the Rancher pods: debug or trace 47 | -h This help 48 | ``` 49 | 50 | ## Examples 51 | - The default collection is equivalent of: 52 | ```bash continuous_profiling -a rancher -p goroutine,heap,profile -s 120 -t 30``` 53 | 54 | - Collecting Upstream Rancher profiles every 30 minutes, and collect trace level logs 55 | ```bash continuous_profiling -s 1800 -l trace``` 56 | 57 | - Collecting cattle-cluster-agent heap and profile 58 | ```bash continuous_profiling -a cattle-cluster-agent -p heap,profile ``` 59 | 60 | - Collecting fleet-agent profile profile (cpu) over a minute 61 | ```bash continuous_profiling -a fleet-agent -t 60``` 62 | -------------------------------------------------------------------------------- /collection/rancher/v2.x/rancher-pod-collector/README.md: -------------------------------------------------------------------------------- 1 | # Rancher v2.x rancher-pod-collector 2 | 3 | This project was created to collect output for the Rancher installation in a Rancher Management (local) cluster when troubleshooting support cases 4 | 5 | This script needs to be downloaded and run on one of the following locations: 6 | 7 | - A server or workstation with kubectl access to the Rancher Management (local) cluster 8 | - Directly on one of the cluster nodes using the `root` user or using `sudo` 9 | - As a k8s deployment on the local cluster 10 | 11 | ## Usage 12 | 13 | - Download the script and save as: `rancher-pod-collector.sh` 14 | - Make sure the script is executable: `chmod +x rancher-pod-collector.sh` 15 | - Run the script: `./rancher-pod-collector.sh` 16 | 17 | Output will be written to `/tmp` as a tar.gz archive named `-.tar.gz`, the default output directory can be changed with the `-d` flag. 18 | 19 | ## Flags 20 | 21 | ``` 22 | Rancher Pod Collector 23 | Usage: rancher-pod-collector.sh [ -d -k KUBECONFIG -t -w -f ] 24 | 25 | All flags are optional. 26 | 27 | -d Output directory for temporary storage and .tar.gz archive (ex: -d /var/tmp) 28 | -k Override the kubeconfig (ex: ~/.kube/custom) 29 | -t Enable trace logs 30 | -w Live tailing Rancher logs 31 | -f Force log collection if the minimum space isn't available." 32 | ``` 33 | 34 | ## Important disclaimer 35 | 36 | The flag `-t` will enables trace logging. 
This can capture sensitive information about your Rancher install, including but not limited to usernames, passwords, encryption keys, etc. 37 | -------------------------------------------------------------------------------- /collection/rancher/v2.x/rancher-pod-collector/rancher-pod-collector.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Minimum space needed to run the script (MB) 4 | SPACE="512" 5 | 6 | setup() { 7 | 8 | TMPDIR=$(mktemp -d $MKTEMP_BASEDIR) 9 | techo "Created ${TMPDIR}" 10 | 11 | } 12 | 13 | disk-space() { 14 | 15 | AVAILABLE=$(df -m ${TMPDIR} | tail -n 1 | awk '{ print $4 }') 16 | if [ "${AVAILABLE}" -lt "${SPACE}" ] 17 | then 18 | techo "${AVAILABLE} MB space free, minimum needed is ${SPACE} MB." 19 | DISK_FULL=1 20 | fi 21 | 22 | } 23 | 24 | verify-access() { 25 | 26 | techo "Verifying cluster access" 27 | if [[ ! -z $OVERRIDE_KUBECONFIG ]]; 28 | then 29 | ## Just use the kubeconfig that was set by the user 30 | KUBECTL_CMD="kubectl --kubeconfig $OVERRIDE_KUBECONFIG" 31 | elif [[ ! -z $KUBECONFIG ]]; 32 | then 33 | KUBECTL_CMD="kubectl" 34 | elif [[ ! -z $KUBERNETES_PORT ]]; 35 | then 36 | ## We are inside the k8s cluster or we're using the local kubeconfig 37 | RANCHER_POD=$(kubectl -n cattle-system get pods -l app=rancher --no-headers -o custom-columns=id:metadata.name | head -n1) 38 | KUBECTL_CMD="kubectl -n cattle-system exec -c rancher ${RANCHER_POD} -- kubectl" 39 | elif $(command -v k3s >/dev/null 2>&1) 40 | then 41 | ## We are on k3s node 42 | KUBECTL_CMD="k3s kubectl" 43 | elif $(command -v docker >/dev/null 2>&1) 44 | then 45 | DOCKER_ID=$(docker ps | grep "k8s_rancher_rancher" | cut -d' ' -f1 | head -1) 46 | KUBECTL_CMD="docker exec ${DOCKER_ID} kubectl" 47 | else 48 | ## Giving up 49 | techo "Could not find a kubeconfig" 50 | fi 51 | if ! 
${KUBECTL_CMD} cluster-info >/dev/null 2>&1 52 | then 53 | techo "Can not access cluster" 54 | exit 1 55 | else 56 | techo "Cluster access has been verified" 57 | fi 58 | } 59 | 60 | cluster-info() { 61 | 62 | techo "Collecting cluster info" 63 | mkdir -p $TMPDIR/clusterinfo 64 | ${KUBECTL_CMD} cluster-info > $TMPDIR/clusterinfo/cluster-info 2>&1 65 | ${KUBECTL_CMD} get nodes -o wide > $TMPDIR/clusterinfo/get-node-wide 2>&1 66 | ${KUBECTL_CMD} cluster-info dump -o yaml -n cattle-system --log-file-max-size 200 --output-directory $TMPDIR/clusterinfo/cluster-info-dump 67 | ## Grabbing cattle-system items 68 | mkdir -p $TMPDIR/cattle-system/ 69 | ${KUBECTL_CMD} get endpoints -n cattle-system -o wide > $TMPDIR/cattle-system/get-endpoints 2>&1 70 | ${KUBECTL_CMD} get ingress -n cattle-system -o yaml > $TMPDIR/cattle-system/get-ingress.yaml 2>&1 71 | ${KUBECTL_CMD} get pods -n cattle-system -o wide > $TMPDIR/cattle-system/get-pods 2>&1 72 | ${KUBECTL_CMD} get svc -n cattle-system -o yaml > $TMPDIR/cattle-system/get-svc.yaml 2>&1 73 | ## Grabbing kube-system items 74 | mkdir -p $TMPDIR/kube-system/ 75 | ${KUBECTL_CMD} get configmap -n kube-system cattle-controllers -o yaml > $TMPDIR/kube-system/get-configmap-cattle-controllers.yaml 2>&1 76 | ## Grabbing cluster configuration 77 | mkdir -p $TMPDIR/clusters 78 | ${KUBECTL_CMD} get clusters.management.cattle.io -A > $TMPDIR/clusters/clusters 2>&1 79 | ${KUBECTL_CMD} get clusters.management.cattle.io -A -o yaml > $TMPDIR/clusters/clusters.yaml 2>&1 80 | 81 | } 82 | 83 | enable-debug() { 84 | 85 | techo "Enabling debug for Rancher pods" 86 | for POD in $(${KUBECTL_CMD} get pods -n cattle-system -l app=rancher --no-headers | awk '{print $1}'); 87 | do 88 | if [ ! -z "${TRACE}" ] 89 | then 90 | techo "Pod: $POD `${KUBECTL_CMD} exec -n cattle-system -c rancher $POD -- loglevel --set trace`" 91 | else 92 | techo "Pod: $POD `${KUBECTL_CMD} exec -n cattle-system -c rancher $POD -- loglevel --set debug`" 93 | fi 94 | done 95 | 96 | } 97 | 98 | disable-debug() { 99 | 100 | techo "Disabling debug for Rancher pods" 101 | for POD in $(${KUBECTL_CMD} get pods -n cattle-system -l app=rancher --no-headers | awk '{print $1}'); 102 | do 103 | techo "Pod: $POD `${KUBECTL_CMD} exec -n cattle-system -c rancher $POD -- loglevel --set info`" 104 | done 105 | 106 | } 107 | 108 | watch-logs() { 109 | 110 | techo "Live tailing debug logs from Rancher pods" 111 | techo "Please use Ctrl+C to finish tailing" 112 | mkdir -p $TMPDIR/rancher-logs/ 113 | ${KUBECTL_CMD} -n cattle-system logs -f -l app=rancher -c rancher | tee $TMPDIR/rancher-logs/live-logs 114 | 115 | } 116 | 117 | 118 | pause() { 119 | 120 | read -n1 -rsp $'Press any key once finished logging with debug loglevel, or Ctrl+C to exit and leave debug loglevel enabled... \n' 121 | 122 | } 123 | 124 | archive() { 125 | 126 | FILEDIR=$(dirname $TMPDIR) 127 | FILENAME="$(kubectl config view -o jsonpath='{.current-context}')-$(date +'%Y-%m-%d_%H_%M_%S').tar" 128 | tar --create --file ${FILEDIR}/${FILENAME} --directory ${TMPDIR}/ . 
129 | ## gzip separately for Rancher OS 130 | gzip ${FILEDIR}/${FILENAME} 131 | 132 | techo "Created ${FILEDIR}/${FILENAME}.gz" 133 | 134 | } 135 | 136 | cleanup() { 137 | 138 | techo "Removing ${TMPDIR}" 139 | rm -r -f "${TMPDIR}" >/dev/null 2>&1 140 | 141 | } 142 | 143 | help() { 144 | 145 | echo "Rancher Pod Collector 146 | Usage: rancher-pod-collector.sh [ -d -k KUBECONFIG -t -w -f ] 147 | 148 | All flags are optional 149 | 150 | -d Output directory for temporary storage and .tar.gz archive (ex: -d /var/tmp) 151 | -k Override the kubeconfig (ex: ~/.kube/custom) 152 | -t Enable trace logs 153 | -w Live tailing Rancher logs 154 | -f Force log collection if the minimum space isn't available" 155 | 156 | } 157 | 158 | timestamp() { 159 | 160 | date "+%Y-%m-%d %H:%M:%S" 161 | 162 | } 163 | 164 | techo() { 165 | 166 | echo "$(timestamp): $*" 167 | 168 | } 169 | 170 | while getopts ":d:k:ftwh" opt; do 171 | case $opt in 172 | d) 173 | MKTEMP_BASEDIR="${OPTARG}/temp.XXXX" 174 | ;; 175 | k) 176 | OVERRIDE_KUBECONFIG="${OPTARG}" 177 | ;; 178 | f) 179 | FORCE=1 180 | ;; 181 | t) 182 | TRACE=1 183 | ;; 184 | w) 185 | WATCH=1 186 | ;; 187 | h) 188 | help && exit 0 189 | ;; 190 | :) 191 | techo "Option -$OPTARG requires an argument." 192 | exit 1 193 | ;; 194 | *) 195 | help && exit 0 196 | esac 197 | done 198 | 199 | setup 200 | disk-space 201 | if [ -n "${DISK_FULL}" ] 202 | then 203 | if [ -z "${FORCE}" ] 204 | then 205 | techo "Cleaning up and exiting" 206 | cleanup 207 | exit 1 208 | else 209 | techo "-f (force) used, continuing" 210 | fi 211 | fi 212 | 213 | if [ ! -z "${TRACE}" ] 214 | then 215 | techo "WARNING: Trace logging has been set. Please confirm that you understand this may capture sensitive information." 216 | pause 217 | fi 218 | verify-access 219 | enable-debug 220 | if [ ! 
-z "${WATCH}" ] 221 | then 222 | watch-logs 223 | else 224 | techo "Debug loglevel has been set" 225 | pause 226 | fi 227 | disable-debug 228 | cluster-info 229 | archive 230 | cleanup 231 | -------------------------------------------------------------------------------- /collection/rancher/v2.x/supportability-review/collect.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ "${DEBUG}" == "true" ]; then 4 | set -x 5 | fi 6 | 7 | HELP_MENU() { 8 | echo "Supportability Review 9 | Usage: collect.sh [ -h ] 10 | 11 | All flags are optional 12 | 13 | -h Print help menu for Supportability Review 14 | 15 | Environment variables: 16 | 17 | RANCHER_URL: Specify Rancher Server URL (Ex: https://rancher.example.com) 18 | RANCHER_TOKEN: Specify Rancher Token to connect to Rancher Server 19 | SR_IMAGE: Use this variable to point to custom container image of Supportability Review 20 | " 21 | } 22 | 23 | SR_IMAGE=${SR_IMAGE:-"rancher/supportability-review:latest"} 24 | 25 | if [ "${CONTAINER_RUNTIME}" == "" ]; then 26 | if command -v docker &> /dev/null; then 27 | echo "setting CONTAINER_RUNTIME=docker" 28 | CONTAINER_RUNTIME="docker" 29 | elif command -v nerdctl &> /dev/null; then 30 | echo "setting CONTAINER_RUNTIME=nerdctl" 31 | CONTAINER_RUNTIME="nerdctl" 32 | elif command -v podman &> /dev/null; then 33 | echo "setting CONTAINER_RUNTIME=podman" 34 | CONTAINER_RUNTIME="podman" 35 | else 36 | echo "error: couldn't detect CONTAINER_RUNTIME" 37 | exit 1 38 | fi 39 | else 40 | supported_runtime=false 41 | for runtime in docker nerdctl podman; do 42 | if [ "${CONTAINER_RUNTIME}" == ${runtime} ]; then 43 | supported_runtime=true 44 | break 45 | fi 46 | done 47 | if [ "${supported_runtime}" == false ]; then 48 | echo "error: unsupported CONTAINER_RUNTIME. Use docker|nerdctl|podman." 49 | exit 1 50 | fi 51 | fi 52 | 53 | if [[ "$SR_IMAGE" != *":dev" ]]; then 54 | echo "pulling image: ${SR_IMAGE}" 55 | $CONTAINER_RUNTIME pull "${SR_IMAGE}" 56 | fi 57 | 58 | CONTAINER_RUNTIME_ARGS="" 59 | COLLECT_INFO_FROM_RANCHER_SETUP_ARGS="" 60 | 61 | if [ "$ENABLE_PRIVILEGED" = "true" ]; then 62 | CONTAINER_RUNTIME_ARGS="$CONTAINER_RUNTIME_ARGS --privileged" 63 | fi 64 | 65 | if [ "${SONOBUOY_TOLERATION_FILE}" != "" ]; then 66 | if [ ! -f "${SONOBUOY_TOLERATION_FILE}" ]; then 67 | echo "error: SONOBUOY_TOLERATION_FILE=${SONOBUOY_TOLERATION_FILE} specified, but cannot access that file" 68 | exit 1 69 | fi 70 | CONTAINER_RUNTIME_ARGS="$CONTAINER_RUNTIME_ARGS -v ${SONOBUOY_TOLERATION_FILE}:/tmp/sonobuoy_toleration.yml" 71 | COLLECT_INFO_FROM_RANCHER_SETUP_ARGS="$COLLECT_INFO_FROM_RANCHER_SETUP_ARGS --sonobuoy-toleration-file /tmp/sonobuoy_toleration.yml" 72 | fi 73 | 74 | if [ "${KUBECONFIG}" == "" ]; then 75 | if [ "${RANCHER_URL}" == "" ]; then 76 | echo "error: RANCHER_URL is not set" 77 | exit 1 78 | fi 79 | 80 | if [ "${RANCHER_TOKEN}" == "" ]; then 81 | echo "error: RANCHER_TOKEN is not set" 82 | exit 1 83 | fi 84 | 85 | if [ "$1" == "-h" ]; then 86 | HELP_MENU 87 | fi 88 | 89 | CONTAINER_RUNTIME_ARGS="$CONTAINER_RUNTIME_ARGS -e RANCHER_URL="${RANCHER_URL}"" 90 | CONTAINER_RUNTIME_ARGS="$CONTAINER_RUNTIME_ARGS -e RANCHER_TOKEN="${RANCHER_TOKEN}"" 91 | CONTAINER_RUNTIME_ARGS="$CONTAINER_RUNTIME_ARGS -e RANCHER_VERIFY_SSL_CERTS="${RANCHER_VERIFY_SSL_CERTS}"" 92 | else 93 | # TODO: Check if it's absolute path 94 | # TODO: Check if the file exists and it's readable 95 | echo "KUBECONFIG specified: ${KUBECONFIG}" 96 | 97 | if [ ! 
-f "${KUBECONFIG}" ]; then 98 | echo "error: KUBECONFIG=${KUBECONFIG} specified, but cannot access that file" 99 | exit 1 100 | fi 101 | 102 | CONTAINER_RUNTIME_ARGS="$CONTAINER_RUNTIME_ARGS -v ${KUBECONFIG}:/tmp/kubeconfig.yml" 103 | COLLECT_INFO_FROM_RANCHER_SETUP_ARGS="$COLLECT_INFO_FROM_RANCHER_SETUP_ARGS --kubeconfig /tmp/kubeconfig.yml" 104 | 105 | if [ -d "$HOME/.aws" ]; then 106 | CONTAINER_RUNTIME_ARGS="$CONTAINER_RUNTIME_ARGS -v $HOME/.aws:/root/.aws" 107 | fi 108 | if [ -n "$AWS_ACCESS_KEY_ID" ] && [ -n "$AWS_SECRET_ACCESS_KEY" ]; then 109 | CONTAINER_RUNTIME_ARGS="$CONTAINER_RUNTIME_ARGS -e AWS_ACCESS_KEY_ID="${AWS_ACCESS_KEY_ID}" -e AWS_SECRET_ACCESS_KEY="${AWS_SECRET_ACCESS_KEY}"" 110 | if [ -n "$AWS_SESSION_TOKEN" ]; then 111 | CONTAINER_RUNTIME_ARGS="$CONTAINER_RUNTIME_ARGS -e AWS_SESSION_TOKEN="${AWS_SESSION_TOKEN}"" 112 | fi 113 | fi 114 | 115 | fi 116 | 117 | CONTAINER_RUNTIME_ARGS="$CONTAINER_RUNTIME_ARGS -e DB_HOST_NAME="${DB_HOST_NAME}"" 118 | CONTAINER_RUNTIME_ARGS="$CONTAINER_RUNTIME_ARGS -e DB_PORT_NUMBER="${DB_PORT_NUMBER}"" 119 | CONTAINER_RUNTIME_ARGS="$CONTAINER_RUNTIME_ARGS -e DB_KEY="${DB_KEY}"" 120 | 121 | CONTAINER_RUNTIME_ARGS="$CONTAINER_RUNTIME_ARGS ${SR_IMAGE}" 122 | 123 | $CONTAINER_RUNTIME run --rm \ 124 | -it \ 125 | --network host \ 126 | -v `pwd`:/data \ 127 | $CONTAINER_RUNTIME_ARGS \ 128 | collect_info_from_rancher_setup.py $COLLECT_INFO_FROM_RANCHER_SETUP_ARGS "$@" 129 | -------------------------------------------------------------------------------- /collection/rancher/v2.x/supportability-review/collection-details.md: -------------------------------------------------------------------------------- 1 | # Rancher Supportability Review Collection Details 2 | 3 | ## Overview 4 | This document provides transparency about the data collected during a Rancher supportability review. The collection is designed to gather necessary diagnostic information while respecting privacy and security concerns. 
5 | 6 | ## Cluster-Level Collection 7 | 8 | ### Kubernetes Components 9 | - API server configuration 10 | - Controller manager settings 11 | - Scheduler configuration 12 | - etcd status and metrics 13 | - Kubelet configuration 14 | - Container runtime status 15 | 16 | ### Workload Information 17 | - Pod status and configuration 18 | - Deployment configurations 19 | - StatefulSet configurations 20 | - DaemonSet configurations 21 | - Service configurations 22 | - Ingress configurations 23 | 24 | ### Cluster Resources 25 | - Namespace listing 26 | - Resource quotas 27 | - Limit ranges 28 | - Network policies 29 | - Storage classes and PV/PVC status 30 | 31 | ### Custom Resources 32 | - Rancher-specific CRDs status 33 | - Cluster configuration CRs 34 | - Helm releases 35 | 36 | ## Node-Level Collection 37 | 38 | ### System Information 39 | - OS version and distribution 40 | - Kernel parameters 41 | - System resources (CPU, memory, disk) 42 | - Network configuration 43 | 44 | ### Container Runtime 45 | - Docker/containerd version 46 | - Runtime configuration 47 | - Container logs 48 | - Image list 49 | 50 | ### Kubernetes Components 51 | - Kubelet status 52 | - Proxy configuration 53 | - CNI configuration 54 | - Container runtime logs 55 | 56 | ### System Logs 57 | - Kubernetes component logs 58 | - System service logs related to container runtime 59 | - Kernel logs related to container operations 60 | 61 | ## What is NOT Collected 62 | 63 | ### Excluded Data 64 | - Application data and logs 65 | - Secrets and sensitive configurations 66 | - User data 67 | - Database contents 68 | - Custom application configurations 69 | - SSL private keys 70 | - Authentication tokens 71 | - Password hashes 72 | 73 | ### Storage 74 | - Application persistent volumes content 75 | - User uploaded files 76 | - Backup files 77 | 78 | ### Network 79 | - Raw network traffic 80 | - Packet captures 81 | - Private network configurations 82 | - VPN configurations 83 | 84 | ## Data Handling 85 | 86 | ### Collection Process 87 | 1. Data is collected using Sonobuoy plugins 88 | 2. Information is aggregated at cluster level 89 | 3. Results are bundled into a single archive 90 | 91 | ### Security Measures 92 | - All collection is read-only 93 | - No modifications are made to cluster configuration 94 | - Collection runs with minimal required permissions 95 | - Data transfer is encrypted 96 | - Generated bundles are encoded and compressed 97 | 98 | ## Usage of Collected Data 99 | 100 | The collected information is used for: 101 | - Identifying potential system issues 102 | - Validating configurations 103 | - Ensuring compliance with best practices 104 | - Troubleshooting reported problems 105 | - Providing optimization recommendations 106 | 107 | The data is analyzed by SUSE Rancher Support to: 108 | - Verify system health 109 | - Identify potential improvements 110 | - Ensure security compliance 111 | - Provide targeted recommendations 112 | - Support issue resolution 113 | 114 | ## Questions or Concerns 115 | 116 | If you have questions about data collection or need to exclude certain types of information, please contact SUSE Rancher Support before running the collection tool. 
We can provide guidance on: 117 | - Customizing collection scope 118 | - Excluding sensitive namespaces 119 | - Modifying collection parameters 120 | - Reviewing collection results -------------------------------------------------------------------------------- /collection/rancher/v2.x/supportability-review/security-policies.md: -------------------------------------------------------------------------------- 1 | # Security Policy Configuration Guide 2 | 3 | ## Overview 4 | This guide provides detailed configuration examples for running the Rancher Supportability Review tool in environments with various security policies. 5 | 6 | ## Kyverno Policies 7 | 8 | ### Required Exclusions 9 | ```yaml 10 | apiVersion: kyverno.io/v1 11 | kind: ClusterPolicy 12 | metadata: 13 | name: privilege-policy 14 | spec: 15 | validationFailureAction: Enforce 16 | background: true 17 | rules: 18 | - name: privilege-escalation 19 | match: 20 | any: 21 | - resources: 22 | kinds: 23 | - Pod 24 | exclude: 25 | any: 26 | - resources: 27 | namespaces: 28 | - sonobuoy 29 | validate: 30 | message: "Privilege escalation is disallowed..." 31 | ``` 32 | 33 | ### Common Kyverno Policies Requiring Modification 34 | - Privilege escalation policies 35 | - Container security policies 36 | - Resource quota policies 37 | - Host path mounting policies 38 | 39 | ## Pod Security Policies 40 | 41 | ### Required Permissions 42 | ```yaml 43 | apiVersion: policy/v1beta1 44 | kind: PodSecurityPolicy 45 | metadata: 46 | name: sonobuoy-psp 47 | spec: 48 | privileged: true 49 | allowPrivilegeEscalation: true 50 | volumes: 51 | - hostPath 52 | - configMap 53 | - emptyDir 54 | hostNetwork: true 55 | hostPID: true 56 | hostIPC: true 57 | runAsUser: 58 | rule: RunAsAny 59 | seLinux: 60 | rule: RunAsAny 61 | supplementalGroups: 62 | rule: RunAsAny 63 | fsGroup: 64 | rule: RunAsAny 65 | ``` 66 | 67 | ## Network Policies 68 | 69 | ### Sonobuoy Aggregator Access 70 | ```yaml 71 | apiVersion: networking.k8s.io/v1 72 | kind: NetworkPolicy 73 | metadata: 74 | name: allow-sonobuoy 75 | namespace: sonobuoy 76 | spec: 77 | podSelector: {} 78 | policyTypes: 79 | - Ingress 80 | - Egress 81 | ingress: 82 | - from: 83 | - namespaceSelector: 84 | matchLabels: 85 | kubernetes.io/metadata.name: sonobuoy 86 | egress: 87 | - to: 88 | - namespaceSelector: {} 89 | ``` 90 | 91 | ## Image Pull Policies 92 | 93 | ### Required Registry Access 94 | ```yaml 95 | apiVersion: operator.openshift.io/v1alpha1 96 | kind: ImageContentSourcePolicy 97 | metadata: 98 | name: sonobuoy-repo 99 | spec: 100 | repositoryDigestMirrors: 101 | - mirrors: 102 | - registry.example.com/supportability-review 103 | source: rancher/supportability-review 104 | - mirrors: 105 | - registry.example.com/sonobuoy 106 | source: rancher/mirrored-sonobuoy-sonobuoy 107 | ``` 108 | 109 | ## OPA Exempting Namespaces 110 | 111 | ### Required Exemption 112 | ```yaml 113 | apiVersion: config.gatekeeper.sh/v1alpha1 114 | kind: Config 115 | metadata: 116 | name: config 117 | namespace: "gatekeeper-system" 118 | spec: 119 | match: 120 | - excludedNamespaces: ["sonobuoy"] 121 | processes: ["*"] 122 | ``` 123 | 124 | 125 | ## Troubleshooting Security Policies 126 | 127 | ### Common Issues and Solutions 128 | 129 | #### 1. Privilege Escalation Blocked 130 | ```yaml 131 | # Error: 132 | validation error: privileged containers are not allowed 133 | 134 | # Solution: 135 | Add namespace exclusion for sonobuoy namespace in your policy 136 | ``` 137 | 138 | #### 2. 
Host Path Mounting Blocked 139 | ```yaml 140 | # Error: 141 | hostPath volumes are not allowed 142 | 143 | # Solution: 144 | Modify PSP to allow hostPath volume types for sonobuoy namespace 145 | ``` 146 | 147 | #### 3. Network Policy Blocks 148 | ```yaml 149 | # Error: 150 | unable to connect to sonobuoy aggregator 151 | 152 | # Solution: 153 | Ensure NetworkPolicy allows pod-to-pod communication in sonobuoy namespace 154 | ``` 155 | 156 | ## Best Practices 157 | 158 | ### Security Policy Configuration 159 | 1. Use namespace-specific exclusions 160 | 2. Avoid blanket exemptions 161 | 3. Monitor policy audit logs 162 | 4. Regular policy review 163 | 164 | ### Deployment Considerations 165 | 1. Use dedicated service accounts 166 | 2. Implement least-privilege access 167 | 3. Regular security audits 168 | 4. Documentation of exceptions 169 | 170 | ## Support 171 | For additional assistance with security policy configuration, contact SUSE Rancher Support with: 172 | 1. Current policy configurations 173 | 2. Error messages 174 | 3. Cluster configuration details 175 | -------------------------------------------------------------------------------- /collection/rancher/v2.x/systems-information-v2/README.md: -------------------------------------------------------------------------------- 1 | # Rancher 2.x Systems Summary v2 2 | 3 | The script runs as a pod in the Rancher Management (local) cluster and collects information about the clusters managed by Rancher. The script collects the following information: 4 | 5 | - Rancher server version and installation UUID 6 | - Details of all clusters managed by Rancher, including: 7 | - Cluster ID and name 8 | - Kubernetes version 9 | - Provider type 10 | - Creation timestamp 11 | - Nodes associated with each cluster 12 | - For each cluster, detailed information about each node, including: 13 | - Node ID and address 14 | - Role within the cluster 15 | - CPU and RAM capacity 16 | - Operating system and Docker version 17 | - Creation timestamp 18 | - Total count of nodes across all clusters 19 | 20 | ## How to use 21 | 22 | Run the following command to deploy the script as a pod in the Rancher Management (local) cluster: 23 | 24 | ```bash 25 | # Deploy the pod in the cluster 26 | kubectl apply -f https://raw.githubusercontent.com/rancherlabs/support-tools/master/collection/rancher/v2.x/systems-information-v2/deploy.yaml 27 | 28 | # Wait for the pod to reach Succeeded status 29 | while [[ $(kubectl get pod rancher-systems-summary-pod -n cattle-system -o 'jsonpath={..status.phase}') != "Succeeded" ]]; do 30 | echo "Waiting for rancher-systems-summary-pod to complete..." 31 | sleep 5 32 | done 33 | 34 | # Follow the logs from the pod 35 | kubectl logs -f pod/rancher-systems-summary-pod -n cattle-system 36 | 37 | # Clean up the pod 38 | kubectl delete pod/rancher-systems-summary-pod -n cattle-system 39 | ``` 40 | 41 | > Note: It might take a few minutes for the pod to collect the information and display it in the logs. 
The script will exit after displaying the information, you should see `Total node count` at the end of the log output 42 | 43 | Example output: 44 | 45 | ```bash 46 | Rancher Systems Summary Report 47 | ============================== 48 | Run on Mon Aug 12 16:46:44 UTC 2024 49 | 50 | NAME READY STATUS RESTARTS AGE 51 | rancher-747c5647d7-5fmh7 2/2 Running 3 (63m ago) 94m 52 | rancher-747c5647d7-76hjr 2/2 Running 5 (61m ago) 101m 53 | rancher-747c5647d7-sfmlc 2/2 Running 2 (35m ago) 92m 54 | Rancher version: v2.9.0 55 | Rancher id: b82b0b06-6f0b-4052-9f17-3602499f07dc 56 | 57 | Cluster Id Name K8s Version Provider Created Nodes 58 | c-m-mfc8m8z5 a1-ops-prd v1.30.2+rke2r1 imported 2024-01-27T20:16:15Z 59 | c-m-tncnvhrs a1-harvester-prd v1.27.13+rke2r1 rke2 2023-12-11T00:52:36Z 60 | local a1-rancher-prd v1.30.2+rke2r1 rke2 2023-08-13T08:46:40Z 61 | 62 | -------------------------------------------------------------------------------- 63 | Cluster: a1-ops-prd (c-m-mfc8m8z5) 64 | Node Id Address Role CPU RAM OS Docker Version Created 65 | machine-4m5rd 172.28.2.217,a1-ops-prd-medium-7962bbf5-wrc2t 8 16273392Ki 2024-07-10T18:28:25Z 66 | machine-4tvh7 172.28.2.142,a1-ops-prd-mgmt-105e966c-xvlg7 8 16273396Ki 2024-07-09T13:19:54Z 67 | machine-5dnpc 172.28.2.234,a1-ops-prd-large-ba0dc7eb-tpmh8 12 49228384Ki 2024-07-12T06:33:51Z 68 | machine-bpmld 172.28.2.235,a1-ops-prd-large-ba0dc7eb-2xzfv 12 49228376Ki 2024-07-12T06:39:50Z 69 | machine-hnhqb 172.28.2.185,a1-ops-prd-mgmt-105e966c-b68bx 8 16273400Ki 2024-07-08T05:36:20Z 70 | machine-j7ckv 172.28.2.220,a1-ops-prd-medium-7962bbf5-sptzb 8 16273412Ki 2024-07-10T18:34:02Z 71 | machine-lvljm 172.28.2.218,a1-ops-prd-small-8918c748-9hjl7 4 8029568Ki 2024-07-10T18:32:48Z 72 | machine-q8blw 172.28.2.205,a1-ops-prd-small-8918c748-5wz8n 4 8029568Ki 2024-07-10T17:58:51Z 73 | machine-rslml 172.28.2.222,a1-ops-prd-small-8918c748-rs7tf 4 8029564Ki 2024-07-10T21:55:58Z 74 | machine-sv2n2 172.28.2.167,a1-ops-prd-mgmt-105e966c-fbtdz 8 16273400Ki 2024-07-08T13:29:51Z 75 | machine-v5mxt 172.28.2.219,a1-ops-prd-small-8918c748-r9knc 4 8029556Ki 2024-07-10T18:33:35Z 76 | machine-vs9tn 172.28.2.223,a1-ops-prd-medium-7962bbf5-lqfwj 8 16273400Ki 2024-07-10T21:54:43Z 77 | machine-xjwjv 172.28.2.236,a1-ops-prd-large-ba0dc7eb-sbrfm 12 49228388Ki 2024-07-12T06:47:55Z 78 | machine-z674w 172.28.2.221,a1-ops-prd-small-8918c748-tlzvx 4 8029560Ki 2024-07-10T21:06:23Z 79 | Node count: 14 80 | 81 | -------------------------------------------------------------------------------- 82 | Cluster: a1-harvester-prd (c-m-tncnvhrs) 83 | Node Id Address Role CPU RAM OS Docker Version Created 84 | machine-4rbqg 172.28.2.22,a1hrr720p02 24 396150564Ki 2023-12-11T01:32:03Z 85 | machine-f864m 172.28.2.24,a1hrr720p04 24 264029632Ki 2024-02-10T00:54:14Z 86 | machine-p5lqp 172.28.2.21,a1hrr720p01 24 264030104Ki 2023-12-11T00:54:08Z 87 | machine-srwm6 172.28.2.23,a1hrr720p03 24 396150588Ki 2023-12-11T03:12:46Z 88 | machine-wfv9d 172.28.2.25,a1hrr720p05 24 264049860Ki 2024-02-10T01:01:46Z 89 | Node count: 5 90 | 91 | -------------------------------------------------------------------------------- 92 | Cluster: a1-rancher-prd (local) 93 | Node Id Address Role CPU RAM OS Docker Version Created 94 | machine-5xwg6 172.28.4.191,a1ubranvp-02 16 32761048Ki 2024-07-07T09:03:53Z 95 | machine-kplk9 172.28.4.116,a1ubranvp-03 16 32761056Ki 2024-07-07T08:55:21Z 96 | machine-tgqhj 172.28.4.160,a1ubranvp-01 16 32761060Ki 2024-07-07T09:03:53Z 97 | Node count: 3 98 | 
-------------------------------------------------------------------------------- 99 | Total node count: 22 100 | ``` 101 | -------------------------------------------------------------------------------- /collection/rancher/v2.x/systems-information-v2/deploy.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Pod 3 | metadata: 4 | name: rancher-systems-summary-pod 5 | namespace: cattle-system 6 | spec: 7 | serviceAccountName: rancher 8 | containers: 9 | - name: rancher-systems-summary 10 | image: rancherlabs/swiss-army-knife 11 | command: ["/bin/bash", "-c"] 12 | args: 13 | - | 14 | echo 'Rancher Systems Summary Report'; 15 | echo '=============================='; 16 | echo "Run on $(date)"; 17 | echo; 18 | kubectl -n cattle-system get pods -l app=rancher; 19 | echo "Rancher version: $(kubectl get settings.management.cattle.io server-version --no-headers -o custom-columns=version:value)"; 20 | echo "Rancher id: $(kubectl get settings.management.cattle.io install-uuid --no-headers -o custom-columns=id:value)"; 21 | echo; 22 | kubectl get clusters.management.cattle.io -o custom-columns=Cluster\ Id:metadata.name,Name:spec.displayName,K8s\ Version:status.version.gitVersion,Provider:status.provider,Created:metadata.creationTimestamp,Nodes:status.appliedSpec.rancherKubernetesEngineConfig.nodes[*].address; 23 | CLUSTER_IDS=$(kubectl get cluster.management.cattle.io --no-headers -o custom-columns=id:metadata.name); 24 | for ID in $CLUSTER_IDS; do 25 | CLUSTER_NAME=$(kubectl get cluster.management.cattle.io ${ID} --no-headers -o custom-columns=name:spec.displayName); 26 | NODE_COUNT=$(kubectl get nodes.management.cattle.io -n ${ID} --no-headers 2>/dev/null | wc -l ); 27 | ((TOTAL_NODE_COUNT += NODE_COUNT)); 28 | echo; 29 | echo '--------------------------------------------------------------------------------'; 30 | echo "Cluster: ${CLUSTER_NAME} (${ID})"; 31 | kubectl get nodes.management.cattle.io -n ${ID} -o custom-columns=Node\ Id:metadata.name,Address:status.internalNodeStatus.addresses[*].address,etcd:spec.etcd,Control\ Plane:spec.controlPlane,Worker:spec.worker,CPU:status.internalNodeStatus.capacity.cpu,RAM:status.internalNodeStatus.capacity.memory,OS:status.internalNodeStatus.nodeInfo.osImage,Container\ Runtime\ Version:status.internalNodeStatus.nodeInfo.containerRuntimeVersion,Created:metadata.creationTimestamp; 32 | echo "Node count: ${NODE_COUNT}"; 33 | done; 34 | echo '--------------------------------------------------------------------------------'; 35 | echo "Total node count: ${TOTAL_NODE_COUNT}"; 36 | restartPolicy: Never 37 | -------------------------------------------------------------------------------- /collection/rancher/v2.x/systems-information/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:18.04 2 | MAINTAINER Rancher Support support@rancher.com 3 | ENV DEBIAN_FRONTEND=noninteractive 4 | 5 | RUN apt-get update && apt-get install -yq --no-install-recommends \ 6 | curl \ 7 | msmtp \ 8 | && apt-get clean && rm -rf /var/lib/apt/lists/* 9 | 10 | ##Installing kubectl 11 | RUN curl -k -LO https://storage.googleapis.com/kubernetes-release/release/`curl -k -s https://storage.googleapis.com/kubernetes-release/release/stable.txt`/bin/linux/amd64/kubectl && mv kubectl /bin/kubectl && chmod +x /bin/kubectl 12 | 13 | ADD *.sh /usr/bin/ 14 | RUN chmod +x /usr/bin/*.sh 15 | 16 | WORKDIR /root 17 | CMD /usr/bin/run.sh 18 | 
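This Dockerfile appears to produce the image consumed by the `charts/systems-information` chart, which references `docker.io/rancher/systems-info:v0.0.1` in its `values.yaml`. A minimal sketch of building the image from this directory and pushing it to a private registry instead (the registry name is an assumption):

```bash
# Build the systems-information image and push it to a private registry (registry name assumed)
docker build -t registry.example.com/systems-info:v0.0.1 .
docker push registry.example.com/systems-info:v0.0.1
```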
-------------------------------------------------------------------------------- /collection/rancher/v2.x/systems-information/README.md: -------------------------------------------------------------------------------- 1 | # Rancher 2.x Systems Summary 2 | 3 | The script needs to be downloaded and run directly on a host running a Rancher server container, either as a single node install or a Rancher Pod as part of a High Availability install. The script needs to be run by a user with access to the Docker socket or using `sudo`. 4 | 5 | ## How to use 6 | 7 | * Download the script and save as: `rancher2_systems_information.sh` 8 | * Make sure the script is executable: `chmod +x rancher2_systems_information.sh` 9 | * Run the script: `./rancher2_systems_information.sh` 10 | -------------------------------------------------------------------------------- /collection/rancher/v2.x/systems-information/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | if [[ -z $smtp_user ]] 5 | then 6 | cat << EOF > /etc/msmtprc 7 | account default 8 | host ${smtp_host} 9 | port ${smtp_port} 10 | from ${from_address} 11 | logfile /var/log/msmtp.log 12 | EOF 13 | 14 | else 15 | cat << EOF > /etc/msmtprc 16 | account default 17 | host ${smtp_host} 18 | port ${smtp_port} 19 | tls on 20 | tls_starttls on 21 | tls_certcheck off 22 | auth on 23 | user ${smtp_user} 24 | password ${smtp_pass} 25 | from ${from_address} 26 | logfile /var/log/msmtp.log 27 | EOF 28 | fi 29 | chmod 600 /etc/msmtprc 30 | 31 | echo "Running Summary Report..." 32 | /usr/bin/systems_summary.sh | tee report.txt 33 | 34 | echo "To: ${to_address}" > email.txt 35 | if [[ "$send_to_support" == "true" ]] 36 | then 37 | echo "CC: support@support.tools" >> email.txt 38 | fi 39 | echo "From: ${from_address}" >> email.txt 40 | echo "Subject: Rancher Systems Summary Report - ${rancher_name}" >> email.txt 41 | cat report.txt >> email.txt 42 | cat email.txt | msmtp -a default ${to_address} 43 | -------------------------------------------------------------------------------- /collection/rancher/v2.x/systems-information/systems_summary.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo "Rancher Systems Summary Report" 4 | echo "==============================" 5 | echo "Run on `date`" 6 | echo 7 | 8 | if [[ ! -z $KUBERNETES_PORT ]]; 9 | then 10 | RANCHER_POD=$(kubectl -n cattle-system get pods -l app=rancher --no-headers -o custom-columns=id:metadata.name --field-selector status.phase=Running | head -n1) 11 | KUBECTL_CMD="kubectl -n cattle-system exec ${RANCHER_POD} -c rancher -- kubectl" 12 | else 13 | if $(command -v rke2 >/dev/null 2>&1) 14 | then 15 | KUBECTL_CMD="/var/lib/rancher/rke2/bin/kubectl --kubeconfig=/etc/rancher/rke2/rke2.yaml" 16 | elif $(command -v k3s >/dev/null 2>&1) 17 | then 18 | KUBECTL_CMD="k3s kubectl" 19 | else 20 | # Get docker id for rancher single node install 21 | DOCKER_ID=$(docker ps | grep "rancher/rancher:" | cut -d' ' -f1) 22 | if [ -z "${DOCKER_ID}" ] 23 | then 24 | # Get docker id for rancher ha install 25 | DOCKER_ID=$(docker ps | grep "k8s_rancher_rancher" | cut -d' ' -f1 | head -1) 26 | if [ -z "${DOCKER_ID}" ] 27 | then 28 | echo "Could not find Rancher 2 container, exiting..." 
29 | exit -1 30 | fi 31 | fi 32 | KUBECTL_CMD="docker exec ${DOCKER_ID} kubectl" 33 | fi 34 | fi 35 | 36 | echo "Rancher version: $(${KUBECTL_CMD} get settings.management.cattle.io server-version --no-headers -o custom-columns=version:value)" 37 | echo "Rancher id: $(${KUBECTL_CMD} get settings.management.cattle.io install-uuid --no-headers -o custom-columns=id:value)" 38 | echo 39 | 40 | ${KUBECTL_CMD} get clusters.management.cattle.io -o custom-columns=Cluster\ Id:metadata.name,Name:spec.displayName,K8s\ Version:status.version.gitVersion,Provider:status.driver,Created:metadata.creationTimestamp,Nodes:status.appliedSpec.rancherKubernetesEngineConfig.nodes[*].address 41 | 42 | CLUSTER_IDS=$(${KUBECTL_CMD} get cluster.management.cattle.io --no-headers -o custom-columns=id:metadata.name) 43 | 44 | for ID in $CLUSTER_IDS 45 | do 46 | CLUSTER_NAME=$(${KUBECTL_CMD} get cluster.management.cattle.io ${ID} --no-headers -o custom-columns=name:spec.displayName) 47 | NODE_COUNT=$(${KUBECTL_CMD} get nodes.management.cattle.io -n ${ID} --no-headers 2>/dev/null | wc -l ) 48 | ((TOTAL_NODE_COUNT += NODE_COUNT)) 49 | echo 50 | echo "--------------------------------------------------------------------------------" 51 | echo "Cluster: ${CLUSTER_NAME} (${ID})" 52 | ${KUBECTL_CMD} get nodes.management.cattle.io -n ${ID} -o custom-columns=Node\ Id:metadata.name,Address:status.internalNodeStatus.addresses[*].address,Role:status.rkeNode.role[*],CPU:status.internalNodeStatus.capacity.cpu,RAM:status.internalNodeStatus.capacity.memory,OS:status.dockerInfo.OperatingSystem,Docker\ Version:status.dockerInfo.ServerVersion,Created:metadata.creationTimestamp 53 | echo "Node count: ${NODE_COUNT}" 54 | done 55 | echo "--------------------------------------------------------------------------------" 56 | echo "Total node count: ${TOTAL_NODE_COUNT}" 57 | -------------------------------------------------------------------------------- /collection/rancher/v2.x/windows-log-collector/README.md: -------------------------------------------------------------------------------- 1 | # Rancher v2.x Windows log-collector 2 | 3 | This logs collector project was created to collect logs from Windows Kubernetes nodes. It is designed to be used with RKE1 Windows clusters for troubleshooting support cases. 
4 | 5 | ## Usage 6 | 7 | - Open a new PowerShell window with Administrator Privileges (Find Windows PowerShell in Start Menu, right click, Run As Administrator) 8 | - Run the following commands in your PowerShell window 9 | 10 | ```ps1 11 | Set-ExecutionPolicy Bypass 12 | Start-BitsTransfer https://raw.githubusercontent.com/rancherlabs/support-tools/master/collection/rancher/v2.x/windows-log-collector/win-log-collect.ps1 13 | .\win-log-collect.ps1 14 | ``` 15 | 16 | ### Upon successful completion, your log bundle will be on the root of the C: drive (example below) 17 | 18 | ``` 19 | > dir C:\ 20 | d----- 11/14/2018 6:56 AM EFI 21 | d----- 6/2/2020 3:31 PM etc 22 | d----- 6/2/2020 3:31 PM opt 23 | d----- 5/13/2020 6:03 PM PerfLogs 24 | d-r--- 5/13/2020 5:25 PM Program Files 25 | d----- 6/2/2020 3:16 PM Program Files (x86) 26 | d----- 6/2/2020 7:23 PM rancher 27 | d----- 6/2/2020 4:06 PM run 28 | d-r--- 6/1/2020 6:30 PM Users 29 | d----- 6/2/2020 3:31 PM var 30 | d----- 6/1/2020 6:26 PM Windows 31 | -a---- 6/2/2020 5:07 PM 428911 rancher_EC2AMAZ-ENEJ0H8_20200602T1704290242Z.tgz 32 | ``` 33 | 34 | ### Expected output 35 | 36 | > Note: The `Unable to Collect Windows Firewall information` error is expected if it there are no Domain specific firewall rules 37 | 38 | ```ps1 39 | Running Rancher Log Collection 40 | Creating temporary directory 41 | OK 42 | Collecting System information 43 | OK 44 | Collecting PS output 45 | Collecting Disk information 46 | Collecting Volume info 47 | OK 48 | Collecting Windows Firewall info 49 | Collecting Rules for Domain profile 50 | get_firewall_info : Unable to Collect Windows Firewall information 51 | At C:\Users\Administrator\log-collect-beta.ps1:397 char:5 52 | + get_firewall_info 53 | + ~~~~~~~~~~~~~~~~~ 54 | + CategoryInfo : NotSpecified: (:) [Write-Error], WriteErrorException 55 | + FullyQualifiedErrorId : Microsoft.PowerShell.Commands.WriteErrorException,get_firewall_info 56 | 57 | Collecting installed applications list 58 | OK 59 | Collecting Services list 60 | OK 61 | Collecting Docker daemon information 62 | OK 63 | Collecting Kubernetes components config 64 | OK 65 | Collecting Windows Event logs 66 | OK 67 | Collecting Kubernetes Logs 68 | OK 69 | Collecting network Information 70 | OK 71 | Collecting group policy information 72 | Get-GPOReport is not a valid cmdlet 73 | Collecting proxy information 74 | OK 75 | Archiving Rancher log collection script data 76 | OK 77 | Done. Your log bundle is located in C:\rancher_EC2AMAZ-ENEJ0H8_20200602T1704290242Z 78 | Please supply the log bundle(s) to Rancher Support 79 | Cleaning up directory 80 | OK 81 | ``` 82 | -------------------------------------------------------------------------------- /eks-upgrade-using-api/README.md: -------------------------------------------------------------------------------- 1 | # SURE-5880 Support Script 2 | 3 | ## Purpose 4 | 5 | This script is designed to be used to upgrade EKS clusters using the Rancher API. Its been specifically designed for Rancher v2.6.10 and upgrading EKS clusters from 1.22 to 1.23 (whilst a UI issue prevents this). 6 | 7 | ## Requirements 8 | 9 | This script requires the following: 10 | 11 | - jq 12 | - cURL 13 | - Rancher API Endpoint 14 | - Rancher API Token 15 | 16 | ## Demo 17 | 18 | ![demo](demo.gif) 19 | 20 | ## Usage 21 | 22 | 1. Create an API key in Rancher.The key can be scoped per cluster or with no scope. Its easier to have no scope as you can use the same API key for all cluster upgrades. 23 | 2. 
Note down the **Bearer Token** API key 24 | 3. Note down the API Endpoint 25 | 2. Open a terminal 26 | 3. Export environment variables for the key and endpoint 27 | 28 | ```bash 29 | export RANCHER_TOKEN="" 30 | export RANCHER_API="" 31 | ``` 32 | 33 | 4. Get a list of your EKS clusters using this command 34 | 35 | ```bash 36 | # For v2 37 | ./eks-support.sh list -t $RANCHER_TOKEN --endpoint $RANCHER_API 38 | # For v1 39 | ./eks-support.sh list -t $RANCHER_TOKEN --endpoint $RANCHER_API --kev1 40 | ``` 41 | 42 | > The output will list all the found EKS clusters with their name, id, current version and state. 43 | 44 | ### Upgrading EKS Clusters 45 | 46 | 1. For each EKS cluster you want to upgrade run the following command: 47 | 48 | ```bash 49 | # For v2 50 | ./eks-support.sh upgrade -t $RANCHER_TOKEN --endpoint $RANCHER_API --from 1.22 --to 1.23 --name 51 | # For v1 52 | ./eks-support.sh upgrade -t $RANCHER_TOKEN --endpoint $RANCHER_API --from 1.22 --to 1.23 --name --aws-secret-key "" --kev1 53 | ``` 54 | 55 | > Replace the values of --from, --to and --name with your values. 56 | 57 | 2. The cluster will start to upgrade. You can check the status of a specific cluster using this command: 58 | 59 | ```bash 60 | ./eks-support.sh status -t $RANCHER_TOKEN --endpoint $RANCHER_API --name richtest1 61 | ``` 62 | 63 | ### Unsetting Node Groups as managed fields for imported EKS Clusters (only for KEv2) 64 | 65 | ```bash 66 | # For v2 67 | ./eks-support.sh unset_nodegroups -t $RANCHER_TOKEN --endpoint $RANCHER_API --name 68 | ``` 69 | 70 | -------------------------------------------------------------------------------- /eks-upgrade-using-api/common.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # This containers common functions for shell scripts. Its 4 | # meant to be source included into another script. 5 | 6 | ## HELPER FUNCS 7 | 8 | # Send a green message to stdout, followed by a new line 9 | say() { 10 | [ -t 1 ] && [ -n "$TERM" ] && 11 | echo "$(tput setaf 2)[$MY_NAME]$(tput sgr0) $*" || 12 | echo "[$MY_NAME] $*" 13 | } 14 | 15 | # Send a green message to stdout, without a trailing new line 16 | say_noln() { 17 | [ -t 1 ] && [ -n "$TERM" ] && 18 | echo -n "$(tput setaf 2)[$MY_NAME]$(tput sgr0) $*" || 19 | echo "[$MY_NAME] $*" 20 | } 21 | 22 | # Send a red message to stdout, followed by a new line 23 | say_err() { 24 | [ -t 2 ] && [ -n "$TERM" ] && 25 | echo -e "$(tput setaf 1)[$MY_NAME] $*$(tput sgr0)" 1>&2 || 26 | echo -e "[$MY_NAME] $*" 1>&2 27 | } 28 | 29 | # Send a yellow message to stdout, followed by a new line 30 | say_warn() { 31 | [ -t 1 ] && [ -n "$TERM" ] && 32 | echo "$(tput setaf 3)[$MY_NAME] $*$(tput sgr0)" || 33 | echo "[$MY_NAME] $*" 34 | } 35 | 36 | # Send a yellow message to stdout, without a trailing new line 37 | say_warn_noln() { 38 | [ -t 1 ] && [ -n "$TERM" ] && 39 | echo -n "$(tput setaf 3)[$MY_NAME] $*$(tput sgr0)" || 40 | echo "[$MY_NAME] $*" 41 | } 42 | 43 | # Exit with an error message and (optional) code 44 | # Usage: die [-c ] 45 | die() { 46 | code=1 47 | [[ "$1" = "-c" ]] && { 48 | code="$2" 49 | shift 2 50 | } 51 | say_err "$@" 52 | exit "$code" 53 | } 54 | 55 | # Exit with an error message if the last exit code is not 0 56 | ok_or_die() { 57 | code=$? 58 | [[ $code -eq 0 ]] || die -c $code "$@" 59 | } 60 | 61 | ## MAIN 62 | main() { 63 | if [ $# = 0 ]; then 64 | die "No command provided. Please use \`$0 help\` for help." 65 | fi 66 | 67 | # Parse main command line args. 
68 | while [ $# -gt 0 ]; do 69 | case "$1" in 70 | -h | --help) 71 | cmd_help 72 | exit 1 73 | ;; 74 | -*) 75 | die "Unknown arg: $1. Please use \`$0 help\` for help." 76 | ;; 77 | *) 78 | break 79 | ;; 80 | esac 81 | shift 82 | done 83 | 84 | # $1 is now a command name. Check if it is a valid command and, if so, 85 | # run it. 86 | # 87 | declare -f "cmd_$1" >/dev/null 88 | ok_or_die "Unknown command: $1. Please use \`$0 help\` for help." 89 | 90 | cmd=cmd_$1 91 | shift 92 | 93 | # $@ is now a list of command-specific args 94 | # 95 | $cmd "$@" 96 | } -------------------------------------------------------------------------------- /eks-upgrade-using-api/demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rancherlabs/support-tools/2fa26cd1c13cc329f5553f88adfe693ee978848e/eks-upgrade-using-api/demo.gif -------------------------------------------------------------------------------- /eks-upgrade-using-kubectl/README.md: -------------------------------------------------------------------------------- 1 | # SURE-5880 Support Script 2 | 3 | ## Purpose 4 | 5 | This script is designed to be used to upgrade EKS clusters using kubectl. Its been specifically designed for Rancher v2.6.10 and upgrading EKS clusters from 1.22 to 1.23 (whilst a UI issue prevents this). 6 | 7 | ## Requirements 8 | 9 | This script requires the following: 10 | 11 | - jq 12 | - kubectl 13 | 14 | ## Usage 15 | 16 | 1. Open a terminal 17 | 2. Export environment variables for the path to the kubeconfig for your Rancher cluster 18 | 19 | ```bash 20 | export RANCHER_KUBE="" 21 | ``` 22 | 23 | ### Upgrading EKS Clusters 24 | 25 | 1. Get a list of your EKS clusters using this command 26 | 27 | ```bash 28 | # For v2 29 | ./eks-support.sh list -k $RANCHER_KUBE 30 | # For v1 31 | ./eks-support.sh list -k $RANCHER_KUBE --kev1 32 | ``` 33 | 34 | 2. For each EKS cluster you want to upgrade run the following command: 35 | 36 | ```bash 37 | # For v2 38 | ./eks-support.sh upgrade -k $RANCHER_KUBE --from 1.22 --to 1.23 --nname 39 | # For v1 40 | ./eks-support.sh upgrade -k $RANCHER_KUBE --from 1.22 --to 1.23 --name --kev1 41 | ``` 42 | 43 | > Replace the values of --from, --to and --name with your values. 44 | 45 | ### Unsetting Node Groups as managed fields for imported EKS Clusters (only for KEv2) 46 | 47 | ```bash 48 | # For v2 49 | ./eks-support.sh unset_nodegroups -k $RANCHER_KUBE --name 50 | ``` 51 | -------------------------------------------------------------------------------- /eks-upgrade-using-kubectl/common.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # This containers common functions for shell scripts. Its 4 | # meant to be source included into another script. 
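#
# Usage sketch (an assumption about how the sibling eks-support.sh consumes this file; only the
# say*/die helpers, the cmd_<name> dispatch convention and main() come from this script):
#
#   MY_NAME="eks-support"            # prefix printed by say(), say_err(), say_warn()
#   source "$(dirname "$0")/common.sh"
#   cmd_help() { echo "usage: $0 <list|upgrade|unset_nodegroups> ..."; }
#   cmd_list() { say "listing clusters..."; }
#   main "$@"                        # dispatches to cmd_<subcommand>, or dies with a help hint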
5 | 6 | ## HELPER FUNCS 7 | 8 | # Send a green message to stdout, followed by a new line 9 | say() { 10 | [ -t 1 ] && [ -n "$TERM" ] && 11 | echo "$(tput setaf 2)[$MY_NAME]$(tput sgr0) $*" || 12 | echo "[$MY_NAME] $*" 13 | } 14 | 15 | # Send a green message to stdout, without a trailing new line 16 | say_noln() { 17 | [ -t 1 ] && [ -n "$TERM" ] && 18 | echo -n "$(tput setaf 2)[$MY_NAME]$(tput sgr0) $*" || 19 | echo "[$MY_NAME] $*" 20 | } 21 | 22 | # Send a red message to stdout, followed by a new line 23 | say_err() { 24 | [ -t 2 ] && [ -n "$TERM" ] && 25 | echo -e "$(tput setaf 1)[$MY_NAME] $*$(tput sgr0)" 1>&2 || 26 | echo -e "[$MY_NAME] $*" 1>&2 27 | } 28 | 29 | # Send a yellow message to stdout, followed by a new line 30 | say_warn() { 31 | [ -t 1 ] && [ -n "$TERM" ] && 32 | echo "$(tput setaf 3)[$MY_NAME] $*$(tput sgr0)" || 33 | echo "[$MY_NAME] $*" 34 | } 35 | 36 | # Send a yellow message to stdout, without a trailing new line 37 | say_warn_noln() { 38 | [ -t 1 ] && [ -n "$TERM" ] && 39 | echo -n "$(tput setaf 3)[$MY_NAME] $*$(tput sgr0)" || 40 | echo "[$MY_NAME] $*" 41 | } 42 | 43 | # Exit with an error message and (optional) code 44 | # Usage: die [-c ] 45 | die() { 46 | code=1 47 | [[ "$1" = "-c" ]] && { 48 | code="$2" 49 | shift 2 50 | } 51 | say_err "$@" 52 | exit "$code" 53 | } 54 | 55 | # Exit with an error message if the last exit code is not 0 56 | ok_or_die() { 57 | code=$? 58 | [[ $code -eq 0 ]] || die -c $code "$@" 59 | } 60 | 61 | ## MAIN 62 | main() { 63 | if [ $# = 0 ]; then 64 | die "No command provided. Please use \`$0 help\` for help." 65 | fi 66 | 67 | # Parse main command line args. 68 | while [ $# -gt 0 ]; do 69 | case "$1" in 70 | -h | --help) 71 | cmd_help 72 | exit 1 73 | ;; 74 | -*) 75 | die "Unknown arg: $1. Please use \`$0 help\` for help." 76 | ;; 77 | *) 78 | break 79 | ;; 80 | esac 81 | shift 82 | done 83 | 84 | # $1 is now a command name. Check if it is a valid command and, if so, 85 | # run it. 86 | # 87 | declare -f "cmd_$1" >/dev/null 88 | ok_or_die "Unknown command: $1. Please use \`$0 help\` for help." 89 | 90 | cmd=cmd_$1 91 | shift 92 | 93 | # $@ is now a list of command-specific args 94 | # 95 | $cmd "$@" 96 | } -------------------------------------------------------------------------------- /extended-rancher-2-cleanup/README.md: -------------------------------------------------------------------------------- 1 | ## Extended Rancher 2 Cleanup 2 | 3 | This script is designed to clean a node provisioned with the RKE1 distribution using Rancher or the RKE CLI. 4 | 5 | The node will be cleaned of all state to ensure it is consistent to reuse in a cluster or other use case. 6 | 7 | For [RKE2](https://docs.rke2.io/install/uninstall) and [K3s](https://rancher.com/docs/k3s/latest/en/installation/uninstall/) nodes, use the uninstall.sh script created during installation 8 | 9 | > **Warning** this script will delete all containers, volumes, network interfaces, and directories that relate to Rancher and Kubernetes. It will also flush all iptables rules and optionally delete container images. 10 | 11 | > It is important to perform pre-checks, and backup the node as needed before proceeding with any steps below. 
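A pre-check sketch before running the cleanup (paths are RKE1 defaults; local etcd snapshots live under `/opt/rke`, which the `-s` flag removes), to review what will be deleted and copy snapshots aside:

```bash
# Review what the cleanup will remove
docker ps -a --format '{{.Names}}'
docker volume ls -q
ls /opt/rke/etcd-snapshots 2>/dev/null

# Copy local etcd snapshots somewhere safe before running the cleanup
tar -czf /root/etcd-snapshots-$(date +%F).tar.gz -C /opt/rke etcd-snapshots 2>/dev/null
```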
12 | 13 | ### Running the script 14 | 15 | #### Download the script 16 | ```bash 17 | curl -LO https://github.com/rancherlabs/support-tools/raw/master/extended-rancher-2-cleanup/extended-cleanup-rancher2.sh 18 | ``` 19 | #### Run the script as root, or prefix with sudo 20 | ```bash 21 | bash extended-cleanup-rancher2.sh 22 | ``` 23 | 24 | ### Usage 25 | 26 | ```bash 27 | # bash extended-cleanup-rancher2.sh -h 28 | Rancher 2.x extended cleanup 29 | Usage: bash extended-cleanup-rancher2.sh [ -f -i -s ] 30 | 31 | All flags are optional 32 | 33 | -f | --skip-iptables Skip flush of iptables rules 34 | -i | --delete-images Cleanup all container images 35 | -s | --delete-snapshots Cleanup all etcd snapshots 36 | -h This help menu 37 | 38 | !! Warning, this script flushes iptables rules, removes containers, and all data specific to Kubernetes and Rancher 39 | !! Docker will be restarted when flushing iptables rules 40 | !! Backup data as needed before running this script 41 | !! Use at your own risk 42 | ``` 43 | -------------------------------------------------------------------------------- /extended-rancher-2-cleanup/extended-cleanup-rancher2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Cleanup for nodes provisioned using the RKE1 distribution 4 | # Note, for RKE2 and K3s use the uninstall script deployed on the node during install. 5 | 6 | # Directories to cleanup 7 | CLEANUP_DIRS=(/etc/ceph /etc/cni /etc/kubernetes /opt/cni /run/secrets/kubernetes.io /run/calico /run/flannel /var/lib/calico /var/lib/weave /var/lib/etcd /var/lib/cni /var/lib/kubelet /var/lib/rancher/rke/log /var/log/containers /var/log/pods /var/run/calico) 8 | 9 | # Interfaces to cleanup 10 | CLEANUP_INTERFACES=(flannel.1 cni0 tunl0 weave datapath vxlan-6784) 11 | 12 | run() { 13 | 14 | CONTAINERS=$(docker ps -qa) 15 | if [[ -n ${CONTAINERS} ]] 16 | then 17 | cleanup-containers 18 | else 19 | techo "No containers exist, skipping container cleanup..." 20 | fi 21 | cleanup-dirs 22 | cleanup-interfaces 23 | VOLUMES=$(docker volume ls -q) 24 | if [[ -n ${VOLUMES} ]] 25 | then 26 | cleanup-volumes 27 | else 28 | techo "No volumes exist, skipping container volume cleanup..." 29 | fi 30 | if [[ ${DELETE_IMAGES} -eq 1 ]] 31 | then 32 | IMAGES=$(docker images -q) 33 | if [[ -n ${IMAGES} ]] 34 | then 35 | cleanup-images 36 | else 37 | techo "No images exist, skipping container image cleanup..." 38 | fi 39 | fi 40 | if [[ -z ${SKIP_FLUSH_IPTABLES} ]] 41 | then 42 | flush-iptables 43 | else 44 | techo "Skipping flush of iptables rules..." 45 | fi 46 | techo "Done!" 47 | 48 | } 49 | 50 | cleanup-containers() { 51 | 52 | techo "Removing containers..." 53 | docker rm -f $(docker ps -qa) 54 | 55 | } 56 | 57 | cleanup-dirs() { 58 | 59 | techo "Unmounting filesystems..." 60 | for mount in $(mount | grep '/var/lib/kubelet' | awk '{ print $3 }') 61 | do 62 | umount -f $mount 63 | done 64 | 65 | if [ -n "${DELETE_SNAPSHOTS}" ] 66 | then 67 | techo "Removing etcd snapshots..." 68 | rm -rf /opt/rke 69 | fi 70 | 71 | techo "Removing directories..." 72 | for DIR in "${CLEANUP_DIRS[@]}" 73 | do 74 | techo "Removing $DIR" 75 | rm -rf $DIR 76 | done 77 | 78 | } 79 | 80 | cleanup-images() { 81 | 82 | techo "Removing images..." 83 | docker rmi -f $(docker images -q) 84 | 85 | } 86 | 87 | cleanup-interfaces() { 88 | 89 | techo "Removing interfaces..." 
90 | for INTERFACE in "${CLEANUP_INTERFACES[@]}" 91 | do 92 | if $(ip link show ${INTERFACE} > /dev/null 2>&1) 93 | then 94 | techo "Removing $INTERFACE" 95 | ip link delete $INTERFACE 96 | fi 97 | done 98 | 99 | } 100 | 101 | cleanup-volumes() { 102 | 103 | techo "Removing volumes..." 104 | docker volume rm $(docker volume ls -q) 105 | 106 | } 107 | 108 | flush-iptables() { 109 | 110 | techo "Flushing iptables..." 111 | iptables -F -t nat 112 | iptables -X -t nat 113 | iptables -F -t mangle 114 | iptables -X -t mangle 115 | iptables -F 116 | iptables -X 117 | techo "Restarting Docker..." 118 | if systemctl list-units --full -all | grep -q docker.service 119 | then 120 | systemctl restart docker 121 | else 122 | /etc/init.d/docker restart 123 | fi 124 | 125 | } 126 | 127 | help() { 128 | 129 | echo "Rancher 2.x extended cleanup 130 | Usage: bash extended-cleanup-rancher2.sh [ -f -i -s ] 131 | 132 | All flags are optional 133 | 134 | -f | --skip-iptables Skip flush of iptables rules 135 | -i | --delete-images Cleanup all container images 136 | -s | --delete-snapshots Cleanup all etcd snapshots 137 | -h This help menu 138 | 139 | !! Warning, this script flushes iptables rules, removes containers, and all data specific to Kubernetes and Rancher 140 | !! Docker will be restarted when flushing iptables rules 141 | !! Backup data as needed before running this script 142 | !! Use at your own risk" 143 | 144 | } 145 | 146 | timestamp() { 147 | 148 | date "+%Y-%m-%d %H:%M:%S" 149 | 150 | } 151 | 152 | techo() { 153 | 154 | echo "$(timestamp): $*" 155 | 156 | } 157 | 158 | # Check if we're running as root. 159 | if [[ $EUID -ne 0 ]] 160 | then 161 | techo "This script must be run as root" 162 | exit 1 163 | fi 164 | 165 | while test $# -gt 0 166 | do 167 | case ${1} in 168 | -f|--skip-iptables) 169 | shift 170 | SKIP_FLUSH_IPTABLES=1 171 | ;; 172 | -i|--delete-images) 173 | shift 174 | DELETE_IMAGES=1 175 | ;; 176 | -s|--delete-snapshots) 177 | shift 178 | DELETE_SNAPSHOTS=1 179 | ;; 180 | h) 181 | help && exit 0 182 | ;; 183 | *) 184 | help && exit 0 185 | esac 186 | done 187 | 188 | # Run the cleanup 189 | run -------------------------------------------------------------------------------- /files/curl-format.txt: -------------------------------------------------------------------------------- 1 | http_code: %{http_code}\n 2 | http_connect: %{http_connect}\n 3 | time_total: %{time_total}\n 4 | time_namelookup: %{time_namelookup}\n 5 | time_connect: %{time_connect}\n 6 | time_appconnect: %{time_appconnect}\n 7 | time_pretransfer: %{time_pretransfer}\n 8 | time_redirect: %{time_redirect}\n 9 | time_starttransfer: %{time_starttransfer}\n 10 | size_download: %{size_download}\n 11 | size_upload: %{size_upload}\n 12 | size_header: %{size_header}\n 13 | size_request: %{size_request}\n 14 | speed_download: %{speed_download}\n 15 | speed_upload: %{speed_upload}\n 16 | content_type: %{content_type}\n 17 | num_connects: %{num_connects}\n 18 | num_redirects :%{num_redirects}\n 19 | -------------------------------------------------------------------------------- /fleet-delete-cluster-registration/README.md: -------------------------------------------------------------------------------- 1 | # Fleet | Registration Resource Cleanup 2 | 3 | This is a cleanup script to work around a known Fleet bug whereby patching a downstream cluster, for instance when re-deploying a Fleet agent in such a cluster, causes new resources to be created without obsolete resources being deleted. 
Ultimately, this clutters the upstream cluster. 4 | 5 | This script retrieves all cluster registration resources, orders them by cluster then by creation timestamp, and deletes all but the youngest cluster registration for each cluster. This causes obsolete cluster registrations and their child resources to be deleted. -------------------------------------------------------------------------------- /fleet-delete-cluster-registration/delete_old_resources.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | namespace=${1-fleet-default} 4 | chunk_size=${2-100} 5 | 6 | if [ "$chunk_size" -le 1 ]; then 7 | chunk_size=1 8 | fi 9 | 10 | # We output the cluster name first, then the creation timestamp, then the 11 | # resource name for ordering to work by cluster, then by ascending creation 12 | # timestamp, which is in "YYYY-MM-DDTHH:mm:SSZ" format. 13 | jsonPath='{range .items[*]}{@.status.clusterName}{"_"}{@.metadata.creationTimestamp}{"_"}{@.metadata.name}{"\n"}{end}' 14 | cluster_regs=$(kubectl get clusterregistration -o=jsonpath="$jsonPath" -n "$namespace" | sort) 15 | 16 | read -ra regs -d '' <<< "${cluster_regs}" 17 | 18 | # delete_chunk deletes cluster registrations, extracting their names from $regs 19 | # This function operates on set of indexes between first_idx (first argument) 20 | # and last_chunk_idx (second argument), both included. 21 | delete_chunk() { 22 | first_idx=$1 23 | last_idx=$2 24 | 25 | for (( i = first_idx; i < last_idx; i++ )); do 26 | IFS=_ read -r cluster_name creation_timestamp name <<< "${regs[i]}" 27 | IFS=_ read -r next_cluster_name next_creation_timestamp next_name <<< "${regs[i+1]}" 28 | 29 | if [[ "$next_cluster_name" = "$cluster_name" ]]; then 30 | # The most recent cluster registration is still ahead of us: deletion is safe. 31 | echo -n "Cluster: $cluster_name" 32 | echo -e "\t$(kubectl delete --ignore-not-found=true clusterregistration "$name" -n "$namespace")" 33 | fi 34 | done 35 | } 36 | 37 | declare -a pids 38 | 39 | # The only resource we do not want to delete for each cluster is the last 40 | # element, most recently created. 41 | last_idx=$(( ${#regs[@]} - 1 )) 42 | if [ $chunk_size -ge $last_idx ]; then 43 | chunk_size=$last_idx 44 | fi 45 | 46 | # Start an async deletion process for each chunk. 47 | for (( i = 0; i < last_idx; i+= chunk_size )); do 48 | last_chunk_idx=$(( i + chunk_size - 1 )) 49 | if [ $last_chunk_idx -ge $last_idx ]; then 50 | last_chunk_idx="$last_idx" 51 | fi 52 | 53 | delete_chunk $i $last_chunk_idx & 54 | pids[${i}]=$! 55 | done 56 | 57 | # wait for deletion to complete on all chunks. 58 | for pid in ${pids[@]}; do 59 | wait $pid 60 | done 61 | -------------------------------------------------------------------------------- /fleet-secrets-bro-patch/README.md: -------------------------------------------------------------------------------- 1 | # Fleet | GitRepo Secret Backup Restore Patch 2 | 3 | This is a patching script to ensure all secrets used by Fleet `GitRepos` are backed up by the Rancher Backups tool. 4 | 5 | From Rancher v2.8.?? (TBD) and v2.9.0 all `Secrets` created via the Fleet UI in Rancher will be included in Rancher Backups. 6 | 7 | Any GitRepo `Secrets` created before this, or outside of the Fleet UI in Rancher, will not be included in Rancher Backups. 8 | 9 | By running this patching script on your Rancher cluster, it will identify all secrets used by GitRepos and label them as managed by Fleet. 
This labeling ensures they are backed up by Rancher Backups. 10 | 11 | ## Running the script 12 | To run this script you simply need a valid KUBECONFIG to connect to your Rancher cluster. Then execute the shell script: 13 | > ./patch_gitrepo_secrets.sh 14 | 15 | When run you should see output similar to: 16 | 17 | ```bash 18 | # ./patch_gitrepo_secrets.sh 19 | Patching unique secret combinations: 20 | Patching secret: fleet-default:auth-helm-creds 21 | secret/auth-helm-creds patched 22 | Patching secret: fleet-local:auth-gitlab-creds 23 | secret/auth-gitlab-creds patched (no change) 24 | ``` 25 | 26 | Note: If the secret already has the necessary label it will look like the `secret/auth-gitlab-creds` line above. 27 | 28 | ### Dry-run 29 | Optionally you can run the script with dry-run flag `-D`, it will produce output like: 30 | ```bash 31 | # ./patch_gitrepo_secrets.sh -D 32 | Patching unique secret combinations: 33 | Would patch secret: fleet-default/auth-6w5gn 34 | Would patch secret: fleet-default/auth-lfkdr 35 | Would patch secret: fleet-local/auth-gitlab-creds 36 | ``` -------------------------------------------------------------------------------- /fleet-secrets-bro-patch/patch_gitrepo_secrets.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | DRYRUN=0 4 | 5 | while getopts "D" opt; do 6 | case $opt in 7 | D) DRYRUN=1;; 8 | \?) echo "Invalid option: -$OPTARG"; exit 1;; 9 | esac 10 | done 11 | 12 | output=$(kubectl get gitrepo -A -o custom-columns=NAMESPACE:.metadata.namespace,CLIENT:.spec.clientSecretName,HELM:.spec.helmSecretName,HELMPATHS:.spec.helmSecretNameForPaths --no-headers) 13 | 14 | secret_combinations=() 15 | while read -r row; do 16 | # Extract the namespace and potential secret names from each row 17 | namespace=$(echo "$row" | awk '{print $1}') 18 | read -r -a secrets <<< "$(echo "$row" | awk '{print $2, $3, $4}')" 19 | # Create a list of secret combinations for this namespace 20 | for secret in "${secrets[@]}"; do 21 | if [ "$secret" != "" ]; then 22 | secret_combinations+=("$namespace:$secret") 23 | fi 24 | done 25 | done <<< "$(echo "$output" | awk '{print $0}')" 26 | 27 | # Sort and uniq the list of secret combinations 28 | sorted_secret_combinations=($(printf "%s\n" "${secret_combinations[@]}" | sort -u)) 29 | 30 | echo "Patching unique secret combinations:" 31 | for combination in "${sorted_secret_combinations[@]}"; do 32 | # Set the delimiter 33 | IFS=':' 34 | # Read the input string into two variables 35 | read -r namespace name <<< "$combination" 36 | if [ $DRYRUN -eq 1 ]; then 37 | echo "[DRY-RUN] Would patch secret: $namespace/$name" 38 | else 39 | echo "Patching secret: $combination" 40 | kubectl patch secret -n "$namespace" "$name" -p '{"metadata": {"labels": {"fleet.cattle.io/managed": "true"}}}' 41 | fi 42 | done -------------------------------------------------------------------------------- /how-to-retrieve-kubeconfig-from-custom-cluster/README.md: -------------------------------------------------------------------------------- 1 | # How to retrieve a kubeconfig from an RKE1 cluster 2 | 3 | During a Rancher outage or other disaster event you may lose access to a downstream cluster via Rancher and be unable to manage your applications. This process creates a kubeconfig to bypass Rancher, it connects directly to the local kube-apiserver on a control plane node. 
4 | 5 | **Note**: The [Authorised Cluster Endpoint (ACE)](https://ranchermanager.docs.rancher.com/how-to-guides/new-user-guides/manage-clusters/access-clusters/use-kubectl-and-kubeconfig#authenticating-directly-with-a-downstream-cluster) is a default option enabled on clusters provisioned by Rancher, this contains a second context which connects directly to the downstream kube-apiserver and also bypasses Rancher. 6 | 7 | ### Pre-requisites 8 | 9 | - Rancher v2.2.x or newer 10 | - RKE v0.2.x or newer 11 | - SSH access to one of the controlplane nodes 12 | - Access to the Docker CLI or root/sudo 13 | 14 | ## Retrieve a kubeconfig - using jq 15 | 16 | This option requires `kubectl` and `jq` to be installed on the server. 17 | 18 | **Note**: kubectl can be copied from the kubelet container 19 | 20 | ```bash 21 | docker cp kubelet:/usr/local/bin/kubectl /usr/local/bin/ 22 | ``` 23 | 24 | - Get kubeconfig (Rancher 2.7.14+/Rancher 2.8.5+, RKE 1.4.19+/RKE 1.5.10+) 25 | 26 | ```bash 27 | kubectl --kubeconfig $(docker inspect kubelet --format '{{ range .Mounts }}{{ if eq .Destination "/etc/kubernetes" }}{{ .Source }}{{ end }}{{ end }}')/ssl/kubecfg-kube-node.yaml get secrets -n kube-system full-cluster-state -o json | jq -r .data.\"full-cluster-state\" | base64 -d | jq -r .currentState.certificatesBundle.\"kube-admin\".config | sed -e "/^[[:space:]]*server:/ s_:.*_: \"https://127.0.0.1:6443\"_" > kubeconfig_admin.yaml 28 | ``` 29 | 30 | - Get kubeconfig (Earlier versions of Rancher and RKE) 31 | 32 | ```bash 33 | kubectl --kubeconfig $(docker inspect kubelet --format '{{ range .Mounts }}{{ if eq .Destination "/etc/kubernetes" }}{{ .Source }}{{ end }}{{ end }}')/ssl/kubecfg-kube-node.yaml get configmap -n kube-system full-cluster-state -o json | jq -r .data.\"full-cluster-state\" | jq -r .currentState.certificatesBundle.\"kube-admin\".config | sed -e "/^[[:space:]]*server:/ s_:.*_: \"https://127.0.0.1:6443\"_" > kubeconfig_admin.yaml 34 | ``` 35 | 36 | - Run `kubectl get nodes` 37 | ```bash 38 | kubectl --kubeconfig kubeconfig_admin.yaml get nodes 39 | ``` 40 | 41 | ## Retrieve a kubeconfig - without jq 42 | 43 | This option does not require `kubectl` or `jq` on the server because this uses the `rancher/rancher-agent` image to retrieve the kubeconfig. 
44 | 45 | - Get kubeconfig (Rancher 2.7.14+/Rancher 2.8.5+, RKE 1.4.19+/RKE 1.5.10+) 46 | ```bash 47 | docker run --rm --net=host -v $(docker inspect kubelet --format '{{ range .Mounts }}{{ if eq .Destination "/etc/kubernetes" }}{{ .Source }}{{ end }}{{ end }}')/ssl:/etc/kubernetes/ssl:ro --entrypoint bash $(docker inspect $(docker images -q --filter=label=org.opencontainers.image.source=https://github.com/rancher/hyperkube) --format='{{index .RepoTags 0}}' | tail -1) -c 'kubectl --kubeconfig /etc/kubernetes/ssl/kubecfg-kube-node.yaml get secret -n kube-system full-cluster-state -o json | jq -r .data.\"full-cluster-state\" | base64 -d | jq -r .currentState.certificatesBundle.\"kube-admin\".config | sed -e "/^[[:space:]]*server:/ s_:.*_: \"https://127.0.0.1:6443\"_"' > kubeconfig_admin.yaml 48 | ``` 49 | 50 | - Get kubeconfig (Earlier versions of Rancher and RKE) 51 | 52 | ```bash 53 | docker run --rm --net=host -v $(docker inspect kubelet --format '{{ range .Mounts }}{{ if eq .Destination "/etc/kubernetes" }}{{ .Source }}{{ end }}{{ end }}')/ssl:/etc/kubernetes/ssl:ro --entrypoint bash $(docker inspect $(docker images -q --filter=label=org.opencontainers.image.source=https://github.com/rancher/hyperkube.git) --format='{{index .RepoTags 0}}' | tail -1) -c 'kubectl --kubeconfig /etc/kubernetes/ssl/kubecfg-kube-node.yaml get configmap -n kube-system full-cluster-state -o json | jq -r .data.\"full-cluster-state\" | jq -r .currentState.certificatesBundle.\"kube-admin\".config | sed -e "/^[[:space:]]*server:/ s_:.*_: \"https://127.0.0.1:6443\"_"' > kubeconfig_admin.yaml 54 | ``` 55 | 56 | - Run `kubectl get nodes` 57 | ```bash 58 | docker run --rm --net=host -v $PWD/kubeconfig_admin.yaml:/root/.kube/config:z --entrypoint bash $(docker inspect $(docker images -q --filter=label=org.opencontainers.image.source=https://github.com/rancher/hyperkube) --format='{{index .RepoTags 0}}' | tail -1) -c 'kubectl get nodes' 59 | ``` 60 | 61 | ## Script 62 | Run `https://raw.githubusercontent.com/rancherlabs/support-tools/master/how-to-retrieve-kubeconfig-from-custom-cluster/rke-node-kubeconfig.sh` and follow the instructions given. 
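A minimal end-to-end sketch of the script option (assumes root access on a node with the `controlplane` role, per the pre-requisites above; the optional first argument is a private registry prefix used only if no local `rancher-agent` image is found):

```bash
curl -LO https://raw.githubusercontent.com/rancherlabs/support-tools/master/how-to-retrieve-kubeconfig-from-custom-cluster/rke-node-kubeconfig.sh
sudo bash rke-node-kubeconfig.sh           # or: sudo bash rke-node-kubeconfig.sh registry.example.com
kubectl --kubeconfig kubeconfig_admin.yaml get nodes
```

If `kubectl` is not installed on the node, the script prints an equivalent `docker run` command you can use instead.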
63 | -------------------------------------------------------------------------------- /how-to-retrieve-kubeconfig-from-custom-cluster/rke-node-kubeconfig.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | PRIVATE_REGISTRY="$1/" 4 | 5 | # Check if controlplane node (kube-apiserver) 6 | CONTROLPLANE=$(docker ps -q --filter=name=kube-apiserver) 7 | 8 | # Get agent image from Docker images 9 | RANCHER_IMAGE=$(docker inspect $(docker images -q --filter=label=io.cattle.agent=true) --format='{{index .RepoTags 0}}' | tail -1) 10 | 11 | if [ -z $RANCHER_IMAGE ]; then 12 | RANCHER_IMAGE="${PRIVATE_REGISTRY}rancher/rancher-agent:v2.6.11" 13 | fi 14 | 15 | if [ -d /opt/rke/etc/kubernetes/ssl ]; then 16 | K8S_SSLDIR=/opt/rke/etc/kubernetes/ssl 17 | else 18 | K8S_SSLDIR=/etc/kubernetes/ssl 19 | fi 20 | 21 | # Determine object type for full-cluster-state (depends on Rancher/RKE version), can be either a configmap (older versions) or a secret (newer versions) 22 | FULL_CLUSTER_STATE_TYPE=$(docker run --rm --net=host -v $K8S_SSLDIR:/etc/kubernetes/ssl:ro --entrypoint bash $RANCHER_IMAGE -c 'kubectl --kubeconfig /etc/kubernetes/ssl/kubecfg-kube-node.yaml get configmap -n kube-system full-cluster-state -o jsonpath='{.kind}' 2>/dev/null || kubectl --kubeconfig /etc/kubernetes/ssl/kubecfg-kube-node.yaml get secret -n kube-system full-cluster-state -o jsonpath='{.kind}' 2>/dev/null') 23 | 24 | # Generate kubeconfig depending on object type for full-cluster-state 25 | if [ "$FULL_CLUSTER_STATE_TYPE" = "Secret" ]; then 26 | docker run --rm --net=host -v $K8S_SSLDIR:/etc/kubernetes/ssl:ro --entrypoint bash $RANCHER_IMAGE -c 'kubectl --kubeconfig /etc/kubernetes/ssl/kubecfg-kube-node.yaml get secret -n kube-system full-cluster-state -o json | jq -r .data.\"full-cluster-state\" | base64 -d | jq -r .currentState.certificatesBundle.\"kube-admin\".config | sed -e "/^[[:space:]]*server:/ s_:.*_: \"https://127.0.0.1:6443\"_"' > kubeconfig_admin.yaml 27 | elif [ "$FULL_CLUSTER_STATE_TYPE" = "ConfigMap" ]; then 28 | docker run --rm --net=host -v $K8S_SSLDIR:/etc/kubernetes/ssl:ro --entrypoint bash $RANCHER_IMAGE -c 'kubectl --kubeconfig /etc/kubernetes/ssl/kubecfg-kube-node.yaml get configmap -n kube-system full-cluster-state -o json | jq -r .data.\"full-cluster-state\" | jq -r .currentState.certificatesBundle.\"kube-admin\".config | sed -e "/^[[:space:]]*server:/ s_:.*_: \"https://127.0.0.1:6443\"_"' > kubeconfig_admin.yaml 29 | else 30 | echo "Invalid type for object \"full-cluster-state\" (should be a Secret or a ConfigMap). Exiting..." 
31 | exit 1 32 | fi 33 | 34 | if [ -s kubeconfig_admin.yaml ]; then 35 | if [ -z $CONTROLPLANE ]; then 36 | echo "This is supposed to be run on a node with the 'controlplane' role as it will try to connect to https://127.0.0.1:6443" 37 | echo "You can manually change the 'server:' parameter inside 'kubeconfig_admin.yaml' to point to a node with the 'controlplane' role" 38 | fi 39 | echo "Kubeconfig is stored at: kubeconfig_admin.yaml 40 | 41 | You can use on of the following commands to use it: 42 | 43 | docker run --rm --net=host -v $PWD/kubeconfig_admin.yaml:/root/.kube/config --entrypoint bash $RANCHER_IMAGE -c 'kubectl get nodes' 44 | 45 | kubectl --kubeconfig kubeconfig_admin.yaml get nodes 46 | 47 | Note: if kubectl is not available on the node, the binary can be copied from the kubelet container: 48 | docker cp kubelet:/usr/local/bin/kubectl /usr/local/bin/" 49 | else 50 | echo "Failed to retrieve kubeconfig" 51 | fi -------------------------------------------------------------------------------- /instant-fio-master/README.md: -------------------------------------------------------------------------------- 1 | # instant-fio-master.sh 2 | Simple script to install fio from source. It does the following: 3 | 4 | - Installs dependencies automatically on RedHat and Debian based operating systems. 5 | - clones fio master branch 6 | - compiles fio from source then performs a make install 7 | - updates ~/.bash_profile to add /usr/local/bin/ to your PATH if it isn't already there 8 | 9 | Usage: 10 | ``` 11 | curl -LO https://raw.githubusercontent.com/rancherlabs/support-tools/master/instant-fio-master/instant-fio-master.sh 12 | bash instant-fio-master.sh 13 | 14 | mkdir test-data 15 | fio --rw=write --ioengine=sync --fdatasync=1 --directory=test-data --size=100m --bs=2300 --name=mytest 16 | ``` 17 | -------------------------------------------------------------------------------- /instant-fio-master/instant-fio-master.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | red=$(tput setaf 1) 3 | green=$(tput setaf 2) 4 | reset=$(tput sgr0) 5 | START_TIME=$(date +%Y-%m-%d--%H%M%S) 6 | SCRIPT_NAME="instant-fio-master.sh" 7 | function helpmenu() { 8 | echo "Usage: ${SCRIPT_NAME} 9 | " 10 | exit 1 11 | } 12 | while getopts "h" opt; do 13 | case ${opt} in 14 | h) # process option h 15 | helpmenu 16 | ;; 17 | \?) 18 | helpmenu 19 | exit 1 20 | ;; 21 | esac 22 | done 23 | if [[ $EUID -ne 0 ]]; then 24 | echo "This script must be run as root" 25 | exit 1 26 | fi 27 | #set os and install dependencies 28 | if [[ -f /etc/lsb-release ]]; then 29 | OS=ubuntu 30 | echo You are using Ubuntu 31 | apt install -y gcc zlib1g-dev make git 32 | fi 33 | if [[ -f /etc/redhat-release ]]; then 34 | OS=redhat 35 | echo You are using Red Hat 36 | yum -y install zlib-devel gcc make git 37 | fi 38 | 39 | if ! hash fio 2>/dev/null; then 40 | git clone git://git.kernel.dk/fio.git 41 | cd fio 42 | ./configure 43 | make 44 | make install 45 | if [[ '/usr/local/bin' != *"$PATH"* ]]; then 46 | export PATH=/usr/local/bin:$PATH 47 | echo "Run the following command or logout and log back in again so that your .bash_profile can add it for you." 48 | echo 'export PATH=/usr/local/bin:$PATH' 49 | echo 'export PATH=/usr/local/bin:$PATH' >>~/.bash_profile 50 | fi 51 | else 52 | echo "fio is already installed." 
53 | fi 54 | -------------------------------------------------------------------------------- /kubecert/README.md: -------------------------------------------------------------------------------- 1 | Credit for the logic that retrieves the KUBECONFIG goes to [Superseb](https://github.com/superseb/) 2 | 3 | # kubecert 4 | This script will set you up with kubectl and retrieve your local kube config for a cluster provisioned by RKE or Rancher. Option -y will auto install kubectl and jq for linux. 5 | Usage: 6 | ```bash 7 | curl -LO https://github.com/rancherlabs/support-tools/raw/master/kubecert/kubecert.sh 8 | bash ./kubecert.sh -y 9 | ``` 10 | -------------------------------------------------------------------------------- /kubecert/base64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rancherlabs/support-tools/2fa26cd1c13cc329f5553f88adfe693ee978848e/kubecert/base64 -------------------------------------------------------------------------------- /kubecert/jq-linux64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rancherlabs/support-tools/2fa26cd1c13cc329f5553f88adfe693ee978848e/kubecert/jq-linux64 -------------------------------------------------------------------------------- /longhorn/PlaceHolder.md: -------------------------------------------------------------------------------- 1 | ## Longhorn Scripts -------------------------------------------------------------------------------- /migrate-vsphere-clusters/README.md: -------------------------------------------------------------------------------- 1 | # migrate-vsphere-clusters 2 | 3 | This script is to be used as part of the patch process for Rancher's 4 | [CVE-2022-45157]. 5 | 6 | 7 | [CVE-2022-45157]: https://github.com/rancher/rancher/security/advisories/GHSA-xj7w-r753-vj8v 8 | 9 | -------------------------------------------------------------------------------- /rancher-cleanup/README.md: -------------------------------------------------------------------------------- 1 | # Rancher resource cleanup script 2 | 3 | This has been moved to [rancher/rancher-cleanup](https://github.com/rancher/rancher-cleanup). 4 | -------------------------------------------------------------------------------- /rancher-crd/enumerate-resources/README.md: -------------------------------------------------------------------------------- 1 | # rancher-resource-enumerator 2 | 3 | Rancher Custom Resource enumeration script 4 | 5 | ## Dependencies 6 | 7 | * `kubectl` 8 | * Linux, MacOS or WSL2 9 | 10 | ## How to use 11 | 12 | * Download the script and save as: `rancher-resource-enumerator.sh` 13 | * Make sure the script is executable: `chmod u+x ./rancher-resource-enumerator.sh` 14 | * Run the script: `./rancher-resource-enumerator.sh -a` 15 | 16 | The script will output all Rancher custom resource data in the `/tmp/enum-cattle-resources-` directory by default. The `totals` file will give the total count for all resources. 17 | 18 | ## Flags 19 | 20 | ``` 21 | Rancher Resource Enumerator 22 | Usage: ./rancher-resource-enumerator.sh [ -d -n | -c | -a ] 23 | -h Display this help message. 24 | -a Enumerate all custom resources. 25 | -n Only enumerate resources in the specified namespace(s). 26 | -c Only enumerate cluster (non-namespaced) resources. 27 | -d Path to output directory (default: /tmp/enum-cattle-resources-). 
28 | ``` 29 | -------------------------------------------------------------------------------- /rancher-crd/enumerate-resources/rancher-resource-enumerator.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | datenow="$(date "+%F-%H-%M-%S")" 4 | outputdir="/tmp/enum-cattle-resources-$datenow" 5 | export outputdir 6 | 7 | usage() { 8 | printf "Rancher Resource Enumerator \n" 9 | printf "Usage: ./rancher-resource-enumerator.sh [ -d -n | -c | -a ]\n" 10 | printf " -h Display this help message.\n" 11 | printf " -a Enumerate all custom resources.\n" 12 | printf " -n Only enumerate resources in the specified namespace(s).\n" 13 | printf " -c Only enumerate cluster (non-namespaced) resources.\n" 14 | printf " -d Path to output directory (default: /tmp/enum-cattle-resources-).\n" 15 | exit 0 16 | } 17 | 18 | # Arguments 19 | optstring="cahd:n:" 20 | while getopts ${optstring} opt; do 21 | case ${opt} in 22 | h) usage 23 | ;; 24 | d) path=${OPTARG} 25 | outputdir="$path-$datenow" 26 | export outputdir 27 | ;; 28 | a) all=1 29 | export all 30 | ;; 31 | n) namespaces=${OPTARG} 32 | export namespaces 33 | ;; 34 | c) cluster=1 35 | export cluster 36 | ;; 37 | *) printf "Invalid Option: %s.\n" "$1" 38 | usage 39 | ;; 40 | esac 41 | done 42 | 43 | 44 | # Setup 45 | setup() { 46 | # Create output directory 47 | echo "Output directory set to $outputdir" 48 | mkdir -p "$outputdir" 49 | } 50 | 51 | # Get cluster resources 52 | non_namespaced() { 53 | kubectl api-resources --verbs=list --namespaced=false -o name | grep cattle.io | xargs -I _ sh -c "echo '(cluster) enumerating _ resources...'; kubectl get _ -o custom-columns=KIND:.kind,NAME:.metadata.name --no-headers=true --ignore-not-found=true >> $outputdir/_" 54 | } 55 | 56 | # Get namespaced resources 57 | namespaced() { 58 | ns="$1" 59 | # Select all namespaces if no namespace is specified 60 | if [ -z "$ns" ]; then 61 | ns="$(kubectl get ns --no-headers -o jsonpath='{.items[*].metadata.name}')" 62 | fi 63 | # Get all custom resources for validated namespaces 64 | for n in $ns 65 | do 66 | kubectl get ns "$n" -o name && \ 67 | kubectl api-resources --verbs=list --namespaced=true -o name | grep cattle.io | xargs -I _ sh -c "echo '(namespaced) enumerating _ resources in $n...'; kubectl get _ -n $n -o custom-columns=KIND:.kind,NAME:.metadata.name,NAMESPACE:.metadata.namespace --no-headers=true --ignore-not-found=true >> $outputdir/_" 68 | done 69 | } 70 | 71 | # Get total counts 72 | totals() { 73 | countfiles="$outputdir/*" 74 | echo 'counting totals...' 
75 | for f in $countfiles 76 | do 77 | wc -l "$f" >> "$outputdir"/totals 78 | done 79 | echo "results saved in $outputdir" 80 | exit 0 81 | } 82 | 83 | main() { 84 | if [ -n "$all" ]; then 85 | setup 86 | non_namespaced 87 | namespaced 88 | totals 89 | elif [ -n "$cluster" ]; then 90 | setup 91 | non_namespaced 92 | totals 93 | elif [ -n "$namespaces" ]; then 94 | setup 95 | namespaced "$namespaces" 96 | totals 97 | else 98 | usage 99 | fi 100 | } 101 | 102 | main -------------------------------------------------------------------------------- /rancher-metadata-syncer/Dockerfile: -------------------------------------------------------------------------------- 1 | ## Running builder to download metadata files 2 | FROM alpine AS builder 3 | MAINTAINER Matthew Mattox matt.mattox@suse.com 4 | RUN apk update && apk add --update-cache \ 5 | wget \ 6 | bash \ 7 | && rm -rf /var/cache/apk/* 8 | 9 | ADD *.sh /usr/local/bin/ 10 | RUN chmod +x /usr/local/bin/*.sh 11 | WORKDIR /root/ 12 | RUN /usr/local/bin/download.sh 13 | 14 | ## Building webserver 15 | FROM httpd:alpine 16 | MAINTAINER Matthew Mattox matt.mattox@suse.com 17 | RUN apk update && apk add --update-cache \ 18 | wget \ 19 | curl \ 20 | bash \ 21 | gzip \ 22 | && rm -rf /var/cache/apk/* 23 | 24 | WORKDIR /var/www/localhost 25 | COPY --from=builder /root/*.json /usr/local/apache2/htdocs/ 26 | COPY --from=builder /usr/local/bin/*.sh /usr/local/bin/ 27 | CMD /usr/local/bin/run.sh 28 | -------------------------------------------------------------------------------- /rancher-metadata-syncer/README.md: -------------------------------------------------------------------------------- 1 | # rancher-metadata-syncer 2 | Rancher Metadata Syncer is a simple pod for publishing the Rancher metadata.json in an airgap setup to allow Rancher to get updated metadata files without granting Rancher internet access or upgrading Rancher. 3 | 4 | ## Installation 5 | 6 | Note: The following tool should only be deployed on the Rancher Local cluster and not on a downstream cluster. 7 | 8 | ### Option A - Configmap 9 | The Configmap option is used when you would like to add the metadata files via a Configmap. 10 | Note: The following steps should be run from a server/workstation with internet access. 11 | 12 | - Download the metadata file(s) 13 | ```bash 14 | wget --no-check-certificate -O v2-4.json https://releases.rancher.com/kontainer-driver-metadata/release-v2.4/data.json 15 | wget --no-check-certificate -O v2-5.json https://releases.rancher.com/kontainer-driver-metadata/release-v2.5/data.json 16 | tar -czvf v2-4.json.tar.gz v2-4.json 17 | tar -czvf v2-5.json.tar.gz v2-5.json 18 | ``` 19 | 20 | - Create the Configmap with the metadata files. 21 | 22 | ```bash 23 | kubectl -n cattle-system create configmap rancher-metadata --from-file=v2-4.json=./v2-4.json.tar.gz --from-file=v2-5.json=./v2-5.json.tar.gz 24 | ``` 25 | 26 | - Deploy the workload 27 | ```bash 28 | kubectl apply -f deployment-configmap.yaml 29 | ``` 30 | 31 | - If you would update the metadata file, please do the following. 
32 | 33 | ```bash 34 | wget --no-check-certificate -O v2-4.json https://releases.rancher.com/kontainer-driver-metadata/release-v2.4/data.json 35 | wget --no-check-certificate -O v2-5.json https://releases.rancher.com/kontainer-driver-metadata/release-v2.5/data.json 36 | tar -czvf v2-4.json.tar.gz v2-4.json 37 | tar -czvf v2-5.json.tar.gz v2-5.json 38 | kubectl -n cattle-system delete configmap rancher-metadata 39 | kubectl -n cattle-system create configmap rancher-metadata --from-file=v2-4.json.tar.gz=./v2-4.json.tar.gz --from-file=v2-5.json.tar.gz=./v2-5.json.tar.gz 40 | kubectl -n cattle-system patch deployment rancher-metadata -p "{\"spec\":{\"template\":{\"metadata\":{\"labels\":{\"date\":\"$(date +%s)\"}}}}}" 41 | ``` 42 | 43 | ### Option B - Proxy 44 | The proxy option is used if you would like the deployment to automatedly download the metadata files every 6 hours without opening all of Rancher to the internet via the Proxy. 45 | 46 | - Edit values HTTP_PROXY and HTTPS_PROXY in deployment-proxy.yaml match your environment requirements. 47 | ```bash 48 | - name: HTTPS_PROXY 49 | value: "https://:@:/" 50 | - name: HTTP_PROXY 51 | value: "http://:@:/" 52 | ``` 53 | 54 | - Deploy the workload 55 | ```bash 56 | kubectl apply -f deployment-proxy.yaml 57 | ``` 58 | 59 | ## Updating Rancher 60 | 61 | - Browse to the Rancher UI -> Global -> Settings -> rke-metadata-config 62 | 63 | - Update the value to the following for Rancher v2.4.x 64 | ``` 65 | { 66 | "refresh-interval-minutes": "60", 67 | "url": "http://rancher-metadata/v2-4.json" 68 | } 69 | ``` 70 | 71 | - Update the value to the following for Rancher v2.5.x 72 | ``` 73 | { 74 | "refresh-interval-minutes": "60", 75 | "url": "http://rancher-metadata/v2-5.json" 76 | } 77 | ``` 78 | -------------------------------------------------------------------------------- /rancher-metadata-syncer/apache.conf: -------------------------------------------------------------------------------- 1 | 2 | ServerAdmin admin@localhost 3 | ServerName localhost 4 | DocumentRoot /var/www/src 5 | 6 | Options Indexes FollowSymLinks MultiViews 7 | AllowOverride All 8 | Order allow,deny 9 | Allow from all 10 | Require all granted 11 | 12 | ErrorLog ${APACHE_LOG_DIR}/error.log 13 | CustomLog ${APACHE_LOG_DIR}/access.log combined 14 | 15 | -------------------------------------------------------------------------------- /rancher-metadata-syncer/deployment-configmap.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: apps/v1 3 | kind: Deployment 4 | metadata: 5 | labels: 6 | app: rancher-metadata 7 | name: rancher-metadata 8 | namespace: cattle-system 9 | spec: 10 | replicas: 1 11 | selector: 12 | matchLabels: 13 | app: rancher-metadata 14 | template: 15 | metadata: 16 | labels: 17 | app: rancher-metadata 18 | spec: 19 | containers: 20 | - image: rancher/metadata-syncer:latest 21 | imagePullPolicy: IfNotPresent 22 | name: rancher-metadata 23 | livenessProbe: 24 | httpGet: 25 | path: /healthz 26 | port: 80 27 | initialDelaySeconds: 3 28 | periodSeconds: 3 29 | readinessProbe: 30 | httpGet: 31 | path: /healthz 32 | port: 80 33 | initialDelaySeconds: 5 34 | periodSeconds: 5 35 | volumeMounts: 36 | - mountPath: /data 37 | name: metadata 38 | volumes: 39 | - configMap: 40 | defaultMode: 256 41 | name: rancher-metadata 42 | optional: false 43 | name: metadata 44 | --- 45 | apiVersion: v1 46 | kind: Service 47 | metadata: 48 | name: rancher-metadata 49 | namespace: cattle-system 50 | spec: 51 | selector: 52 | app: 
rancher-metadata 53 | ports: 54 | - protocol: TCP 55 | port: 80 56 | targetPort: 80 57 | -------------------------------------------------------------------------------- /rancher-metadata-syncer/deployment-proxy.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: apps/v1 3 | kind: Deployment 4 | metadata: 5 | labels: 6 | app: rancher-metadata 7 | name: rancher-metadata 8 | namespace: cattle-system 9 | spec: 10 | replicas: 1 11 | selector: 12 | matchLabels: 13 | app: rancher-metadata 14 | template: 15 | metadata: 16 | labels: 17 | app: rancher-metadata 18 | spec: 19 | containers: 20 | - env: 21 | - name: HTTPS_PROXY 22 | value: https://:@:/ 23 | - name: HTTP_PROXY 24 | value: http://:@:/ 25 | image: rancher/metadata-syncer:latest 26 | imagePullPolicy: IfNotPresent 27 | livenessProbe: 28 | failureThreshold: 3 29 | httpGet: 30 | path: /healthz 31 | port: 80 32 | scheme: HTTP 33 | initialDelaySeconds: 3 34 | periodSeconds: 3 35 | successThreshold: 1 36 | timeoutSeconds: 1 37 | name: rancher-metadata 38 | readinessProbe: 39 | failureThreshold: 3 40 | httpGet: 41 | path: /healthz 42 | port: 80 43 | scheme: HTTP 44 | initialDelaySeconds: 5 45 | periodSeconds: 5 46 | successThreshold: 1 47 | timeoutSeconds: 1 48 | 49 | --- 50 | apiVersion: v1 51 | kind: Service 52 | metadata: 53 | name: rancher-metadata 54 | namespace: cattle-system 55 | spec: 56 | selector: 57 | app: rancher-metadata 58 | ports: 59 | - protocol: TCP 60 | port: 80 61 | targetPort: 80 62 | -------------------------------------------------------------------------------- /rancher-metadata-syncer/download.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo "Downloading kontainer-driver-metadata for v2.4" 3 | wget --no-check-certificate -O v2-4.json https://releases.rancher.com/kontainer-driver-metadata/release-v2.4/data.json 4 | 5 | echo "Downloading kontainer-driver-metadata for v2.5" 6 | wget --no-check-certificate -O v2-5.json https://releases.rancher.com/kontainer-driver-metadata/release-v2.5/data.json 7 | -------------------------------------------------------------------------------- /rancher-metadata-syncer/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo "Starting webserver..." 3 | apachectl start 4 | echo "ok" > /usr/local/apache2/htdocs/healthz 5 | if [[ ! -z $HTTP_PROXY ]] || [[ ! -z $HTTPS_PROXY ]] 6 | then 7 | echo "Detected proxy settings." 8 | echo "Starting downloader..." 9 | while true 10 | do 11 | /usr/local/bin/download.sh 12 | echo "Sleeping..." 13 | sleep 6h 14 | done 15 | fi 16 | 17 | if [[ -d /data ]] 18 | then 19 | echo "Configmap detected, loading json files from Configmap..." 20 | tar -zvxf v2-5.json.tar.gz -C /usr/local/apache2/htdocs/ 21 | tar -zvxf v2-5.json.tar.gz -C /usr/local/apache2/htdocs/ 22 | fi 23 | 24 | echo "Starting in static mode" 25 | while true 26 | do 27 | sleep 10000 28 | done 29 | -------------------------------------------------------------------------------- /reverse-rke-state-migrations/README.md: -------------------------------------------------------------------------------- 1 | # reverse-rke-state-migrations.sh 2 | This script can be used to reverse RKE cluster state migrations that are performed automatically by Rancher on all downstream RKE clusters as of releases `v2.7.14`, and `v2.8.5`. 
Running this script should only be necessary if you have upgraded to a Rancher version at or above the aforementioned versions and need to restore Rancher back to a version that is older than the aforementioned versions. For example, you're on `v2.8.0` and you take a backup of Rancher and then upgrade to `v2.8.5`, but then you restore Rancher from your backup. In this case, you'd have to use this script to reverse the RKE cluster state migrations that would have occurred during the upgrade to `v2.8.5`. 3 | 4 | ## Usage 5 | ⚠️ **WARNING:** Before running this script, please ensure that **you've backed up your downstream RKE clusters**. The script **will delete `full-cluster-state` secrets from downstream RKE clusters**. 6 | 7 | 1. Take backups of your downstream RKE clusters. 8 | 2. Ensure you have [kubectl](https://kubernetes.io/docs/tasks/tools/#kubectl), [jq](https://jqlang.github.io/jq/), and [yq](https://mikefarah.gitbook.io/yq/#install) installed. 9 | 3. Generate a Rancher API token and use it to set the `RANCHER_TOKEN` environment variable. 10 | 4. Run the script pointing to your Rancher server URL. 11 | 12 | ```shell 13 | export RANCHER_TOKEN= 14 | ./reverse-rke-state-migrations.sh --rancher-host 15 | ``` 16 | 17 | This script will iterate over all downstream RKE clusters and, for each one, it will ensure that a `full-cluster-state` ConfigMap exists inside the cluster as is expected by older versions of RKE. After doing this successfully for each of the targeted clusters, the script will remove a ConfigMap from the local cluster that marks the original migration as complete since it will effectively have been reversed. 18 | -------------------------------------------------------------------------------- /reverse-rke-state-migrations/reverse-rke-state-migrations.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | set -o pipefail 5 | 6 | # Use this to specify a custom kubectl base command or options. 7 | KUBECTL="kubectl" 8 | 9 | # Use this to specify a custom curl base command or options. 10 | # By default, we pass options that make curl silent, except when errors occur, 11 | # and we also force CURL to error if HTTP requests do not receive successful 12 | # (2xx) response codes. 13 | CURL="curl -sSf" 14 | 15 | function display_help() { 16 | echo 'This script can be used to reverse RKE cluster state migrations.' 
17 | echo 'Please ensure the $RANCHER_TOKEN environment variable is set to a valid Rancher API admin token' 18 | echo 'Please also ensure the following tools are installed:' 19 | echo ' kubectl: https://kubernetes.io/docs/tasks/tools/#kubectl' 20 | echo ' jq: https://jqlang.github.io/jq' 21 | echo ' yq: https://mikefarah.gitbook.io/yq/#install' 22 | echo 23 | echo 24 | echo "Usage: $(basename $0) --rancher-host [Rancher hostname]" 25 | echo 26 | echo ' $RANCHER_TOKEN [Required] Environment variable containing Rancher admin token' 27 | echo ' -n, --rancher-host [Required] Rancher hostname' 28 | echo ' -k, --insecure-skip-tls-verify [Optional] Skip certificate verification' 29 | echo " -d, --debug [Optional] Calls 'set -x'" 30 | echo " -h, --help Print this message" 31 | } 32 | 33 | POSITIONAL_ARGS=() 34 | 35 | while [[ $# -gt 0 ]]; do 36 | case $1 in 37 | -n|--rancher-host) 38 | RANCHER_HOST="$2" 39 | shift # past argument 40 | shift # past value 41 | ;; 42 | -k|--insecure-skip-tls-verify) 43 | KUBECTL="$KUBECTL --insecure-skip-tls-verify" 44 | CURL="$CURL -k" 45 | shift # past argument 46 | ;; 47 | -d|--debug) 48 | set -x 49 | shift # past argument 50 | ;; 51 | -h|--help) 52 | display_help 53 | exit 1 54 | ;; 55 | -*|--*) 56 | echo "Unknown option $1" 57 | display_help 58 | exit 1 59 | ;; 60 | *) 61 | POSITIONAL_ARGS+=("$1") # save positional arg 62 | shift # past argument 63 | ;; 64 | esac 65 | done 66 | 67 | set -- "${POSITIONAL_ARGS[@]}" # restore positional parameters 68 | 69 | # Make sure a Rancher API token was set 70 | if [[ -z "$RANCHER_TOKEN" ]]; then 71 | echo 'ERROR: $RANCHER_TOKEN is unset' 72 | display_help 73 | exit 1 74 | fi 75 | 76 | # Make sure a rancher host was set 77 | if [[ -z "$RANCHER_HOST" ]]; then 78 | echo 'ERROR: --rancher-host is unset' 79 | display_help 80 | exit 1 81 | fi 82 | 83 | # Make sure the jq command is available 84 | if ! command -v "jq" &> /dev/null; then 85 | echo "Missing jq command. See download/installation instructions at https://jqlang.github.io/jq/." 86 | exit 1 87 | fi 88 | 89 | # Make sure the yq command is available 90 | if ! command -v "yq" &> /dev/null; then 91 | echo "Missing yq command. See download/installation instructions at https://mikefarah.gitbook.io/yq/#install." 92 | exit 1 93 | fi 94 | 95 | # Make sure the kubectl command is available 96 | if ! command -v "kubectl" &> /dev/null; then 97 | echo "Missing kubectl command. See download/installation instructions at https://kubernetes.io/docs/tasks/tools/#kubectl." 98 | exit 1 99 | fi 100 | 101 | # Downloads kubeconfig for the cluster with ID $MANAGEMENT_CLUSTER_ID. 102 | downloadKubeConfig() { 103 | $CURL -X 'POST' -H 'accept: application/yaml' -u "$RANCHER_TOKEN" \ 104 | "https://${RANCHER_HOST}/v3/clusters/${MANAGEMENT_CLUSTER_ID}?action=generateKubeconfig" \ 105 | | yq -r '.config' > .kube/config-"$MANAGEMENT_CLUSTER_ID" 106 | } 107 | 108 | # Downloads kubeconfig for the local cluster. 109 | getLocalKubeConfig() { 110 | $CURL -X 'POST' -H 'accept: application/yaml' -u "$RANCHER_TOKEN" \ 111 | "https://${RANCHER_HOST}/v3/clusters/local?action=generateKubeconfig" \ 112 | | yq -r '.config' > .kube/config 113 | } 114 | 115 | # Moves downstream cluster state from a secret to a configmap. 116 | reverseMigrateClusterState() { 117 | # Load cluster state from the secret 118 | SECRET=$($KUBECTL get secret full-cluster-state -n kube-system -o yaml) 119 | if [ $? 
-ne 0 ]; then 120 | echo "[cluster=$MANAGEMENT_CLUSTER_ID] failed to fetch secret full-cluster-state, skipping this cluster" 121 | return 122 | fi 123 | 124 | # Make sure the cluster state is not empty or invalid 125 | CLUSTER_STATE=$(echo "$SECRET" | yq -r '.data.full-cluster-state' | base64 --decode) 126 | if [[ "$?" -ne 0 || "${PIPESTATUS[0]}" -ne 0 || "${PIPESTATUS[1]}" -ne 0 || "${PIPESTATUS[2]}" -ne 0 ]]; then 127 | echo "[cluster=$MANAGEMENT_CLUSTER_ID] failed to decode cluster state, skipping this cluster" 128 | return 129 | fi 130 | 131 | if [ -z "$CLUSTER_STATE" ]; then 132 | echo "[cluster=$MANAGEMENT_CLUSTER_ID] cluster state is empty, skipping this cluster" 133 | return 134 | fi 135 | 136 | # Copy cluster state to a configmap 137 | $KUBECTL create configmap full-cluster-state -n kube-system --from-literal=full-cluster-state="$CLUSTER_STATE" 138 | 139 | # Remove the secret 140 | $KUBECTL delete secret full-cluster-state -n kube-system 141 | } 142 | 143 | # Performs reverse migrations on all downstream RKE clusters. 144 | reverseMigrateRKEClusters() { 145 | # Download kubeconfig for the local cluster 146 | getLocalKubeConfig 147 | 148 | # Fetch all RKE cluster IDs 149 | MANAGEMENT_CLUSTER_IDS=($( 150 | $CURL -H 'accept: application/json' -u "$RANCHER_TOKEN" \ 151 | "https://${RANCHER_HOST}/v1/management.cattle.io.cluster?exclude=metadata.managedFields" \ 152 | | jq -r '.data[] | select(.spec.rancherKubernetesEngineConfig) | .id') 153 | ) 154 | 155 | # Migrate each RKE cluster's state 156 | for MANAGEMENT_CLUSTER_ID in "${MANAGEMENT_CLUSTER_IDS[@]}" 157 | do 158 | # Download and point to downstream cluster kubeconfig 159 | downloadKubeConfig 160 | export KUBECONFIG=".kube/config-$MANAGEMENT_CLUSTER_ID" 161 | 162 | echo "Moving state back to configmap for cluster $MANAGEMENT_CLUSTER_ID" 163 | set +e 164 | reverseMigrateClusterState 165 | set -e 166 | done 167 | 168 | # Remove the migration configmap since we've reversed the migrations 169 | if $KUBECTL get configmap migraterkeclusterstate -n cattle-system > /dev/null 2>&1; then 170 | echo "Deleting configmap migraterkeclusterstate" 171 | $KUBECTL delete configmap migraterkeclusterstate -n cattle-system 172 | fi 173 | } 174 | 175 | main() { 176 | # Create temp directory to which we'll download cluster kubeconfig files. 177 | cd "$(mktemp -d)" 178 | echo "Using temp directory $(pwd)" 179 | 180 | echo "WARNING: 'full-cluster-state' secrets will be deleted for downstream RKE clusters after being moved." 181 | echo -n "Please make sure you've backed them up before proceeding. Proceed? (yes/no) " 182 | read ANSWER 183 | 184 | if [ "$ANSWER" = "yes" ]; then 185 | mkdir -p .kube 186 | reverseMigrateRKEClusters 187 | rm -rf .kube 188 | elif [ "$ANSWER" = "no" ]; then 189 | echo "Aborting" 190 | exit 1 191 | else 192 | echo "Invalid response. Please type 'yes' or 'no'." 193 | exit 1 194 | fi 195 | } 196 | 197 | main 198 | -------------------------------------------------------------------------------- /rotate-tokens/README.md: -------------------------------------------------------------------------------- 1 | # rotate-tokens.sh 2 | 3 | This script is used to rotate the main service account and token for a Rancher 4 | downstream cluster. It may be used in the event of a known token exposure or as 5 | a routine preemptive measure. 6 | 7 | ## Usage 8 | 9 | Generate an API token in Rancher and use it to set the TOKEN environment 10 | variable. Set KUBECONFIG to point to your Rancher local cluster. 
Set 11 | RANCHER_SERVER to point to your Rancher service. The script can be run without 12 | any arguments. Example: 13 | 14 | ``` 15 | export TOKEN=token-ccabc:xyz123 16 | export KUBECONFIG=/path/to/kubeconfig 17 | export RANCHER_SERVER=https://rancher.example.com 18 | ./rotate-tokens.sh 19 | ``` 20 | 21 | For extra debugging information, run with DEBUG=y: 22 | 23 | ``` 24 | DEBUG=y ./rotate-tokens.sh 25 | ``` 26 | 27 | The script iterates over each downstream cluster sequentially. If you have many 28 | downstream clusters, this may take several minutes. Do not interrupt the script. 29 | 30 | The script generates kubeconfigs for each downstream cluster and stores them in 31 | `./kubeconfigs` in the current working directory. They can be removed with 32 | `rm -r kubeconfigs`. 33 | -------------------------------------------------------------------------------- /rotate-tokens/rotate-tokens.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | if [ -n "$DEBUG" ] 4 | then 5 | set -x 6 | fi 7 | 8 | usage() { 9 | echo 'TOKEN= KUBECONFIG= RANCHER_SERVER= ./rotate-tokens.sh' 10 | exit 0 11 | } 12 | 13 | if [ "$1" == "help" ] 14 | then 15 | usage 16 | fi 17 | 18 | if [ "$TOKEN" == "" ] 19 | then 20 | echo 'Create an API token in the Rancher UI and set the environment variable TOKEN before running this script.' 21 | exit 1 22 | fi 23 | 24 | if [ "$RANCHER_SERVER" == "" ] 25 | then 26 | echo 'Set $RANCHER_SERVER to point to the Rancher URL.' 27 | exit 1 28 | fi 29 | 30 | if curl --insecure -s -u $TOKEN "${RANCHER_SERVER}/v3" | grep Unauthorized >/dev/null 31 | then 32 | echo "Not authorized for Rancher server $RANCHER_SERVER." 33 | exit 1 34 | fi 35 | 36 | if ! which kubectl >/dev/null 37 | then 38 | echo 'kubectl and jq must be installed.' 39 | exit 1 40 | fi 41 | 42 | if ! which jq >/dev/null 43 | then 44 | echo 'kubectl and jq must be installed.' 45 | exit 1 46 | fi 47 | 48 | if ! kubectl get namespace cattle-global-data >/dev/null 2>&1 49 | then 50 | echo 'Set $KUBECONFIG to point to the Rancher local cluster.' 
51 | exit 1 52 | fi 53 | 54 | cleanup() { 55 | kubectl --namespace cattle-system patch deployment cattle-cluster-agent --patch '{"spec": {"template": {"spec": {"serviceAccount": "cattle", "serviceAccountName": "cattle"}}}}' 56 | kubectl --namespace cattle-system rollout status deployment cattle-cluster-agent 57 | kubectl --namespace cattle-system delete serviceaccount cattle-tmp >/dev/null 2>&1 || true 58 | kubectl --namespace cattle-system delete secret cattle-tmp-token >/dev/null 2>&1 || true 59 | kubectl delete clusterrolebinding cattle-admin-binding-tmp >/dev/null 2>&1 || true 60 | rm -f .error 61 | } 62 | 63 | create_token_secret() { 64 | name=$1 65 | uid=$2 66 | cat < kubeconfigs/${c}.config 118 | KUBECONFIG=kubeconfigs/${c}.config 119 | 120 | # create temporary admin account 121 | tmpuid=$(kubectl --namespace cattle-system create serviceaccount cattle-tmp --output jsonpath='{.metadata.uid}' 2>.error || true) 122 | if [ -s .error ] 123 | then 124 | if grep 'already exists' .error >/dev/null 125 | then 126 | tmpuid=$(kubectl --namespace cattle-system get serviceaccount cattle-tmp --output jsonpath='{.metadata.uid}') 127 | else 128 | cat .error 129 | rm .error 130 | exit 1 131 | fi 132 | rm .error 133 | fi 134 | create_token_secret cattle-tmp $tmpuid 135 | kubectl create clusterrolebinding --clusterrole cattle-admin --serviceaccount cattle-system:cattle-tmp cattle-admin-binding-tmp 2>.error || true 136 | if [ -s .error ] 137 | then 138 | if ! grep 'already exists' .error >/dev/null 139 | then 140 | cat .error 141 | rm .error 142 | exit 1 143 | fi 144 | rm .error 145 | fi 146 | token=$(kubectl --namespace cattle-system get secret cattle-tmp-token --output jsonpath='{.data.token}') 147 | kubectl --namespace cattle-system patch deployment cattle-cluster-agent --patch '{"spec": {"template": {"spec": {"serviceAccount": "cattle-tmp", "serviceAccountName": "cattle-tmp"}}}}' 148 | kubectl --namespace cattle-system rollout status deployment cattle-cluster-agent 149 | 150 | # set cluster to use temporary account 151 | KUBECONFIG=$MAIN_KUBECONFIG 152 | old_secret=$(kubectl get clusters.management $c --output jsonpath='{.status.serviceAccountTokenSecret}') 153 | cluster_uid=$(kubectl get clusters.management $c --output jsonpath='{.metadata.uid}') 154 | secret=$(create_cluster_secret $c $cluster_uid $token) 155 | kubectl patch clusters.management $c --patch '{"status": {"serviceAccountTokenSecret": "'$secret'"}}' --type=merge 156 | kubectl --namespace cattle-global-data delete secret $old_secret 157 | 158 | # regenerate service account and secret 159 | KUBECONFIG=kubeconfigs/${c}.config 160 | if kubectl --namespace cattle-system get serviceaccount kontainer-engine >/dev/null 2>&1 161 | then 162 | serviceaccount=kontainer-engine 163 | elif kubectl --namespace cattle-system get serviceaccount cattle >/dev/null 2>&1 164 | then 165 | serviceaccount=cattle 166 | else 167 | echo "could not find admin service account to rotate on cluster $c" 168 | exit 1 169 | fi 170 | # 2.6 creates its own token 171 | if kubectl --namespace cattle-system get secret $serviceaccount-token >/dev/null 2>&1 172 | then 173 | kubectl --namespace cattle-system delete serviceaccount $serviceaccount 174 | uid=$(kubectl --namespace cattle-system create serviceaccount $serviceaccount --output jsonpath='{.metadata.uid}') 175 | create_token_secret $serviceaccount $uid 176 | tokensecret=$serviceaccount-token 177 | # 2.5 uses the k8s-generated token 178 | else 179 | kubectl --namespace cattle-system delete serviceaccount $serviceaccount 
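# Recreating the service account below lets the (pre-1.24) Kubernetes token controller auto-generate a fresh token secret, which is read from .secrets[0] a few lines later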
180 | kubectl --namespace cattle-system create serviceaccount $serviceaccount 181 | tokensecret=$(kubectl --namespace cattle-system get serviceaccount $serviceaccount --output jsonpath='{.secrets[0].name}') 182 | fi 183 | # restore back to old account 184 | token=$(kubectl --namespace cattle-system get secret $tokensecret --output jsonpath='{.data.token}') 185 | KUBECONFIG=$MAIN_KUBECONFIG 186 | secret=$(create_cluster_secret $c $cluster_uid $token) 187 | kubectl patch clusters.management $c --patch '{"status": {"serviceAccountTokenSecret": "'$secret'"}}' --type=merge 188 | 189 | # cleanup temporary artifacts 190 | KUBECONFIG=kubeconfigs/${c}.config 191 | cleanup 192 | done 193 | -------------------------------------------------------------------------------- /swiss-army-knife/README.md: -------------------------------------------------------------------------------- 1 | # Swiss-Army-Knife 2 | Rancher Support uses a standard tooling image called `swiss-army-knife` to help you manage your Rancher/Kubernetes environment. You can learn more about this image by visiting its official repo at [rancherlabs/swiss-army-knife](https://github.com/rancherlabs/swiss-army-knife/). 3 | 4 | TL;DR: This image has a lot of useful tools that can be used for scripting and troubleshooting. 5 | - [`kubectl`](https://kubernetes.io/docs/reference/kubectl/overview/) 6 | - [`helm`](https://helm.sh/docs/intro/) 7 | - [`curl`](https://curl.haxx.se/docs/manpage.html) 8 | - [`jq`](https://stedolan.github.io/jq/) 9 | - [`traceroute`](https://www.traceroute.org/about.html) 10 | - [`dig`](https://www.dig.com/products/dns/dig/) 11 | - [`nslookup`](https://www.google.com/search?q=nslookup) 12 | - [`ping`](https://www.google.com/search?q=ping) 13 | - [`netstat`](https://www.google.com/search?q=netstat) 14 | - And many more! 15 | 16 | ## Example deployments 17 | 18 | ### Overlay Test 19 | This deployment is used for Rancher's overlay network test, which is documented [here](https://ranchermanager.docs.rancher.com/troubleshooting/other-troubleshooting-tips/networking#check-if-overlay-network-is-functioning-correctly). It can be deployed to the cluster by running the following command: 20 | ```bash 21 | kubectl apply -f https://raw.githubusercontent.com/rancherlabs/support-tools/master/swiss-army-knife/overlaytest.yaml 22 | ``` 23 | 24 | This will deploy a daemonset that runs on all nodes in the cluster. These pods run `tail -f /dev/null`, which does nothing but keep the pod running. 25 | 26 | You can run the overlay test script by running the following command: 27 | ```bash 28 | curl -sfL https://raw.githubusercontent.com/rancherlabs/support-tools/master/swiss-army-knife/overlaytest.sh | bash 29 | ``` 30 | 31 | ### Admin Tools 32 | This deployment will deploy `swiss-army-knife` to all nodes in the cluster but with additional permissions and privileges. This is useful for troubleshooting and managing your Rancher environment. The pod will be running `tail -f /dev/null`, which will do nothing but keep the pod running. 33 | 34 | This can be deployed to the cluster by running the following command: 35 | ```bash 36 | kubectl apply -f https://raw.githubusercontent.com/rancherlabs/support-tools/master/swiss-army-knife/admin-tools.yaml 37 | ``` 38 | 39 | Inside the pod, you will be able to run `kubectl` commands with cluster-admin privileges. The pod can also gain full access to the node, including the ability to obtain a root shell on the node.
By running the following commands: 40 | - `kubectl -n kube-system get pods -l app=swiss-army-knife -o wide` 41 | - This will show you all pods running `swiss-army-knife` in the `kube-system` namespace. 42 | - Find the pod on the node you want to interact with. 43 | - `kubectl -n kube-system exec -it -- bash` 44 | - `chroot /rootfs` 45 | 46 | You are now running a root shell on the node with full privileges. 47 | 48 | **Important:** This deployment is designed for troubleshooting and management purposes and should not be left running on a cluster. 49 | -------------------------------------------------------------------------------- /swiss-army-knife/admin-tools.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: Namespace 4 | metadata: 5 | name: swiss-army-knife 6 | labels: 7 | app: swiss-army-knife 8 | --- 9 | apiVersion: rbac.authorization.k8s.io/v1 10 | kind: ClusterRole 11 | metadata: 12 | labels: 13 | app: swiss-army-knife 14 | name: swiss-army-knife 15 | rules: 16 | - apiGroups: 17 | - "*" 18 | resources: 19 | - "*" 20 | verbs: 21 | - "*" 22 | - nonResourceURLs: 23 | - "*" 24 | verbs: 25 | - "*" 26 | --- 27 | apiVersion: v1 28 | kind: ServiceAccount 29 | metadata: 30 | name: swiss-army-knife 31 | namespace: swiss-army-knife 32 | labels: 33 | app: swiss-army-knife 34 | --- 35 | apiVersion: rbac.authorization.k8s.io/v1 36 | kind: ClusterRoleBinding 37 | metadata: 38 | labels: 39 | app: swiss-army-knife 40 | name: swiss-army-knife 41 | roleRef: 42 | apiGroup: rbac.authorization.k8s.io 43 | kind: ClusterRole 44 | name: swiss-army-knife 45 | subjects: 46 | - kind: ServiceAccount 47 | name: swiss-army-knife 48 | namespace: swiss-army-knife 49 | --- 50 | apiVersion: apps/v1 51 | kind: DaemonSet 52 | metadata: 53 | name: swiss-army-knife 54 | namespace: swiss-army-knife 55 | labels: 56 | app: swiss-army-knife 57 | spec: 58 | selector: 59 | matchLabels: 60 | app: swiss-army-knife 61 | template: 62 | metadata: 63 | labels: 64 | app: swiss-army-knife 65 | spec: 66 | tolerations: 67 | - operator: Exists 68 | containers: 69 | - name: swiss-army-knife 70 | image: supporttools/swiss-army-knife 71 | imagePullPolicy: IfNotPresent 72 | securityContext: 73 | privileged: true 74 | resources: 75 | limits: 76 | cpu: 1000m 77 | memory: 1000Mi 78 | requests: 79 | cpu: 100m 80 | memory: 100Mi 81 | env: 82 | - name: POD_NAMESPACE 83 | valueFrom: 84 | fieldRef: 85 | fieldPath: metadata.namespace 86 | - name: POD_IP 87 | valueFrom: 88 | fieldRef: 89 | fieldPath: status.podIP 90 | - name: NODE_NAME 91 | valueFrom: 92 | fieldRef: 93 | fieldPath: spec.nodeName 94 | volumeMounts: 95 | - name: rootfs 96 | mountPath: /rootfs 97 | serviceAccountName: swiss-army-knife 98 | volumes: 99 | - name: rootfs 100 | hostPath: 101 | path: / 102 | --- 103 | apiVersion: v1 104 | kind: Service 105 | metadata: 106 | name: swiss-army-knife 107 | namespace: swiss-army-knife 108 | labels: 109 | app: swiss-army-knife 110 | spec: 111 | selector: 112 | name: swiss-army-knife 113 | ports: 114 | - protocol: TCP 115 | port: 80 116 | targetPort: 80 117 | type: ClusterIP -------------------------------------------------------------------------------- /swiss-army-knife/overlaytest.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | DNS_TEST=false 4 | NAMESPACE=default 5 | 6 | # Parse arguments 7 | while [[ $# -gt 0 ]]; do 8 | case $1 in 9 | --dns-test) 10 | DNS_TEST=true 11 | shift 12 | ;; 13 | *) 14 | echo "Unknown 
option: $1" 15 | exit 1 16 | ;; 17 | esac 18 | done 19 | 20 | echo "=> Start network overlay and DNS test" 21 | if $DNS_TEST 22 | then 23 | DNS_PASS=0; DNS_FAIL=0 24 | else 25 | echo "DNS tests are skipped. Use --dns-check to enable." 26 | fi 27 | echo 28 | NET_PASS=0; NET_FAIL=0 29 | 30 | while read spod shost sip 31 | do 32 | echo "Testing pod $spod on node $shost with IP $sip" 33 | 34 | # Overlay network test 35 | echo " => Testing overlay network connectivity" 36 | while read tip thost 37 | do 38 | if [[ ! $shost == $thost ]]; then 39 | kubectl -n $NAMESPACE exec $spod -c overlaytest -- /bin/sh -c "ping -c2 $tip > /dev/null 2>&1" 40 | RC=$? 41 | if [ $RC -ne 0 ]; then 42 | ((NET_FAIL+=1)); echo " FAIL: $spod on $shost cannot reach pod IP $tip on $thost" 43 | else 44 | ((NET_PASS+=1)); echo " PASS: $spod on $shost can reach pod IP $tip on $thost" 45 | fi 46 | fi 47 | done < <(kubectl get pods -n $NAMESPACE -l name=overlaytest -o jsonpath='{range .items[*]}{@.status.podIP}{" "}{@.spec.nodeName}{"\n"}{end}' | sort -k2) 48 | 49 | if $DNS_TEST; then 50 | # Internal DNS test 51 | echo " => Testing DNS" 52 | kubectl -n $NAMESPACE exec $spod -c overlaytest -- /bin/sh -c "nslookup kubernetes.default > /dev/null 2>&1" 53 | RC=$? 54 | if [ $RC -ne 0 ]; then 55 | ((DNS_FAIL+=1)); echo " FAIL: $spod cannot resolve internal DNS for 'kubernetes.default'" 56 | else 57 | ((DNS_PASS+=1)); echo " PASS: $spod can resolve internal DNS for 'kubernetes.default'" 58 | fi 59 | 60 | # External DNS test 61 | kubectl -n $NAMESPACE exec $spod -c overlaytest -- /bin/sh -c "nslookup rancher.com > /dev/null 2>&1" 62 | RC=$? 63 | if [ $RC -ne 0 ]; then 64 | ((DNS_FAIL+=1)); echo " FAIL: $spod cannot resolve external DNS for 'rancher.com'" 65 | else 66 | ((DNS_PASS+=1)); echo " PASS: $spod can resolve external DNS for 'rancher.com'" 67 | fi 68 | fi 69 | echo 70 | 71 | done < <(kubectl get pods -n $NAMESPACE -l name=overlaytest -o jsonpath='{range .items[*]}{@.metadata.name}{" "}{@.spec.nodeName}{" "}{@.status.podIP}{"\n"}{end}' | sort -k2) 72 | 73 | NET_TOTAL=$(($NET_PASS + $NET_FAIL)) 74 | echo "=> Network [$NET_PASS / $NET_TOTAL]" 75 | if $DNS_TEST; then 76 | DNS_TOTAL=$(($DNS_PASS + $DNS_FAIL)) 77 | echo "=> DNS [$DNS_PASS / $DNS_TOTAL]" 78 | fi 79 | echo; echo "=> End network overlay and DNS test" -------------------------------------------------------------------------------- /swiss-army-knife/overlaytest.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: DaemonSet 3 | metadata: 4 | name: overlaytest 5 | spec: 6 | selector: 7 | matchLabels: 8 | name: overlaytest 9 | template: 10 | metadata: 11 | labels: 12 | name: overlaytest 13 | spec: 14 | tolerations: 15 | - operator: Exists 16 | containers: 17 | - image: rancherlabs/swiss-army-knife 18 | imagePullPolicy: IfNotPresent 19 | name: overlaytest 20 | command: ["sh", "-c", "tail -f /dev/null"] 21 | terminationMessagePath: /dev/termination-log -------------------------------------------------------------------------------- /troubleshooting-scripts/README.md: -------------------------------------------------------------------------------- 1 | # Troubleshooting Scripts 2 | 3 | ## kube-scheduler 4 | 5 | ### Finding the current leader 6 | 7 | Command(s): `curl -s https://raw.githubusercontent.com/rancherlabs/support-tools/master/troubleshooting-scripts/kube-scheduler/find-leader.sh | bash` 8 | 9 | **Example Output** 10 | 11 | ```bash 12 | kube-scheduler is the leader on node a1ubk8slabl03 13 | ``` 14 | 
15 | ## determine-leader 16 | 17 | Command(s): `curl -s https://raw.githubusercontent.com/rancherlabs/support-tools/master/troubleshooting-scripts/determine-leader/rancher2_determine_leader.sh | bash` 18 | 19 | **Example Output** 20 | 21 | ```bash 22 | NAME POD-IP HOST-IP 23 | cattle-cluster-agent-776d795ff8-x77nq 10.42.0.93 10.10.100.83 24 | cattle-node-agent-4bsx6 10.10.100.83 10.10.100.83 25 | rancher-54d47dc9cf-d4qt9 10.42.0.92 10.10.100.83 26 | rancher-54d47dc9cf-prn4d 10.42.0.90 10.10.100.83 27 | rancher-54d47dc9cf-rsn4g 10.42.0.91 10.10.100.83 28 | 29 | rancher-54d47dc9cf-prn4d is the leader in this Rancher instance 30 | ``` 31 | -------------------------------------------------------------------------------- /troubleshooting-scripts/determine-leader/rancher2_determine_leader.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | RANCHER_LEADER="$(kubectl -n kube-system get lease cattle-controllers -o json | jq -r '.spec.holderIdentity')" 3 | # Display Rancher Pods Information 4 | kubectl get pod -n cattle-system $RANCHER_LEADER -o custom-columns=NAME:.metadata.name,POD-IP:.status.podIP,HOST-IP:.status.hostIP 5 | printf "\n$RANCHER_LEADER is the leader in this Rancher instance\n" 6 | -------------------------------------------------------------------------------- /troubleshooting-scripts/etcd/README.md: -------------------------------------------------------------------------------- 1 | # etcd-troubleshooting 2 | 3 | ## Check etcd members 4 | Command(s): `docker exec etcd etcdctl member list` 5 | 6 | **Example Output of a healthy cluster** 7 | ```bash 8 | 2f080bc6ec98f39b, started, etcd-a1ubrkeat03, https://172.27.5.33:2380, https://172.27.5.33:2379,https://172.27.5.33:4001, false 9 | 9d7204f89b221ba3, started, etcd-a1ubrkeat01, https://172.27.5.31:2380, https://172.27.5.31:2379,https://172.27.5.31:4001, false 10 | bd37bc0dc2e990b6, started, etcd-a1ubrkeat02, https://172.27.5.32:2380, https://172.27.5.32:2379,https://172.27.5.32:4001, false 11 | ``` 12 | 13 | ## Check etcd endpoints 14 | Command(s): `curl https://raw.githubusercontent.com/rancherlabs/support-tools/master/troubleshooting-scripts/etcd/check-endpoints.sh | bash ` 15 | 16 | **Example Output of a healthy cluster** 17 | ```bash 18 | Validating connection to https://172.27.5.33:2379/health 19 | {"health":"true"} 20 | Validating connection to https://172.27.5.31:2379/health 21 | {"health":"true"} 22 | Validating connection to https://172.27.5.32:2379/health 23 | {"health":"true"} 24 | ``` 25 | 26 | ## Check etcd logs 27 | 28 | `health check for peer xxx could not connect: dial tcp IP:2380: getsockopt: connection refused` 29 | 30 | A connection to the address shown on port 2380 cannot be established. Check if the etcd container is running on the host with the address shown. 31 | 32 | 33 | `xxx is starting a new election at term x` 34 | 35 | The etcd cluster has lost it’s quorum and is trying to establish a new leader. This can happen when the majority of the nodes running etcd go down/unreachable. 36 | 37 | 38 | `connection error: desc = "transport: Error while dialing dial tcp 0.0.0.0:2379: i/o timeout"; Reconnecting to {0.0.0.0:2379 0 }` 39 | 40 | The host firewall is preventing network communication. 41 | 42 | 43 | `rafthttp: request cluster ID mismatch` 44 | 45 | The node with the etcd instance logging `rafthttp: request cluster ID mismatch` is trying to join a cluster that has already been formed with another peer. 
The node should be removed from the cluster, and re-added. 46 | 47 | 48 | `rafthttp: failed to find member` 49 | 50 | The cluster state (`/var/lib/etcd`) contains wrong information to join the cluster. The node should be removed from the cluster, the state directory should be cleaned and the node should be re-added. 51 | 52 | ## Enabling debug logging 53 | `curl -XPUT -d '{"Level":"DEBUG"}' --cacert $(docker exec etcd printenv ETCDCTL_CACERT) --cert $(docker exec etcd printenv ETCDCTL_CERT) --key $(docker exec etcd printenv ETCDCTL_KEY) https://localhost:2379/config/local/log` 54 | 55 | ## Disabling debug logging 56 | `curl -XPUT -d '{"Level":"INFO"}' --cacert $(docker exec etcd printenv ETCDCTL_CACERT) --cert $(docker exec etcd printenv ETCDCTL_CERT) --key $(docker exec etcd printenv ETCDCTL_KEY) https://localhost:2379/config/local/log` 57 | 58 | ## Getting etcd metrics 59 | `curl -X GET --cacert $(docker exec etcd printenv ETCDCTL_CACERT) --cert $(docker exec etcd printenv ETCDCTL_CERT) --key $(docker exec etcd printenv ETCDCTL_KEY) https://localhost:2379/metrics` 60 | 61 | 62 | **wal_fsync_duration_seconds (99% under 10 ms)** 63 | 64 | A wal_fsync is called when etcd persists its log entries to disk before applying them. 65 | 66 | 67 | **backend_commit_duration_seconds (99% under 25 ms)** 68 | 69 | A backend_commit is called when etcd commits an incremental snapshot of its most recent changes to disk. 70 | -------------------------------------------------------------------------------- /troubleshooting-scripts/etcd/check-endpoints.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | for endpoint in $(docker exec etcd /bin/sh -c "etcdctl member list | cut -d, -f5"); 3 | do 4 | echo "Validating connection to ${endpoint}/health"; 5 | docker run --net=host -v $(docker inspect kubelet --format '{{ range .Mounts }}{{ if eq .Destination "/etc/kubernetes" }}{{ .Source }}{{ end }}{{ end }}')/ssl:/etc/kubernetes/ssl:ro appropriate/curl -s -w "\n" --cacert $(docker exec etcd printenv ETCDCTL_CACERT) --cert $(docker exec etcd printenv ETCDCTL_CERT) --key $(docker exec etcd printenv ETCDCTL_KEY) "${endpoint}/health"; 6 | done 7 | -------------------------------------------------------------------------------- /troubleshooting-scripts/kube-apiserver/check_apiserver-to-etcd.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | for i in $(docker inspect kube-apiserver | grep -m 1 "\--etcd-servers" | grep -Po '(?<=https://)[^:]*') 4 | do 5 | echo -n "Checking $i " 6 | curl --cacert /etc/kubernetes/ssl/kube-ca.pem --cert /etc/kubernetes/ssl/kube-node.pem --key /etc/kubernetes/ssl/kube-node-key.pem https://"$i":2379/health 7 | echo "" 8 | done 9 | -------------------------------------------------------------------------------- /troubleshooting-scripts/kube-apiserver/check_endpoints.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo "Getting IPs from endpoint..." 
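# Each IP in the "kubernetes" endpoint should match the InternalIP of a controlplane node; any other IP points at a stale kube-apiserver endpoint entry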
4 | EndPointIPs=`kubectl get endpoints kubernetes -o jsonpath='{.subsets[].addresses[*].ip}'` 5 | 6 | for EndPointIP in $EndPointIPs 7 | do 8 | if kubectl get nodes --selector=node-role.kubernetes.io/controlplane=true -o jsonpath={.items[*].status.addresses[?\(@.type==\"InternalIP\"\)].address} | grep $EndPointIP > /dev/null 9 | then 10 | echo "Good - $EndPointIP" 11 | else 12 | echo "Problem - $EndPointIP" 13 | fi 14 | done 15 | -------------------------------------------------------------------------------- /troubleshooting-scripts/kube-apiserver/responsiveness.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | for cip in $(kubectl get nodes -l "node-role.kubernetes.io/controlplane=true" -o jsonpath='{range.items[*].status.addresses[?(@.type=="InternalIP")]}{.address}{"\n"}{end}'); 3 | do 4 | kubectl --server https://${cip}:6443 get nodes -v6 2>&1| grep round_trippers; 5 | done 6 | -------------------------------------------------------------------------------- /troubleshooting-scripts/kube-scheduler/find-leader.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | NODE="$(kubectl -n kube-system get endpoints kube-scheduler -o jsonpath='{.metadata.annotations.control-plane\.alpha\.kubernetes\.io/leader}' | jq -r .holderIdentity | sed 's/_[^_]*$//')" 3 | echo "kube-scheduler is the leader on node $NODE" 4 | -------------------------------------------------------------------------------- /windows-access-control-lists/README.md: -------------------------------------------------------------------------------- 1 | # Securing file ACLs on RKE2 Windows nodes 2 | 3 | In certain cases, Windows nodes joined to RKE2 clusters may not have appropriate Access Control Lists (ACLs) configured for important files and directories, allowing improper access by unprivileged user accounts such as `NT AUTHORITY\Authenticated Users`. This occurs in the following configurations 4 | 5 | + Standalone RKE2 nodes (i.e. RKE2 nodes **_not_** provisioned using Rancher) which run on Windows that were _initially_ provisioned using a version older than `1.27.15`, `1.28.11`, `1.29.6`, or `1.30.2` 6 | 7 | + Rancher provisioned RKE2 nodes that run on Windows that were created using a Rancher version older than `2.9.3` or `2.8.9`. 8 | 9 | This issue has been resolved for standalone RKE2 clusters starting with versions `1.27.15`, `1.28.1`, `1.29.6`, `1.30.2` and above. Rancher `2.9.3`, `2.8.9`, and above, have also been updated to properly configure ACLs on Windows nodes during initial provisioning as well as to retroactively update ACLs on existing nodes. 10 | 11 | If you are maintaining a standalone RKE2 Windows cluster which was provisioned using a version of RKE2 older than `1.27.15`, `1.28.11`, `1.29.6`, `1.30.2`, or if you maintain a Rancher provisioned RKE2 Windows cluster but are unable to upgrade to at least `2.9.3` or `2.8.9`, then you can use the below powershell script to manually update the relevant ACLs. 12 | 13 | This script only needs to be run once per node. If desired, additional files and directories can be secured by updating the `$restrictedPaths` variable. After running the script, only the `NT AUTHORITY\SYSTEM` and `BUILTIN\Administrators` group will have access to the specified files and directories. Directories will be configured with inheritance enabled to ensure child files and directories utilize the same restrictive ACL. 
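Before and after running the script, you can spot-check the effective ACL on any of the targeted paths with the same `Get-Acl` cmdlet the script uses; a minimal sketch, assuming the default RKE2 data directory used below (adjust the path for your environment):

```powershell
# Show the owner, group, and access rules currently applied to the RKE2 data directory
Get-Acl -Path "c:\var\lib\rancher\rke2" | Format-List Owner, Group, AccessToString
```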
14 | 15 | Add the below script to a PowerShell file and run it using the PowerShell console as an Administrator. 16 | 17 | ```powershell 18 | function Set-RestrictedPermissions { 19 | [CmdletBinding()] 20 | param ( 21 | [Parameter(Mandatory=$true)] 22 | [string] 23 | $Path, 24 | [Parameter(Mandatory=$true)] 25 | [Boolean] 26 | $Directory 27 | ) 28 | $Owner = "BUILTIN\Administrators" 29 | $Group = "NT AUTHORITY\SYSTEM" 30 | $acl = Get-Acl $Path 31 | 32 | foreach ($rule in $acl.GetAccessRules($true, $true, [System.Security.Principal.SecurityIdentifier])) { 33 | $acl.RemoveAccessRule($rule) | Out-Null 34 | } 35 | $acl.SetAccessRuleProtection($true, $false) 36 | $acl.SetOwner((New-Object System.Security.Principal.NTAccount($Owner))) 37 | $acl.SetGroup((New-Object System.Security.Principal.NTAccount($Group))) 38 | 39 | Set-FileSystemAccessRule -Directory $Directory -acl $acl 40 | 41 | $FullPath = Resolve-Path $Path 42 | Write-Host "Setting restricted ACL on $FullPath" 43 | Set-Acl -Path $Path -AclObject $acl 44 | } 45 | 46 | function Set-FileSystemAccessRule() { 47 | [CmdletBinding()] 48 | param ( 49 | [Parameter(Mandatory=$true)] 50 | [Boolean] 51 | $Directory, 52 | [Parameter(Mandatory=$false)] 53 | [System.Security.AccessControl.ObjectSecurity] 54 | $acl 55 | ) 56 | $users = @( 57 | $acl.Owner, 58 | $acl.Group 59 | ) 60 | if ($Directory -eq $true) { 61 | foreach ($user in $users) { 62 | $rule = New-Object System.Security.AccessControl.FileSystemAccessRule( 63 | $user, 64 | [System.Security.AccessControl.FileSystemRights]::FullControl, 65 | [System.Security.AccessControl.InheritanceFlags]'ObjectInherit,ContainerInherit', 66 | [System.Security.AccessControl.PropagationFlags]::None, 67 | [System.Security.AccessControl.AccessControlType]::Allow 68 | ) 69 | $acl.AddAccessRule($rule) 70 | } 71 | } else { 72 | foreach ($user in $users) { 73 | $rule = New-Object System.Security.AccessControl.FileSystemAccessRule( 74 | $user, 75 | [System.Security.AccessControl.FileSystemRights]::FullControl, 76 | [System.Security.AccessControl.AccessControlType]::Allow 77 | ) 78 | $acl.AddAccessRule($rule) 79 | } 80 | } 81 | } 82 | 83 | function Confirm-ACL { 84 | [CmdletBinding()] 85 | param ( 86 | [Parameter(Mandatory=$true)] 87 | [String] 88 | $Path 89 | ) 90 | foreach ($a in (Get-Acl $path).Access) { 91 | $ref = $a.IdentityReference 92 | if (($ref -ne "BUILTIN\Administrators") -and ($ref -ne "NT AUTHORITY\SYSTEM")) { 93 | return $false 94 | } 95 | } 96 | return $true 97 | } 98 | 99 | $RKE2_DATA_DIR="c:\var\lib\rancher\rke2" 100 | $SYSTEM_AGENT_DIR="c:\var\lib\rancher\agent" 101 | $RANCHER_PROVISIONING_DIR="c:\var\lib\rancher\capr" 102 | 103 | $restrictedPaths = @( 104 | [PSCustomObject]@{ 105 | Path = "c:\etc\rancher\wins\config" 106 | Directory = $false 107 | } 108 | [PSCustomObject]@{ 109 | Path = "c:\etc\rancher\node\password" 110 | Directory = $false 111 | } 112 | [PSCustomObject]@{ 113 | Path = "$SYSTEM_AGENT_DIR\rancher2_connection_info.json" 114 | Directory = $false 115 | } 116 | [PSCustomObject]@{ 117 | Path = "c:\etc\rancher\rke2\config.yaml.d\50-rancher.yaml" 118 | Directory = $false 119 | } 120 | [PSCustomObject]@{ 121 | Path = "c:\usr\local\bin\rke2.exe" 122 | Directory = $false 123 | } 124 | [PSCustomObject]@{ 125 | Path = "$RANCHER_PROVISIONING_DIR" 126 | Directory = $true 127 | } 128 | [PSCustomObject]@{ 129 | Path = "$SYSTEM_AGENT_DIR" 130 | Directory = $true 131 | } 132 | [PSCustomObject]@{ 133 | Path = "$RKE2_DATA_DIR" 134 | Directory = $true 135 | } 136 | ) 137 | 138 | foreach ($path in 
$restrictedPaths) { 139 | # Some paths will not exist on standalone RKE2 clusters 140 | if (-Not (Test-Path -Path $path.Path)) { 141 | continue 142 | } 143 | 144 | if (-Not (Confirm-ACL -Path $path.Path)) { 145 | Set-RestrictedPermissions -Path $path.Path -Directory $path.Directory 146 | } else { 147 | Write-Host "ACLs have been properly configured for the $($path.Path) $(if($path.Directory){ "directory" } else { "file" })" 148 | } 149 | } 150 | ``` 151 | -------------------------------------------------------------------------------- /windows-agent-strict-verify/README.md: -------------------------------------------------------------------------------- 1 | # Enabling agent strict TLS verification on existing Windows nodes 2 | 3 | In certain conditions, Windows nodes will not respect the Agent TLS Mode value set on the Rancher server. This setting was implemented in Rancher 2.9.0 and 2.8.6 4 | 5 | Windows nodes will not respect this setting if the following two conditions are true 6 | 7 | 1. The node was provisioned using a Rancher version older than 2.9.2 or 2.8.8, and continues to be used after a Rancher upgrade to 2.9.2, 2.8.8, or greater 8 | 2. The node is running a version of rke2 _older_ than the August 2024 patches. (i.e. any version _lower_ than v1.30.4, v1.29.8, v1.28.13, v1.27.16.) 9 | 10 | ## Workaround 11 | 12 | In order to retroactively enable strict TLS verification on Windows nodes, the following process must be followed. A Powershell script, `update-node.ps1` has been included to automate some parts of this process, however some steps (such as retrieving the required credentials used by the script) must be done manually. 13 | 14 | 15 | This process needs to be repeated for each Windows node joined to the cluster, but does not need to be done for newly provisioned nodes after Rancher has been upgraded to at least 2.9.2 or 2.8.8 - even if the rke2 version is older than the August patches. In scenarios where it is possible / safe to reprovision the impacted Windows nodes, this process may not be needed. 16 | 17 | 1. Stop the `rancher-wins` service using the `Stop-Service` PowerShell Command (`Stop-Service rancher-wins`) 18 | 19 | 2. Update the version of `wins.exe` running on the node. This can either be done manually, or via the `update-node.ps1` PowerShell script by passing the `-DownloadWins` flag 20 | 1. If a manual approach is taken, download the latest [version of rancher-wins from GitHub](https://github.com/rancher/wins/releases) (at least version `0.4.18`) and place the updated binary in the `c:/usr/local/bin` and `c:/Windows` directories, replacing the existing binaries. 21 | 22 | 2. If the automatic approach is taken, then you must include the `-DownloadWins` flag when invoking `update-node.ps1`. The version of `rancher-wins` packaged within your Rancher server will then be downloaded. 23 | + You must ensure that you are running a version of Rancher which embeds at _least_ `rancher-wins` `v0.4.18`. This version is included in Rancher v2.9.2, v2.8.8, and above. 24 | + Refer to the [`Obtaining the CATTLE_TOKEN and CATTLE_SERVER variables`](#obtaining-the-cattle_token-and-cattle_server-variables) section below to retrieve the required `CATTLE_TOKEN` and `CATTLE_SERVER` variables. 25 | 26 | 3. Manually update the `rancher-wins` config file to enable strict tls verification 27 | 1. This file is located in `c:/etc/rancher/wins/config`. 28 | 1. At the root level (i.e. a new line just before the `system-agent` field) add the following value `agentStrictTLSMode: true` 29 | 2. 
An [example configuration file](#example-updated-wins-config-file) can be seen at the bottom of this file 30 | 31 | 4. If needed, regenerate the rancher connection file 32 | 1. To determine if you need to do this, look at the `/var/lib/rancher/agent/rancher2_connection_info.json` file. If you intend to use strict validation, this file must contain a valid `ca-certificate-data` field. 33 | 2. If this field is missing 34 | 1. Refer to the [`Obtaining the CATTLE_TOKEN and CATTLE_SERVER variables`](#obtaining-the-cattle_token-and-cattle_server-variables) section to retrieve the required `CATTLE_TOKEN` and `CATTLE_SERVER` parameters 35 | 2. Create a new file containing the `update-node.ps1` script and run it, ensuring you properly pass the `CATTLE_SERVER` value to the `-RancherServerURL` flag, and the `CATTLE_TOKEN` value to the `-Token` flag. 36 | 1. Depending on whether you wish to manually update `rancher-wins`, run one of the following two commands 37 | 1. `./update-node.ps1 -RancherServerURL $CATTLE_SERVER -Token $CATTLE_TOKEN` 38 | 2. `./update-node.ps1 -RancherServerURL $CATTLE_SERVER -Token $CATTLE_TOKEN -DownloadWins` 39 | 2. Confirm that the `rancher2_connection_info.json` file contains the correct CA data. 40 | 41 | 5. Confirm the proper version of `rancher-wins` has been installed by running `win.exe --version` 42 | 6. Restart the node (`Restart-Computer`). 43 | 1. If the node is running an RKE2 version older than the August patches, you **must** restart the node otherwise pod networking will be impacted. 44 | 45 | ### Obtaining the `CATTLE_TOKEN` and `CATTLE_SERVER` variables 46 | 47 | - You must be a cluster administrator or have an account permitted to view cluster secrets in order to use this script, as the `CATTLE_TOKEN` is stored in a Kubernetes secret. You cannot simply generate an API token using the Rancher UI. 48 | - To obtain the `CATTLE_TOKEN` and `CATTLE_SERVER` values using the Rancher UI 49 | 1. Open Rancher's Cluster Explorer UI for the cluster which contains the relevant Windows nodes. 50 | 2. In the left hand section, under `More Resources`, go to `Core`, and then finally, `Secrets`. 51 | 3. Find the secret named `stv-aggregation`, and copy the `CATTLE_SERVER` and `CATTLE_TOKEN` fields. 52 | 4. Pass `CATTLE_TOKEN` to the `-Token` flag, and `CATTLE_SERVER` to the `-RancherServerURL` flag. 53 | - To obtain the `CATTLE_TOKEN` and `CATTLE_SERVER` values using kubectl 54 | 1. `kubectl get secret -n cattle-system stv-aggregation --template={{.data.CATTLE_TOKEN}} | base64 -d` 55 | 2. 
`kubectl get secret -n cattle-system stv-aggregation --template={{.data.CATTLE_SERVER}} | base64 -d` 56 | 57 | ### Example updated wins config file 58 | 59 | ```yaml 60 | # This file is located at c:/etc/rancher/wins/config 61 | white_list: 62 | processPaths: 63 | - C:/etc/rancher/wins/powershell.exe 64 | - C:/etc/rancher/wins/wins-upgrade.exe 65 | - C:/etc/wmi-exporter/wmi-exporter.exe 66 | - C:/etc/windows-exporter/windows-exporter.exe 67 | proxyPorts: 68 | - 9796 69 | agentStrictTLSMode: true 70 | systemagent: 71 | workDirectory: C:/var/lib/rancher/agent/work 72 | appliedPlanDirectory: C:/var/lib/rancher/agent/applied 73 | remoteEnabled: true 74 | preserveWorkDirectory: false 75 | connectionInfoFile: C:/var/lib/rancher/agent/rancher2_connection_info.json 76 | csi-proxy: 77 | url: https://haffel-rancher.cp-dev.rancher.space/assets/csi-proxy-%[1]s.tar.gz 78 | version: v1.1.3 79 | kubeletPath: C:/bin/kubelet.exe 80 | ``` -------------------------------------------------------------------------------- /windows-agent-strict-verify/update-node.ps1: -------------------------------------------------------------------------------- 1 | <# 2 | .SYNOPSIS 3 | Updates the rancher2_connection_info.json file on Windows nodes and optionally downloads the latest version of rancher-wins from the specified Rancher server 4 | 5 | .PARAMETER RancherServerURL 6 | The HTTPS URL of the Rancher server which manages the cluster this node is joined to 7 | 8 | .PARAMETER Token 9 | The Rancher API token tracked in the stv-aggregation secret 10 | 11 | .PARAMETER ForceRegeneration 12 | When set to true, this script will overwrite the rancher2_connection_info.json file, even if the certificate-authority-data field is present 13 | 14 | .PARAMETER DownloadWins 15 | When set to true, this script will reach out to the RancherServerURL API and download the version of rancher-wins embedded in that server 16 | #> 17 | 18 | param ( 19 | [Parameter()] 20 | [String] 21 | $RancherServerURL, 22 | 23 | [Parameter()] 24 | [String] 25 | $Token, 26 | 27 | [Parameter()] 28 | [Switch] 29 | $ForceRegeneration, 30 | 31 | [Parameter()] 32 | [Switch] 33 | $DownloadWins 34 | ) 35 | 36 | if ($DownloadWins -eq $true) { 37 | # Download the latest version of wins from the rancher server 38 | $responseCode = $(curl.exe --connect-timeout 60 --max-time 300 --write-out "%{http_code}\n" --ssl-no-revoke -sfL "$RancherServerURL/assets/wins.exe" -o "/usr/local/bin/wins.exe") 39 | switch ( $responseCode ) { 40 | { $_ -in "ok200", 200 } { 41 | Write-Host "Successfully downloaded the wins binary." 42 | break 43 | } 44 | default { 45 | Write-Host "$responseCode received while downloading the wins binary. Double check that the correct RancherServerURL has been provided" 46 | exit 1 47 | } 48 | } 49 | Copy-Item -Path "/usr/local/bin/wins.exe" -Destination "c:\Windows\wins.exe" -Force 50 | } 51 | 52 | # Check the current connection file to determine if CA data is already present.
53 | $info = (Get-Content C:\var\lib\rancher\agent\rancher2_connection_info.json -ErrorAction Ignore) 54 | if (($null -ne $info) -and (($info | ConvertFrom-Json).kubeConfig).Contains("certificate-authority-data")) { 55 | if (-Not $ForceRegeneration) { 56 | Write-Host "certificate-authority-data is already present in rancher2_connection_info.json" 57 | exit 0 58 | } 59 | } 60 | 61 | $CATTLE_ID=(Get-Content /etc/rancher/wins/cattle-id -ErrorAction Ignore) 62 | if (($null -eq $CATTLE_ID) -or ($CATTLE_ID -eq "")) { 63 | Write-Host "Could not obtain required CATTLE_ID value from node" 64 | exit 1 65 | } 66 | 67 | Write-Host "Updating rancher2_connection_info.json file" 68 | 69 | $responseCode = $(curl.exe --connect-timeout 60 --max-time 60 --write-out "%{http_code}\n " --ssl-no-revoke -sfL "$RancherServerURL/v3/connect/agent" -o /var/lib/rancher/agent/rancher2_connection_info.json -H "Authorization: Bearer $Token" -H "X-Cattle-Id: $CATTLE_ID" -H "Content-Type: application/json") 70 | 71 | switch ( $responseCode ) { 72 | { $_ -in "ok200", 200 } { 73 | Write-Host "Successfully downloaded Rancher connection information." 74 | exit 0 75 | } 76 | default { 77 | Write-Host "$responseCode received while downloading Rancher connection information. Double check that the correct RancherServerURL and Token have been provided" 78 | exit 1 79 | } 80 | } 81 | --------------------------------------------------------------------------------