├── .gitignore ├── LICENSE ├── NGINX-to-pods-check ├── README.md ├── check.sh └── example-deployment.yml ├── README.md ├── adjust-downstream-webhook ├── README.md └── adjust-downstream-webhook.sh ├── bad-ingress-scanner ├── Dockerfile ├── README.md ├── bad-ingress.yaml ├── deployment.yaml └── run.sh ├── change-nodetemplate-owner ├── Dockerfile ├── README.md └── change-nodetemplate-owner.sh ├── charts ├── index.yaml ├── rancher-0.0.1.tgz ├── robots.txt ├── systems-info │ ├── .helmignore │ ├── Chart.yaml │ ├── questions.yml │ ├── templates │ │ ├── _helpers.tpl │ │ ├── clusterrolebinding.yaml │ │ ├── deployment.yaml │ │ ├── namespace.yaml │ │ ├── secret.yaml │ │ └── serviceaccount.yaml │ └── values.yaml └── systems-information │ ├── Chart.yaml │ ├── questions.yml │ ├── templates │ ├── clusterrolebinding.yaml │ ├── deployment.yaml │ ├── namespace.yaml │ ├── secret.yaml │ └── serviceaccount.yaml │ └── values.yaml ├── cleanup-etcd-part-files ├── README.md ├── alt-s3-sync.yaml └── delete-part-files.yaml ├── cleanup-evicted-pods ├── README.md ├── deploy.yaml └── serviceaccount.yaml ├── collection └── rancher │ ├── v1.6 │ └── logs-collector │ │ ├── README.md │ │ └── rancher16_logs_collector.sh │ └── v2.x │ ├── RBAC-role-collector │ ├── README.md │ └── role-dump.sh │ ├── logs-collector │ ├── README.md │ ├── collection-details.md │ └── rancher2_logs_collector.sh │ ├── profile-collector │ ├── README.md │ └── continuous_profiling.sh │ ├── rancher-pod-collector │ ├── README.md │ └── rancher-pod-collector.sh │ ├── supportability-review │ ├── README.md │ ├── cluster-collector.sh │ ├── collect.sh │ ├── collection-details.md │ ├── nodes-collector.sh │ └── security-policies.md │ ├── systems-information-v2 │ ├── README.md │ └── deploy.yaml │ ├── systems-information │ ├── Dockerfile │ ├── README.md │ ├── run.sh │ └── systems_summary.sh │ └── windows-log-collector │ ├── README.md │ └── win-log-collect.ps1 ├── eks-upgrade-using-api ├── README.md ├── common.sh ├── demo.gif └── eks-support.sh ├── eks-upgrade-using-kubectl ├── README.md ├── common.sh └── eks-support.sh ├── extended-rancher-2-cleanup ├── README.md └── extended-cleanup-rancher2.sh ├── files └── curl-format.txt ├── fleet-delete-cluster-registration ├── README.md └── delete_old_resources.sh ├── fleet-secrets-bro-patch ├── README.md └── patch_gitrepo_secrets.sh ├── how-to-retrieve-kubeconfig-from-custom-cluster ├── README.md └── rke-node-kubeconfig.sh ├── instant-fio-master ├── README.md └── instant-fio-master.sh ├── kubecert ├── README.md ├── base64 ├── jq-linux64 └── kubecert.sh ├── longhorn └── PlaceHolder.md ├── migrate-vsphere-clusters ├── README.md └── migrate-vsphere-clusters.sh ├── rancher-cleanup └── README.md ├── rancher-crd └── enumerate-resources │ ├── README.md │ └── rancher-resource-enumerator.sh ├── rancher-metadata-syncer ├── Dockerfile ├── README.md ├── apache.conf ├── deployment-configmap.yaml ├── deployment-proxy.yaml ├── download.sh └── run.sh ├── reverse-rke-state-migrations ├── README.md └── reverse-rke-state-migrations.sh ├── rotate-tokens ├── README.md └── rotate-tokens.sh ├── swiss-army-knife ├── README.md ├── admin-tools.yaml ├── overlaytest.sh └── overlaytest.yaml ├── troubleshooting-scripts ├── README.md ├── determine-leader │ └── rancher2_determine_leader.sh ├── etcd │ ├── README.md │ └── check-endpoints.sh ├── kube-apiserver │ ├── check_apiserver-to-etcd.sh │ ├── check_endpoints.sh │ └── responsiveness.sh └── kube-scheduler │ └── find-leader.sh ├── windows-access-control-lists └── README.md └── windows-agent-strict-verify 
├── README.md └── update-node.ps1 /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | -------------------------------------------------------------------------------- /NGINX-to-pods-check/README.md: -------------------------------------------------------------------------------- 1 | # NGINX-to-pods-check 2 | This script is designed to walk through all the ingresses in a cluster and test that it can curl the backend pods from the NGINX pods. This is mainly done to verify the overlay network is working along with checking the overall configuration. 3 | 4 | ## Run script 5 | ``` 6 | curl https://raw.githubusercontent.com/rancherlabs/support-tools/master/NGINX-to-pods-check/check.sh | bash 7 | ``` 8 | 9 | ## Example output 10 | 11 | ### Broken pod 12 | 13 | ``` 14 | bash ./check.sh -F Table 15 | #################################################### 16 | Pod: webserver-bad-85cf9ccdf8-8v4mh 17 | PodIP: 10.42.0.252 18 | Port: 80 19 | Endpoint: ingress-1d8af467b8b7c9682fda18c8d5053db7 20 | Ingress: test-bad 21 | Ingress Pod: nginx-ingress-controller-b2s2d 22 | Node: a1ubphylbp01 23 | Status: Fail! 24 | #################################################### 25 | ``` 26 | 27 | ``` 28 | bash ./check.sh -F Inline 29 | Checking Pod webserver-bad-8v4mh PodIP 10.42.0.252 on Port 80 in endpoint ingress-bad for ingress test-bad from nginx-ingress-controller-b2s2d on node a1ubphylbp01 NOK 30 | ``` 31 | 32 | ### Working pod 33 | 34 | ``` 35 | bash ./check.sh -F Table 36 | #################################################### 37 | Pod: webserver-bad-85cf9ccdf8-8v4mh 38 | PodIP: 10.42.0.252 39 | Port: 80 40 | Endpoint: ingress-1d8af467b8b7c9682fda18c8d5053db7 41 | Ingress: test-bad 42 | Ingress Pod: nginx-ingress-controller-b2s2d 43 | Node: a1ubphylbp01 44 | Status: Pass! 45 | #################################################### 46 | ``` 47 | 48 | ``` 49 | bash ./check.sh -F Inline 50 | Checking Pod webserver-good-65644cffd4-gbpkj PodIP 10.42.0.251 on Port 80 in endpoint ingress-good for ingress test-good from nginx-ingress-controller-b2s2d on node a1ubphylbp01 OK 51 | ``` 52 | 53 | ## Testing 54 | 55 | The following commands will deploy two workloads and ingresses. One that is working with a web server that is responding on port 80. And the other will have the webserver disabled, so it will fail to connect. 56 | 57 | ``` 58 | kubectl apply -f https://raw.githubusercontent.com/rancherlabs/support-tools/master/NGINX-to-pods-check/example-deployment.yml 59 | ``` 60 | -------------------------------------------------------------------------------- /NGINX-to-pods-check/check.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | 4 | usage() 5 | { 6 | cat << EOF 7 | usage: $0 options 8 | OPTIONS: 9 | -h Show this message 10 | -F Format Default: Table 11 | EOF 12 | } 13 | 14 | VERBOSE= 15 | while getopts .h:F:v. OPTION 16 | do 17 | case $OPTION in 18 | h) 19 | usage 20 | exit 1 21 | ;; 22 | F) 23 | FORMAT=$OPTARG 24 | ;; 25 | ?) 26 | usage 27 | exit 28 | ;; 29 | esac 30 | done 31 | 32 | if [[ -z $FORMAT ]] 33 | then 34 | FORMAT="Table" 35 | fi 36 | 37 | if [[ ! "$FORMAT" == "Table" ]] && [[ ! 
"$FORMAT" == "Inline" ]] 38 | then 39 | echo "Invalid Option for flag -F" 40 | exit 1 41 | fi 42 | 43 | 44 | kubectl get namespace -o custom-columns=NAMESPACE:.metadata.name --no-headers | while read namespace 45 | do 46 | kubectl get ingress -n "$namespace" -o custom-columns=ingress:.metadata.name --no-headers | while read ingress 47 | do 48 | kubectl get ingress $ingress -n $namespace -o yaml | grep 'service:' -A1 | awk '{print $2}' | sort | uniq | awk 'NF {p=1} p' | while read servicename 49 | do 50 | PORT="$(kubectl get endpoints "$servicename" -n "$namespace" -o yaml | grep 'port:' | awk '{print $2}'| head -n 1)" 51 | if [[ "$PORT" == 'port:' ]] 52 | then 53 | PORT="80" 54 | fi 55 | kubectl get endpoints "$servicename" -n "$namespace" -o yaml | grep '\- ip:' | awk '{print $3}' | while read endpointpodip 56 | do 57 | kubectl -n ingress-nginx get pods -l app=ingress-nginx -o custom-columns=POD:.metadata.name,NODE:.spec.nodeName,IP:.status.podIP --no-headers | while read ingresspod nodename podip 58 | do 59 | PODNAME="$(kubectl get pods -n $namespace -o custom-columns=POD:.metadata.name,IP:.status.podIP --no-headers | grep "$endpointpodip" | awk '{print $1}' | tr -d ' ')" 60 | if ! kubectl -n ingress-nginx exec $ingresspod -- curl -o /dev/null --connect-timeout 5 -s -q http://${endpointpodip}:${PORT} &> /dev/null 61 | then 62 | if [[ "$FORMAT" == "Inline" ]] 63 | then 64 | tput setaf 7; echo -n "Checking Pod $PODNAME PodIP $endpointpodip on Port $PORT in endpoint $servicename for ingress $ingress from $ingresspod on node $nodename "; tput setaf 1; echo "NOK"; tput sgr0 65 | fi 66 | if [[ "$FORMAT" == "Table" ]] 67 | then 68 | echo "####################################################" 69 | echo "Pod: $PODNAME" 70 | echo "PodIP: $endpointpodip" 71 | echo "Port: $PORT" 72 | echo "Endpoint: $servicename" 73 | echo "Ingress: $ingress" 74 | echo "Ingress Pod: $ingresspod" 75 | echo "Node: $nodename" 76 | tput setaf 1;echo "Status: Fail!"; tput sgr0 77 | echo "####################################################" 78 | fi 79 | else 80 | if [[ "$FORMAT" == "Inline" ]] 81 | then 82 | tput setaf 7; echo -n "Checking Pod $PODNAME PodIP $endpointpodip on Port $PORT in endpoint $servicename for ingress $ingress from $ingresspod on node $nodename "; tput setaf 2; echo "OK"; tput sgr0 83 | fi 84 | if [[ "$FORMAT" == "Table" ]] 85 | then 86 | echo "####################################################" 87 | echo "Pod: $PODNAME" 88 | echo "PodIP: $endpointpodip" 89 | echo "Port: $PORT" 90 | echo "Endpoint: $servicename" 91 | echo "Ingress: $ingress" 92 | echo "Ingress Pod: $ingresspod" 93 | echo "Node: $nodename" 94 | tput setaf 2;echo "Status: Pass!"; tput sgr0 95 | echo "####################################################" 96 | fi 97 | fi 98 | done 99 | done 100 | done 101 | done 102 | done 103 | 104 | -------------------------------------------------------------------------------- /NGINX-to-pods-check/example-deployment.yml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: apps/v1 3 | kind: Deployment 4 | metadata: 5 | labels: 6 | app: webserver-good 7 | name: webserver-good 8 | spec: 9 | selector: 10 | matchLabels: 11 | app: webserver-good 12 | template: 13 | metadata: 14 | labels: 15 | app: webserver-good 16 | spec: 17 | containers: 18 | - image: httpd 19 | name: webserver-good 20 | 21 | --- 22 | apiVersion: apps/v1 23 | kind: Deployment 24 | metadata: 25 | labels: 26 | app: webserver-bad 27 | name: webserver-bad 28 | spec: 29 | selector: 30 | 
matchLabels: 31 | app: webserver-bad 32 | template: 33 | metadata: 34 | labels: 35 | app: webserver-bad 36 | spec: 37 | containers: 38 | - args: 39 | - while true; do sleep 100000; done; 40 | command: 41 | - /bin/sh 42 | - -c 43 | image: httpd 44 | imagePullPolicy: Always 45 | name: webserver-bad 46 | 47 | --- 48 | apiVersion: v1 49 | kind: Service 50 | metadata: 51 | labels: 52 | app: webserver-good 53 | name: webserver-good 54 | spec: 55 | ports: 56 | - name: "80" 57 | port: 80 58 | targetPort: 80 59 | selector: 60 | app: webserver-good 61 | 62 | --- 63 | apiVersion: v1 64 | kind: Service 65 | metadata: 66 | labels: 67 | app: webserver-bad 68 | name: webserver-bad 69 | spec: 70 | ports: 71 | - name: "80" 72 | port: 80 73 | targetPort: 80 74 | selector: 75 | app: webserver-bad 76 | 77 | --- 78 | apiVersion: networking.k8s.io/v1 79 | kind: Ingress 80 | metadata: 81 | name: webserver-good 82 | spec: 83 | rules: 84 | - host: webserver-good.local 85 | http: 86 | paths: 87 | - backend: 88 | service: 89 | name: webserver-good 90 | port: 91 | number: 80 92 | path: / 93 | pathType: ImplementationSpecific 94 | --- 95 | apiVersion: networking.k8s.io/v1 96 | kind: Ingress 97 | metadata: 98 | name: webserver-bad 99 | spec: 100 | rules: 101 | - host: webserver-bad.local 102 | http: 103 | paths: 104 | - backend: 105 | service: 106 | name: webserver-bad 107 | port: 108 | number: 80 109 | path: / 110 | pathType: ImplementationSpecific 111 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # support-tools 2 | 3 | This repository contains Rancher support-tools to assist with investigating and troubleshooting issues with Rancher clusters, as well as other maintenance tasks. 4 | 5 | ## Caution: 6 | 7 | This repository contains scripts that can cause harm if used without the guidance of Rancher Support. We advise reaching out to Rancher Support before executing any of these scripts. Failure to reach out could incur production downtime. 8 | 9 | The repository consists of the following directories of tools: 10 | - collection: non-mutating, non-destructive scripts for the purpose of collecting information/logs from a cluster or node. 11 | - files: common files used in conjunction with troubleshooting commands. 12 | -------------------------------------------------------------------------------- /adjust-downstream-webhook/README.md: -------------------------------------------------------------------------------- 1 | # Adjust downstream webhook 2 | This script adjusts the version of the rancher-webhook release in downstream clusters. 3 | It decides what to do with the webhook deployment in each downstream cluster based on Rancher server version. 4 | 5 | ## Background 6 | The `rancher-webhook` chart is deployed in downstream clusters beginning with Rancher v2.7.2. 7 | On a rollback from a version >=2.7.2 to a version <2.7.2, the webhook will stay in the downstream clusters. 8 | Since each version of the webhook is one-to-one compatible with a specific version of Rancher, this can result in unexpected behavior. 9 | 10 | ## Usage 11 | 12 | ```bash 13 | ## Create a token through the UI. The token should have no scope and be made for a user who is a global admin. 14 | read -s RANCHER_TOKEN && export RANCHER_TOKEN 15 | ## The server URL for Rancher - you can get this value in the "server-url" setting. You can find it by going to Global Settings => Settings => server-url. 
The example format should be: https://rancher-test.home 16 | read -s RANCHER_URL && export RANCHER_URL 17 | bash adjust-downstream-webhook.sh 18 | ``` 19 | For Rancher setups using self-signed certificates, you can specify `--insecure-skip-tls-verify` to force the script to 20 | ignore TLS certificate verification. Note that this option is insecure, and should be avoided for production setups. 21 | 22 | ## Notes 23 | This script should be run after rolling back Rancher to the desired version 24 | (for example, when going from v2.7.2 to v2.7.0, only run this script after v2.7.0 is running). 25 | -------------------------------------------------------------------------------- /adjust-downstream-webhook/adjust-downstream-webhook.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | usage() { 4 | cat << EOF 5 | usage: bash adjust-downstream-webhook.sh [--insecure-skip-tls-verify] 6 | 7 | This script adjusts the rancher-webhook chart release in all clusters managed by Rancher (excluding the local cluster). 8 | Depending on the version of Rancher, it either deletes the downstream webhook release, adjusts its version and restarts, or does nothing. 9 | Requires kubectl and helm to be installed and available on \$PATH. 10 | Requires rancher-charts helm repo. If you don't have it, please add: helm repo add rancher-charts https://charts.rancher.io && helm repo update 11 | 12 | RANCHER_URL without a trailing slash must be set with the server URL of Rancher. 13 | RANCHER_TOKEN must be set with an admin token generated with no scope. 14 | To ignore TLS verification, set --insecure-skip-tls-verify. 15 | 16 | Users also need to ensure they have the rancher-charts repo in the local Helm index. 17 | EOF 18 | } 19 | 20 | if [ "$1" == "-h" ]; then 21 | usage 22 | exit 0 23 | fi 24 | 25 | delete_webhook() { 26 | cluster="$1" 27 | current_chart=$(helm list -n cattle-system -l name=rancher-webhook | tail -1 | cut -f 6) 28 | echo "Deleting $current_chart from cluster $cluster." 29 | helm uninstall rancher-webhook -n cattle-system 30 | } 31 | 32 | replace_webhook() { 33 | cluster="$1" 34 | new_version="$2" 35 | 36 | echo "Updating the agent to make it remember the min version $new_version of rancher-webhook, so that it can deploy it when needed in the future in cluster $cluster." 37 | kubectl set env -n cattle-system deployment/cattle-cluster-agent CATTLE_RANCHER_WEBHOOK_MIN_VERSION="$new_version" 38 | 39 | helm get values -n cattle-system rancher-webhook -o yaml > current_values.yaml 40 | echo "Re-installing rancher-webhook to use $new_version in cluster $cluster." 
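# Note: "helm upgrade --install" also covers the case where the release is not currently installed, and the values captured above carry any user-supplied overrides across the version change.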
41 | helm upgrade --install rancher-webhook rancher-charts/rancher-webhook -n cattle-system --version "$new_version" --values current_values.yaml 42 | rm -f current_values.yaml 43 | } 44 | 45 | adjust_webhook() { 46 | cluster="$1" 47 | rancher_version="$2" 48 | 49 | if [[ "$rancher_version" =~ 2\.6\.13 ]]; then 50 | replace_webhook "$cluster" 1.0.9+up0.2.10 51 | elif [[ "$rancher_version" =~ 2\.6\.[0-9]$ ]] || [[ "$rancher_version" =~ 2\.6\.1[0-2]$ ]]; then 52 | delete_webhook "$cluster" 53 | elif [[ "$rancher_version" =~ 2\.7\.[0-1]$ ]]; then 54 | delete_webhook "$cluster" 55 | elif [[ "$rancher_version" =~ 2\.7\.2 ]]; then 56 | replace_webhook "$cluster" 2.0.2+up0.3.2 57 | elif [[ "$rancher_version" =~ 2\.7\.3 ]]; then 58 | replace_webhook "$cluster" 2.0.3+up0.3.3 59 | elif [[ "$rancher_version" =~ 2\.7\.4 ]]; then 60 | replace_webhook "$cluster" 2.0.4+up0.3.4 61 | elif [[ "$rancher_version" =~ 2\.[7-9]\..* ]]; then 62 | # This matches anything else above 2.7, including 2.8.x and 2.9.x. 63 | echo "No need to delete rancher-webhook, given Rancher version $rancher_version." 64 | echo "Ensuring CATTLE_RANCHER_WEBHOOK_MIN_VERSION is set to an empty string." 65 | kubectl set env -n cattle-system deployment/cattle-cluster-agent CATTLE_RANCHER_WEBHOOK_MIN_VERSION='' 66 | else 67 | echo "Nothing to do, given Rancher version $rancher_version." 68 | fi 69 | } 70 | 71 | if [ -n "$DEBUG" ] 72 | then 73 | set -x 74 | fi 75 | 76 | if [[ -z "$RANCHER_TOKEN" || -z "$RANCHER_URL" ]] 77 | then 78 | echo "Required environment variables aren't properly set." 79 | usage 80 | exit 1 81 | fi 82 | 83 | kubeconfig=" 84 | apiVersion: v1 85 | kind: Config 86 | clusters: 87 | - name: \"local\" 88 | cluster: 89 | server: \"$RANCHER_URL\" 90 | 91 | users: 92 | - name: \"local\" 93 | user: 94 | token: \"$RANCHER_TOKEN\" 95 | 96 | 97 | contexts: 98 | - name: \"local\" 99 | context: 100 | user: \"local\" 101 | cluster: \"local\" 102 | 103 | current-context: \"local\" 104 | " 105 | 106 | echo "$kubeconfig" >> .temp_kubeconfig.yaml 107 | # helm will complain if these are group/world readable 108 | chmod g-r .temp_kubeconfig.yaml 109 | chmod o-r .temp_kubeconfig.yaml 110 | export KUBECONFIG="$(pwd)/.temp_kubeconfig.yaml" 111 | 112 | if [[ "$1" == "--insecure-skip-tls-verify" ]] 113 | then 114 | kubectl config set clusters.local.insecure-skip-tls-verify true 115 | fi 116 | 117 | rancher_version=$(kubectl get setting server-version -o jsonpath='{.value}') 118 | if [[ -z "$rancher_version" ]]; then 119 | echo 'Failed to look up Rancher version.' 120 | exit 1 121 | fi 122 | 123 | clusters=$(kubectl get clusters.management.cattle.io -o jsonpath="{.items[*].metadata.name}") 124 | for cluster in $clusters 125 | do 126 | if [ "$cluster" == "local" ] 127 | then 128 | echo "Skipping deleting rancher-webhook in the local cluster." 
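# The local cluster hosts Rancher itself and is intentionally excluded; only downstream clusters are adjusted by this script.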
129 | continue 130 | fi 131 | kubectl config set clusters.local.server "$RANCHER_URL/k8s/clusters/$cluster" 132 | adjust_webhook "$cluster" "$rancher_version" 133 | done 134 | 135 | rm .temp_kubeconfig.yaml 136 | -------------------------------------------------------------------------------- /bad-ingress-scanner/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:20.04 2 | MAINTAINER Matthew Mattox 3 | 4 | ENV DEBIAN_FRONTEND=noninteractive 5 | 6 | RUN apt-get update && apt-get install -yq --no-install-recommends \ 7 | apt-utils \ 8 | curl \ 9 | && apt-get clean && rm -rf /var/lib/apt/lists/* 10 | 11 | ## Install kubectl 12 | RUN curl -kLO "https://storage.googleapis.com/kubernetes-release/release/$(curl -ks https://storage.googleapis.com/kubernetes-release/release/stable.txt)/bin/linux/amd64/kubectl" && \ 13 | chmod u+x kubectl && \ 14 | mv kubectl /usr/local/bin/kubectl 15 | 16 | COPY *.sh /root/ 17 | RUN chmod +x /root/*.sh 18 | CMD /root/run.sh 19 | -------------------------------------------------------------------------------- /bad-ingress-scanner/README.md: -------------------------------------------------------------------------------- 1 | [![Docker Pulls](https://img.shields.io/docker/pulls/cube8021/bad-ingress-scanner.svg)](https://hub.docker.com/r/rancher/bad-ingress-scanner) 2 | [![Build Status](https://drone-publish.rancher.io/api/badges/rancherlabs/support-tools/status.svg)](https://drone-publish.rancher.io/rancherlabs/support-tools) 3 | 4 | # Bad ingress scanner 5 | This tool is designed to scan for misbehaving ingresses, for example an ingress that references a non-existent SSL certificate, or an ingress with an empty or missing backend service. 6 | 7 | ## Running report - remotely 8 | ```bash 9 | wget -O ingress-scanner.sh https://raw.githubusercontent.com/rancherlabs/support-tools/master/bad-ingress-scanner/run.sh 10 | chmod +x ./ingress-scanner.sh 11 | ./ingress-scanner.sh 12 | ``` 13 | 14 | ## Running report - in-cluster 15 | ```bash 16 | kubectl -n ingress-nginx delete job ingress-scanner 17 | kubectl apply -f deployment.yaml 18 | kubectl -n ingress-nginx logs -l app=ingress-scanner 19 | ``` 20 | 21 | ## Example output 22 | ```bash 23 | Pod: nginx-ingress-controller-r8kkz 24 | #################################################################### 25 | Found bad endpoints. 26 | default/ingress-75f627ce3d0ccd29dd268e0ab2b37008 27 | default/test-01-example-com 28 | default/test-02-example-com 29 | #################################################################### 30 | Found bad certs. 31 | default/test-01-example-com 32 | default/test-02-example-com 33 | ``` 34 | 35 | ## Removing 36 | ```bash 37 | kubectl delete -f deployment.yaml 38 | ``` 39 | 40 | ## Deploying test ingress rules 41 | Note: These rules are designed to be broken/invalid and are deployed to the default namespace. 
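The test ingresses reference a backend service and a TLS secret that intentionally do not exist. After applying them with the `kubectl apply` command below, a quick check along these lines (a minimal sketch; object names taken from `bad-ingress.yaml`) should confirm that the referenced objects are missing:
```bash
# Both lookups are expected to report "NotFound" while the test rules are in place.
kubectl -n default get service test-01-example-com test-02-example-com
kubectl -n default get secret test-02-example-com
```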
42 | ```bash 43 | kubectl apply -f bad-ingress.yaml 44 | ``` 45 | 46 | ## Removing test ingress rules 47 | ```bash 48 | kubectl delete -f bad-ingress.yaml 49 | ``` 50 | -------------------------------------------------------------------------------- /bad-ingress-scanner/bad-ingress.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: networking.k8s.io/v1 3 | kind: Ingress 4 | metadata: 5 | name: test-01 6 | spec: 7 | rules: 8 | - host: test-01.example.com 9 | http: 10 | paths: 11 | - backend: 12 | service: 13 | name: test-01-example-com 14 | port: 15 | number: 80 16 | path: / 17 | pathType: Prefix 18 | --- 19 | apiVersion: networking.k8s.io/v1 20 | kind: Ingress 21 | metadata: 22 | name: test-02 23 | spec: 24 | rules: 25 | - host: test-02.example.com 26 | http: 27 | paths: 28 | - backend: 29 | service: 30 | name: test-02-example-com 31 | port: 32 | number: 80 33 | path: / 34 | pathType: Prefix 35 | tls: 36 | - hosts: 37 | - test-02.example.com 38 | secretName: test-02-example-com 39 | --- 40 | apiVersion: networking.k8s.io/v1 41 | kind: Ingress 42 | metadata: 43 | name: test-02-dup 44 | spec: 45 | rules: 46 | - host: test-02.example.com 47 | http: 48 | paths: 49 | - backend: 50 | service: 51 | name: test-02-example-com 52 | port: 53 | number: 80 54 | path: / 55 | pathType: Prefix 56 | tls: 57 | - hosts: 58 | - test-02.example.com 59 | secretName: test-02-example-com 60 | -------------------------------------------------------------------------------- /bad-ingress-scanner/deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ServiceAccount 3 | metadata: 4 | name: ingress-scanner 5 | namespace: ingress-nginx 6 | --- 7 | kind: ClusterRole 8 | apiVersion: rbac.authorization.k8s.io/v1 9 | metadata: 10 | name: ingress-scanner 11 | namespace: ingress-nginx 12 | rules: 13 | - apiGroups: 14 | - "" 15 | resources: 16 | - pods 17 | - pods/log 18 | verbs: 19 | - get 20 | - list 21 | - apiGroups: 22 | - networking.k8s.io 23 | resources: 24 | - ingresses 25 | verbs: 26 | - list 27 | - watch 28 | - get 29 | --- 30 | apiVersion: rbac.authorization.k8s.io/v1 31 | kind: ClusterRoleBinding 32 | metadata: 33 | name: ingress-scanner 34 | roleRef: 35 | apiGroup: rbac.authorization.k8s.io 36 | kind: ClusterRole 37 | name: ingress-scanner 38 | subjects: 39 | - kind: ServiceAccount 40 | name: ingress-scanner 41 | namespace: ingress-nginx 42 | --- 43 | apiVersion: batch/v1 44 | kind: Job 45 | metadata: 46 | name: ingress-scanner 47 | namespace: ingress-nginx 48 | spec: 49 | backoffLimit: 10 50 | completions: 1 51 | parallelism: 1 52 | template: 53 | metadata: 54 | labels: 55 | app: ingress-scanner 56 | job-name: ingress-scanner 57 | spec: 58 | affinity: 59 | nodeAffinity: 60 | requiredDuringSchedulingIgnoredDuringExecution: 61 | nodeSelectorTerms: 62 | - matchExpressions: 63 | - key: beta.kubernetes.io/os 64 | operator: NotIn 65 | values: 66 | - windows 67 | - key: node-role.kubernetes.io/worker 68 | operator: Exists 69 | containers: 70 | - image: rancher/bad-ingress-scanner:latest 71 | imagePullPolicy: IfNotPresent 72 | name: ingress-scanner 73 | restartPolicy: Never 74 | serviceAccount: ingress-scanner 75 | serviceAccountName: ingress-scanner 76 | tolerations: 77 | - effect: NoExecute 78 | operator: Exists 79 | - effect: NoSchedule 80 | operator: Exists 81 | -------------------------------------------------------------------------------- /bad-ingress-scanner/run.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | rm -f ./bad-endpoints.list ./bad-certs.list # start with clean lists so results from every ingress controller pod can be appended below 3 | echo "####################################################################" 4 | echo "Scanning ingress controllers..." 5 | for ingressPod in `kubectl -n ingress-nginx get pods -l app=ingress-nginx -o name | awk -F'/' '{print $2}'` 6 | do 7 | echo "Pod: $ingressPod" 8 | kubectl -n ingress-nginx logs "$ingressPod" | grep 'Error obtaining Endpoints for Service' | awk -F '"' '{print $2}' >> ./bad-endpoints.list 9 | kubectl -n ingress-nginx logs "$ingressPod" | grep 'Error getting SSL certificate' | awk -F '"' '{print $2}' >> ./bad-certs.list 10 | done 11 | echo "####################################################################" 12 | echo "Sorting and removing duplicates from lists..." 13 | cat ./bad-endpoints.list | sort | uniq > ./bad-endpoints.list2 14 | mv ./bad-endpoints.list2 ./bad-endpoints.list 15 | cat ./bad-certs.list | sort | uniq > ./bad-certs.list2 16 | mv ./bad-certs.list2 ./bad-certs.list 17 | 18 | if [[ ! -z `cat ./bad-endpoints.list` ]] 19 | then 20 | echo "####################################################################" 21 | echo "Found bad endpoints." 22 | cat ./bad-endpoints.list 23 | else 24 | echo "####################################################################" 25 | echo "No bad endpoints found." 26 | fi 27 | 28 | if [[ ! -z `cat ./bad-certs.list` ]] 29 | then 30 | echo "####################################################################" 31 | echo "Found bad certs." 32 | cat ./bad-certs.list 33 | else 34 | echo "####################################################################" 35 | echo "No bad certs found." 36 | fi 37 | -------------------------------------------------------------------------------- /change-nodetemplate-owner/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:20.04 2 | MAINTAINER patrick0057 3 | ENV TERM xterm 4 | RUN apt-get update && apt-get install -y apt-transport-https curl gnupg2 && \ 5 | curl -s https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add - && \ 6 | echo "deb https://apt.kubernetes.io/ kubernetes-xenial main" | tee -a /etc/apt/sources.list.d/kubernetes.list && \ 7 | apt-get update && \ 8 | apt-get install -y kubectl jq && \ 9 | rm -rf /var/lib/apt/lists/* && \ 10 | mkdir /root/.kube/ 11 | COPY change-nodetemplate-owner.sh /usr/bin/ 12 | WORKDIR /root 13 | RUN chmod +x /usr/bin/change-nodetemplate-owner.sh 14 | ENTRYPOINT ["/usr/bin/change-nodetemplate-owner.sh"] 15 | CMD [] 16 | -------------------------------------------------------------------------------- /change-nodetemplate-owner/README.md: -------------------------------------------------------------------------------- 1 | ## Update 2 | Note: As of Rancher v2.3.3 this should no longer be necessary. 3 | https://github.com/rancher/rancher/issues/12186 4 | 5 | ## Change node template owner 6 | This script will change your node template owner in Rancher 2.x. You can run this script as a Docker image or directly as a bash script. You'll need the cluster ID and the user ID you want to change the ownership to. 7 | 1. To obtain the cluster ID in the Rancher user interface, navigate to Global> "Your Cluster Name"> then grab the cluster ID from your address bar. An example URL and the cluster ID derived from it are listed below. 8 | * Example URL: `https:///c/c-48x9z/monitoring` 9 | * Derived cluster ID from the above URL: **c-48x9z** 10 | 2. 
Now we need the user ID of the user to become the new node template owner, navigate to Global> Users> to find the ID. 11 | 3. To run the script using a docker image, make sure your $KUBECONFIG is set to the full path of your Rancher local cluster kube config then run the following command. 12 | 13 | ```bash 14 | docker run -ti -v $KUBECONFIG:/root/.kube/config patrick0057/change-nodetemplate-owner -c -n 15 | ``` 16 | 4. To run the script directly, just download change-nodetemplate-owner.sh, make sure your $KUBECONFIG or ~/.kube/config is pointing to the correct Rancher local cluster then run the following command: 17 | 18 | ```bash 19 | curl -LO https://github.com/rancherlabs/support-tools/raw/master/change-nodetemplate-owner/change-nodetemplate-owner.sh 20 | ./change-nodetemplate-owner.sh -c -n 21 | ``` 22 | ## Assign a node template to a cluster's node pool. 23 | Assign a node template to a cluster's node pool. This is useful for situations where the original owner of a cluster has been deleted which also deletes their node templates. To use this task successfully it is recommended that you create a new node template in the UI before 24 | using it. Make sure the node template matches the original ones as closely as possible. You will be shown options to choose from and 25 | prompted for confirmation. 26 | 27 | Run script with docker image 28 | 29 | ```bash 30 | docker run -ti -v $KUBECONFIG:/root/.kube/config patrick0057/change-nodetemplate-owner -t changenodetemplate -c 31 | ``` 32 | Run script from bash command line: 33 | 34 | ```bash 35 | curl -LO https://github.com/rancherlabs/support-tools/raw/master/change-nodetemplate-owner/change-nodetemplate-owner.sh 36 | ./change-nodetemplate-owner.sh -t changenodetemplate -c 37 | ``` 38 | -------------------------------------------------------------------------------- /change-nodetemplate-owner/change-nodetemplate-owner.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | newowner='' 3 | clusterid='' 4 | red=$(tput setaf 1) 5 | green=$(tput setaf 2) 6 | reset=$(tput sgr0) 7 | function helpmenu () { 8 | echo "Change the owner of all node templates in a cluster: 9 | ${green}change-nodetemplate-owner.sh -c -n ${reset} 10 | 11 | Assign a nodetemplate to a cluster's nodepool. This is useful 12 | for situations where the original owner of a cluster has been deleted 13 | which also deletes their nodetemplates. To use this task successfully 14 | it is recommended that you create a new nodetemplate in the UI before 15 | using it. Make sure the node template matches the original ones as 16 | closely as possible. You will be shown options to choose from and 17 | prompted for confirmation. 18 | ${green}change-nodetemplate-owner.sh -t changenodetemplate -c ${reset} 19 | " 20 | exit 1 21 | } 22 | while getopts "hc:n:t:" opt; do 23 | case ${opt} in 24 | h) # process option h 25 | helpmenu 26 | ;; 27 | c) # process option c 28 | clusterid=$OPTARG 29 | ;; 30 | n) # process option n 31 | newowner=$OPTARG 32 | ;; 33 | t) # process option t 34 | task=$OPTARG 35 | ;; 36 | \?) 37 | helpmenu 38 | exit 1 39 | ;; 40 | esac 41 | done 42 | #shift $((OPTIND -1)) 43 | if [[ -z "$task" ]] && [ -z "$clusterid" ]; then 44 | helpmenu 45 | exit 1 46 | fi 47 | if ! hash kubectl 2>/dev/null; then 48 | echo "!!!kubectl was not found!!!" 
49 | echo "!!!download and install with:" 50 | echo "Linux users:" 51 | echo "curl -LO https://storage.googleapis.com/kubernetes-release/release/$(curl -s https://storage.googleapis.com/kubernetes-release/release/stable.txt)/bin/linux/amd64/kubectl" 52 | echo "chmod +x ./kubectl" 53 | echo "mv ./kubectl /bin/kubectl" 54 | echo "!!!" 55 | echo "Mac users:" 56 | echo "brew install kubernetes-cli" 57 | exit 1 58 | fi 59 | if ! hash jq 2>/dev/null; then 60 | echo '!!!jq was not found!!!' 61 | echo "!!!download and install with:" 62 | echo "Linux users:" 63 | echo "curl -L -O https://github.com/stedolan/jq/releases/download/jq-1.6/jq-linux64" 64 | echo "chmod +x jq-linux64" 65 | echo "mv jq-linux64 /bin/jq" 66 | echo "!!!" 67 | echo "Mac users:" 68 | echo "brew install jq" 69 | echo "brew link jq" 70 | exit 1 71 | fi 72 | if ! hash sed 2>/dev/null; then 73 | echo '!!!sed was not found!!!' 74 | exit 1 75 | fi 76 | if [ ! -f ~/.kube/config ] && [ -z "$KUBECONFIG" ]; then 77 | echo "${red}~/.kube/config does not exist and \$KUBECONFIG is not set!${reset} " 78 | exit 1 79 | fi 80 | function yesno () { 81 | shopt -s nocasematch 82 | response='' 83 | i=0 84 | while [[ ${response} != 'y' ]] && [[ ${response} != 'n' ]] 85 | do 86 | i=$((i+1)) 87 | if [ $i -gt 10 ]; then 88 | echo "Script is destined to loop forever, aborting! Make sure your docker run command has -ti then try again." 89 | exit 1 90 | fi 91 | printf '(y/n): ' 92 | read -n1 response 93 | echo 94 | done 95 | shopt -u nocasematch 96 | } 97 | echo 98 | kubectl get node 99 | echo 100 | 101 | if [ "$task" = '' ]; then 102 | if [[ -z "$clusterid" ]] || [[ -z "$newowner" ]]; 103 | then 104 | helpmenu 105 | exit 1 106 | fi 107 | echo -e "${green}Cluster: $clusterid${reset}" 108 | echo -e "${green}New Owner: $newowner${reset}" 109 | for nodepoolid in $(kubectl -n $clusterid get nodepool --no-headers -o=custom-columns=NAME:.metadata.name); do 110 | nodetemplateid=$(kubectl -n $clusterid get nodepool $nodepoolid -o json | jq -r .spec.nodeTemplateName | cut -d : -f 2) 111 | oldowner=$(kubectl -n $clusterid get nodepool $nodepoolid -o json | jq -r .spec.nodeTemplateName | cut -d : -f 1) 112 | echo -e "${red}creating new nodetemplate under $newowner's namespace${reset}" 113 | kubectl -n $oldowner get nodetemplate $nodetemplateid -o yaml | sed 's/'$oldowner'/'$newowner'/g' | kubectl apply --namespace=$newowner -f - 114 | echo -e "${red}patching $nodepoolid old owner: $oldowner new owner: $newowner${reset}" 115 | kubectl -n $clusterid patch nodepool $nodepoolid -p '{"spec":{"nodeTemplateName": "'$newowner:$nodetemplateid'"}}' --type=merge 116 | done 117 | echo 118 | echo 119 | echo -e "${green}We're all done! 
If see you kubectl complaining about duplicate nodetemplates, this is safe to ignore.${reset}" 120 | fi 121 | 122 | if [ "$task" = 'changenodetemplate' ]; then 123 | if [ -z "$clusterid" ] 124 | then 125 | helpmenu 126 | exit 1 127 | fi 128 | for nodepoolid in $(kubectl -n $clusterid get nodepool --no-headers -o=custom-columns=NAME:.metadata.name); do 129 | nodetemplateid=$(kubectl -n $clusterid get nodepool $nodepoolid -o json | jq -r .spec.nodeTemplateName | cut -d : -f 2) 130 | hostnameprefix=$(kubectl -n $clusterid get nodepool $nodepoolid -o json | jq -r .spec.hostnamePrefix | cut -d : -f 2) 131 | oldowner=$(kubectl -n $clusterid get nodepool $nodepoolid -o json | jq -r .spec.nodeTemplateName | cut -d : -f 1) 132 | echo "${green}-----------------------------------------------------------------------${reset}" 133 | echo "${green}Name prefix: ${hostnameprefix}${reset}" 134 | echo "${green}Nodepool ID: ${nodepoolid}${reset}" 135 | echo "${green}Owner ID: ${oldowner}${reset}" 136 | echo "${green}Nodetemplate ID: ${nodetemplateid}${reset}" 137 | echo "Would you like to change the node template for nodepool called ${hostnameprefix}?" 138 | 139 | yesno 140 | if [ ${response} == 'y' ] 141 | then 142 | echo "nodetemplate ID's available for selection: " 143 | echo "${green}-${reset}" 144 | IFS=$'\n' 145 | echo "${green}name: ID${reset}" 146 | for nt_namespace_name in $(kubectl get nodetemplate --all-namespaces -o=custom-columns=NAMESPACE:.metadata.namespace,NAME:.metadata.name --no-headers); do 147 | nodetemplateid1=$(echo ${nt_namespace_name} | sed -e's/ */ /g' | cut -d" " -f 2) 148 | oldowner1=$(echo ${nt_namespace_name} | sed -e's/ */ /g' | cut -d" " -f 1) 149 | nodetemplateid_displayname1=$(kubectl -n $oldowner1 get nodetemplate $nodetemplateid1 -o json | jq -r .spec.displayName | cut -d : -f 2) 150 | echo "${green}${nodetemplateid_displayname1}: ${nodetemplateid1}${reset}" 151 | done 152 | IFS=$' ' 153 | echo "${green}-${reset}" 154 | echo "What should the new nodetemplate ID be?" 155 | read new_nodetemplateid 156 | echo "I have ${new_nodetemplateid}, should I proceed?" 
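# Ask for a final confirmation; the nodepool is only patched with the chosen template ID after an explicit 'y'.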
157 | yesno 158 | if [ ${response} == 'y' ] 159 | then 160 | echo "${green}OK making changes${reset}" 161 | echo -e "${red}patching $nodepoolid old template ID: $nodetemplateid new template ID: ${new_nodetemplateid}${reset}" 162 | kubectl -n $clusterid patch nodepool $nodepoolid -p '{"spec":{"nodeTemplateName": "'$oldowner:${new_nodetemplateid}'"}}' --type=merge 163 | else 164 | echo "${green}No changes made, moving on.${reset}" 165 | fi 166 | fi 167 | 168 | done 169 | echo "${green}-----------------------------------------------------------------------${reset}" 170 | fi 171 | -------------------------------------------------------------------------------- /charts/index.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | entries: 3 | rancher: 4 | - apiVersion: v1 5 | appVersion: 0.0.1 6 | created: "2021-03-04T11:39:06.0872245-06:00" 7 | description: Rancher Systems Info 8 | digest: b2d217c3c7ab839bb11151cdd1d99ab8920f0241dcda74764e3932969f17fe0c 9 | home: https://rancher.com 10 | icon: https://github.com/rancher/ui/blob/master/public/assets/images/logos/welcome-cow.svg 11 | keywords: 12 | - rancher 13 | - support 14 | maintainers: 15 | - email: charts@rancher.com 16 | name: Rancher Labs 17 | name: rancher 18 | sources: 19 | - https://github.com/rancherlabs/systems-info 20 | urls: 21 | - https://rancherlabs.github.io/systems-info/charts/rancher-0.0.1.tgz 22 | version: 0.0.1 23 | generated: "2021-03-04T11:39:06.0847725-06:00" 24 | -------------------------------------------------------------------------------- /charts/rancher-0.0.1.tgz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rancherlabs/support-tools/2fa26cd1c13cc329f5553f88adfe693ee978848e/charts/rancher-0.0.1.tgz -------------------------------------------------------------------------------- /charts/robots.txt: -------------------------------------------------------------------------------- 1 | "User-Agent: *nDisallow: /" 2 | -------------------------------------------------------------------------------- /charts/systems-info/.helmignore: -------------------------------------------------------------------------------- 1 | # Patterns to ignore when building packages. 2 | # This supports shell glob matching, relative path matching, and 3 | # negation (prefixed with !). Only one pattern per line. 
4 | .DS_Store 5 | # Common VCS dirs 6 | .git/ 7 | .gitignore 8 | .bzr/ 9 | .bzrignore 10 | .hg/ 11 | .hgignore 12 | .svn/ 13 | # Common backup files 14 | *.swp 15 | *.bak 16 | *.tmp 17 | *.orig 18 | *~ 19 | # Various IDEs 20 | .project 21 | .idea/ 22 | *.tmproj 23 | .vscode/ 24 | -------------------------------------------------------------------------------- /charts/systems-info/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | name: rancher 3 | description: Rancher Systems Info 4 | version: 0.0.1 5 | appVersion: 0.0.1 6 | home: https://rancher.com 7 | icon: https://github.com/rancher/ui/blob/master/public/assets/images/logos/welcome-cow.svg 8 | keywords: 9 | - rancher 10 | - support 11 | sources: 12 | - https://github.com/rancherlabs/systems-info 13 | maintainers: 14 | - name: Rancher Labs 15 | email: charts@rancher.com 16 | -------------------------------------------------------------------------------- /charts/systems-info/questions.yml: -------------------------------------------------------------------------------- 1 | labels: 2 | io.cattle.role: project # options are cluster/project 3 | categories: 4 | - Support 5 | Name: systems-info 6 | Namespace: systems-info 7 | questions: 8 | - variable: defaultImage 9 | default: true 10 | description: "Use default Docker image" 11 | label: Use Default Image 12 | type: boolean 13 | show_subquestion_if: false 14 | group: "Container Images" 15 | subquestions: 16 | - variable: image.repository 17 | default: "docker.io/rancher/systems-info" 18 | description: "Docker image repository" 19 | type: string 20 | label: Image Repository 21 | - variable: image.tag 22 | default: "v0.0.1" 23 | description: "Docker image tag" 24 | type: string 25 | label: Image Tag 26 | - variable: schedule 27 | required: true 28 | default: '0 0 * * 1' 29 | description: "Backup schedule in crontab format" 30 | type: string 31 | label: "Backup schedule" 32 | - variable: rancher_name 33 | default: "Rancher" 34 | description: "Your human readable for this install" 35 | type: string 36 | label: Rancher Name 37 | required: true 38 | group: "General Settings" 39 | - variable: to_address 40 | default: "" 41 | description: "Email address for sending report to" 42 | type: string 43 | label: To address 44 | required: true 45 | group: "General Settings" 46 | - variable: send_to_support 47 | default: "true" 48 | description: "Would you like this report sent to Rancher Support?" 
49 | type: string 50 | label: Send report to Rancher Support 51 | required: false 52 | group: "General Settings" 53 | - variable: smtp_host 54 | default: "" 55 | description: "SMTP server hostname" 56 | type: string 57 | label: SMTP Server 58 | required: true 59 | group: "Mail Server Settings" 60 | - variable: smtp_port 61 | default: "587" 62 | description: "SMTP server port" 63 | type: string 64 | label: SMTP port 65 | required: true 66 | group: "Mail Server Settings" 67 | - variable: smtp_user 68 | default: "" 69 | description: "SMTP username" 70 | type: string 71 | label: SMTP username 72 | required: true 73 | group: "Mail Server Settings" 74 | - variable: smtp_pass 75 | default: "" 76 | description: "SMTP password" 77 | type: password 78 | label: SMTP password 79 | required: true 80 | group: "Mail Server Settings" 81 | - variable: from_address 82 | default: "" 83 | description: "From address" 84 | type: string 85 | label: From address 86 | required: true 87 | group: "Mail Server Settings" 88 | -------------------------------------------------------------------------------- /charts/systems-info/templates/_helpers.tpl: -------------------------------------------------------------------------------- 1 | {{/* vim: set filetype=mustache: */}} 2 | {{/* 3 | Expand the name of the chart. 4 | */}} 5 | {{- define "systems-information.name" -}} 6 | {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} 7 | {{- end -}} 8 | 9 | {{/* 10 | Create a default fully qualified app name. 11 | We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). 12 | If release name contains chart name it will be used as a full name. 13 | */}} 14 | {{- define "systems-information.fullname" -}} 15 | {{- if .Values.fullnameOverride -}} 16 | {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}} 17 | {{- else -}} 18 | {{- $name := default .Chart.Name .Values.nameOverride -}} 19 | {{- if contains $name .Release.Name -}} 20 | {{- .Release.Name | trunc 63 | trimSuffix "-" -}} 21 | {{- else -}} 22 | {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} 23 | {{- end -}} 24 | {{- end -}} 25 | {{- end -}} 26 | 27 | {{/* 28 | Create chart name and version as used by the chart label. 29 | */}} 30 | {{- define "systems-information.chart" -}} 31 | {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}} 32 | {{- end -}} 33 | 34 | {{/* 35 | Common labels 36 | */}} 37 | {{- define "systems-information.labels" -}} 38 | helm.sh/chart: {{ include "systems-information.chart" . }} 39 | {{ include "systems-information.selectorLabels" . }} 40 | {{- if .Chart.AppVersion }} 41 | app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} 42 | {{- end }} 43 | app.kubernetes.io/managed-by: {{ .Release.Service }} 44 | {{- end -}} 45 | 46 | {{/* 47 | Selector labels 48 | */}} 49 | {{- define "systems-information.selectorLabels" -}} 50 | app.kubernetes.io/name: {{ include "systems-information.name" . }} 51 | app.kubernetes.io/instance: {{ .Release.Name }} 52 | {{- end -}} 53 | 54 | {{/* 55 | Create the name of the service account to use 56 | */}} 57 | {{- define "systems-information.serviceAccountName" -}} 58 | {{- if .Values.serviceAccount.create -}} 59 | {{ default (include "systems-information.fullname" .) 
.Values.serviceAccount.name }} 60 | {{- else -}} 61 | {{ default "default" .Values.serviceAccount.name }} 62 | {{- end -}} 63 | {{- end -}} 64 | -------------------------------------------------------------------------------- /charts/systems-info/templates/clusterrolebinding.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRoleBinding 3 | metadata: 4 | name: systems-info 5 | roleRef: 6 | apiGroup: rbac.authorization.k8s.io 7 | kind: ClusterRole 8 | name: cluster-admin 9 | subjects: 10 | - kind: ServiceAccount 11 | name: systems-info 12 | namespace: systems-info 13 | -------------------------------------------------------------------------------- /charts/systems-info/templates/deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: batch/v1beta1 2 | kind: CronJob 3 | metadata: 4 | name: systems-info 5 | spec: 6 | schedule: {{ .Values.schedule | quote }} 7 | jobTemplate: 8 | spec: 9 | template: 10 | spec: 11 | containers: 12 | - env: 13 | - name: from_address 14 | value: {{ .Values.from_address | quote }} 15 | - name: rancher_name 16 | value: {{ .Values.rancher_name | quote }} 17 | - name: send_to_support 18 | value: {{ .Values.send_to_support | quote }} 19 | - name: smtp_host 20 | value: {{ .Values.smtp_host | quote }} 21 | - name: smtp_port 22 | value: {{ .Values.smtp_port | quote }} 23 | - name: to_address 24 | value: {{ .Values.to_address | quote }} 25 | - name: smtp_user 26 | valueFrom: 27 | secretKeyRef: 28 | key: smtp_user 29 | name: mail-config 30 | optional: false 31 | - name: smtp_pass 32 | valueFrom: 33 | secretKeyRef: 34 | key: smtp_pass 35 | name: mail-config 36 | optional: false 37 | image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" 38 | imagePullPolicy: {{ .Values.image.pullPolicy }} 39 | name: system-information 40 | dnsPolicy: ClusterFirst 41 | restartPolicy: Never 42 | schedulerName: default-scheduler 43 | serviceAccountName: systems-info 44 | -------------------------------------------------------------------------------- /charts/systems-info/templates/namespace.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Namespace 3 | metadata: 4 | name: systems-info 5 | -------------------------------------------------------------------------------- /charts/systems-info/templates/secret.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Secret 3 | metadata: 4 | name: mail-config 5 | namespace: systems-info 6 | type: "Opaque" 7 | data: 8 | smtp_user: {{ default "" .Values.smtp_user | b64enc | quote }} 9 | smtp_pass: {{ default "" .Values.smtp_pass | b64enc | quote }} 10 | -------------------------------------------------------------------------------- /charts/systems-info/templates/serviceaccount.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ServiceAccount 3 | metadata: 4 | name: systems-info 5 | namespace: systems-info 6 | -------------------------------------------------------------------------------- /charts/systems-info/values.yaml: -------------------------------------------------------------------------------- 1 | replicaCount: 1 2 | 3 | image: 4 | repository: docker.io/rancher/systems-info 5 | tag: v0.0.1 6 | pullPolicy: IfNotPresent 7 | 8 | imagePullSecrets: [] 9 | 10 | Name: systems-info 11 | Namespace: 
systems-info 12 | 13 | rbac: 14 | create: true 15 | clusterAdminRole: true 16 | 17 | serviceAccount: 18 | create: true 19 | name: systems-info 20 | 21 | resources: 22 | limits: 23 | cpu: 100m 24 | memory: 100Mi 25 | requests: 26 | cpu: 100m 27 | memory: 100Mi 28 | 29 | secretName: "mail-config" 30 | smtp_user: "" 31 | smtp_pass: "" 32 | 33 | rancher_name: "" 34 | schedule: "0 0 * * 1" 35 | smtp_host: "" 36 | smtp_port: "587" 37 | to_address: "" 38 | from_address: "" 39 | send_to_support: "true" 40 | -------------------------------------------------------------------------------- /charts/systems-information/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | name: systems-info 3 | description: Rancher Systems Info 4 | 5 | # A chart can be either an 'application' or a 'library' chart. 6 | # 7 | # Application charts are a collection of templates that can be packaged into versioned archives 8 | # to be deployed. 9 | # 10 | # Library charts provide useful utilities or functions for the chart developer. They're included as 11 | # a dependency of application charts to inject those utilities and functions into the rendering 12 | # pipeline. Library charts do not define any templates and therefore cannot be deployed. 13 | type: application 14 | 15 | # This is the chart version. This version number should be incremented each time you make changes 16 | # to the chart and its templates, including the app version. 17 | version: 0.0.1 18 | 19 | # This is the version number of the application being deployed. This version number should be 20 | # incremented each time you make changes to the application. 21 | appVersion: 0.0.1 22 | 23 | keywords: 24 | - rancher 25 | - support 26 | home: https://support.rancher.com 27 | sources: 28 | - https://github.com/rancherlabs/systems-info 29 | maintainers: 30 | - name: mattmattox 31 | email: matt.mattox@suse.com 32 | icon: https://rancher.com/img/brand-guidelines/assets/logos/png/color/rancher-logo-stacked-color.png 33 | -------------------------------------------------------------------------------- /charts/systems-information/questions.yml: -------------------------------------------------------------------------------- 1 | labels: 2 | io.cattle.role: project # options are cluster/project 3 | categories: 4 | - Support 5 | Name: systems-info 6 | Namespace: systems-info 7 | questions: 8 | - variable: defaultImage 9 | default: true 10 | description: "Use default Docker image" 11 | label: Use Default Image 12 | type: boolean 13 | show_subquestion_if: false 14 | group: "Container Images" 15 | subquestions: 16 | - variable: image.repository 17 | default: "docker.io/rancher/systems-info" 18 | description: "Docker image repository" 19 | type: string 20 | label: Image Repository 21 | - variable: image.tag 22 | default: "v0.0.1" 23 | description: "Docker image tag" 24 | type: string 25 | label: Image Tag 26 | - variable: schedule 27 | required: true 28 | default: '0 0 * * 1' 29 | description: "Backup schedule in crontab format" 30 | type: string 31 | label: "Backup schedule" 32 | - variable: rancher_name 33 | default: "Rancher" 34 | description: "Your human readable for this install" 35 | type: string 36 | label: Rancher Name 37 | required: true 38 | group: "General Settings" 39 | - variable: to_address 40 | default: "" 41 | description: "Email address for sending report to" 42 | type: string 43 | label: To address 44 | required: true 45 | group: "General Settings" 46 | - variable: send_to_support 47 | default: 
"true" 48 | description: "Would you like this report sent to Rancher Support?" 49 | type: string 50 | label: Send report to Rancher Support 51 | required: false 52 | group: "General Settings" 53 | - variable: smtp_host 54 | default: "" 55 | description: "SMTP server hostname" 56 | type: string 57 | label: SMTP Server 58 | required: true 59 | group: "Mail Server Settings" 60 | - variable: smtp_port 61 | default: "587" 62 | description: "SMTP server port" 63 | type: string 64 | label: SMTP port 65 | required: true 66 | group: "Mail Server Settings" 67 | - variable: smtp_user 68 | default: "" 69 | description: "SMTP username" 70 | type: string 71 | label: SMTP username 72 | required: true 73 | group: "Mail Server Settings" 74 | - variable: smtp_pass 75 | default: "" 76 | description: "SMTP password" 77 | type: password 78 | label: SMTP password 79 | required: true 80 | group: "Mail Server Settings" 81 | - variable: from_address 82 | default: "" 83 | description: "From address" 84 | type: string 85 | label: From address 86 | required: true 87 | group: "Mail Server Settings" 88 | -------------------------------------------------------------------------------- /charts/systems-information/templates/clusterrolebinding.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRoleBinding 3 | metadata: 4 | name: systems-info 5 | roleRef: 6 | apiGroup: rbac.authorization.k8s.io 7 | kind: ClusterRole 8 | name: cluster-admin 9 | subjects: 10 | - kind: ServiceAccount 11 | name: systems-info 12 | namespace: systems-info 13 | -------------------------------------------------------------------------------- /charts/systems-information/templates/deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: batch/v1beta1 2 | kind: CronJob 3 | metadata: 4 | name: systems-info 5 | spec: 6 | schedule: {{ .Values.schedule | quote }} 7 | jobTemplate: 8 | spec: 9 | template: 10 | spec: 11 | containers: 12 | - env: 13 | - name: from_address 14 | value: {{ .Values.from_address | quote }} 15 | - name: rancher_name 16 | value: {{ .Values.rancher_name | quote }} 17 | - name: send_to_support 18 | value: {{ .Values.send_to_support | quote }} 19 | - name: smtp_host 20 | value: {{ .Values.smtp_host | quote }} 21 | - name: smtp_port 22 | value: {{ .Values.smtp_port | quote }} 23 | - name: to_address 24 | value: {{ .Values.to_address | quote }} 25 | - name: smtp_user 26 | valueFrom: 27 | secretKeyRef: 28 | key: smtp_user 29 | name: mail-config 30 | optional: false 31 | - name: smtp_pass 32 | valueFrom: 33 | secretKeyRef: 34 | key: smtp_pass 35 | name: mail-config 36 | optional: false 37 | image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" 38 | imagePullPolicy: {{ .Values.image.pullPolicy }} 39 | name: system-information 40 | dnsPolicy: ClusterFirst 41 | restartPolicy: Never 42 | schedulerName: default-scheduler 43 | serviceAccountName: systems-info 44 | -------------------------------------------------------------------------------- /charts/systems-information/templates/namespace.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Namespace 3 | metadata: 4 | name: systems-info 5 | -------------------------------------------------------------------------------- /charts/systems-information/templates/secret.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 
| kind: Secret 3 | metadata: 4 | name: mail-config 5 | namespace: systems-info 6 | type: "Opaque" 7 | data: 8 | smtp_user: {{ default "" .Values.smtp_user | b64enc | quote }} 9 | smtp_pass: {{ default "" .Values.smtp_pass | b64enc | quote }} 10 | -------------------------------------------------------------------------------- /charts/systems-information/templates/serviceaccount.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ServiceAccount 3 | metadata: 4 | name: systems-info 5 | namespace: systems-info 6 | -------------------------------------------------------------------------------- /charts/systems-information/values.yaml: -------------------------------------------------------------------------------- 1 | replicaCount: 1 2 | 3 | image: 4 | repository: docker.io/rancher/systems-info 5 | tag: v0.0.1 6 | pullPolicy: IfNotPresent 7 | 8 | imagePullSecrets: [] 9 | 10 | Name: systems-info 11 | Namespace: systems-info 12 | 13 | rbac: 14 | create: true 15 | clusterAdminRole: true 16 | 17 | serviceAccount: 18 | create: true 19 | name: systems-info 20 | 21 | resources: 22 | limits: 23 | cpu: 100m 24 | memory: 100Mi 25 | requests: 26 | cpu: 100m 27 | memory: 100Mi 28 | 29 | secretName: "mail-config" 30 | smtp_user: "" 31 | smtp_pass: "" 32 | 33 | rancher_name: "" 34 | schedule: "0 0 * * 1" 35 | smtp_host: "" 36 | smtp_port: "587" 37 | to_address: "" 38 | from_address: "" 39 | send_to_support: "true" 40 | -------------------------------------------------------------------------------- /cleanup-etcd-part-files/README.md: -------------------------------------------------------------------------------- 1 | # Workaround ETCD Snapshots Part Files Issue 2 | To workaround issue [gh-30662](https://github.com/rancher/rancher/issues/30662) please select one of the following deployment options. 3 | 4 | ## Option A - cleanup file temp files 5 | This script runs on each etcd node in a while true loop every 5 minutes looking for leftover part files. If it finds part files older than 15 minutes, it will delete them. This is to prevent deleting a part file that is currently in-use. 6 | 7 | ### Changes to restore process 8 | None, the restore process is unchanged. 9 | 10 | ### Installation 11 | ``` 12 | kubectl apply -f delete-part-files.yaml 13 | ``` 14 | 15 | ## Option B - alternative s3 snapshots 16 | This script replaces the recurring snapshot functionality in RKE with a Kubernetes job that runs every 12 hours. 
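For reference, the snapshot cadence and retention come from the arguments passed to `rke-etcd-backup` in `alt-s3-sync.yaml` (shown below); the annotations here are added for clarity:
```
- --s3-backup=true
- --creation=12h    # take a snapshot every 12 hours
- --retention=72h   # keep snapshots for 72 hours
```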
17 | 18 | ### Changes to restore process 19 | - You will need to manually take a new snapshot 20 | - Download the snapshot from S3 on all etcd nodes 21 | - Rename the old snapshot to the new snapshot filename 22 | - Restore the S3 snapshot in Rancher UI by selecting the new snapshot name 23 | 24 | ### Installation 25 | - Disable recurring snapshots in Rancher/RKE 26 | - At a minimum, `alt-s3-sync.yaml` must be modified (remember to base64 the values) to reflect the s3 details 27 | ``` 28 | kubectl apply -f alt-s3-sync.yaml 29 | ``` 30 | -------------------------------------------------------------------------------- /cleanup-etcd-part-files/alt-s3-sync.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: Secret 4 | metadata: 5 | labels: 6 | app: alt-etcd-s3-sync 7 | name: etcd-s3 8 | namespace: kube-system 9 | type: Opaque 10 | data: 11 | # all values should be base64 encoded (ie: echo -n '123456' | base64) 12 | S3_ACCESS_KEY: 13 | S3_BUCKET_NAME: 14 | S3_BUCKET_REGION: 15 | S3_ENDPOINT: 16 | S3_FOLDER: 17 | S3_SECRET_KEY: 18 | --- 19 | apiVersion: apps/v1 20 | kind: Deployment 21 | metadata: 22 | labels: 23 | app: rolling-etcd-snapshots 24 | name: rolling-etcd-snapshots 25 | namespace: kube-system 26 | spec: 27 | replicas: 1 28 | selector: 29 | matchLabels: 30 | app: rolling-etcd-snapshots 31 | template: 32 | metadata: 33 | labels: 34 | app: rolling-etcd-snapshots 35 | spec: 36 | affinity: 37 | nodeAffinity: 38 | requiredDuringSchedulingIgnoredDuringExecution: 39 | nodeSelectorTerms: 40 | - matchExpressions: 41 | - key: beta.kubernetes.io/os 42 | operator: NotIn 43 | values: 44 | - windows 45 | - key: node-role.kubernetes.io/etcd 46 | operator: In 47 | values: 48 | - "true" 49 | containers: 50 | - args: 51 | - /opt/rke-tools/rke-etcd-backup 52 | - etcd-backup 53 | - save 54 | - --cacert 55 | - /etc/kubernetes/ssl/kube-ca.pem 56 | - --cert 57 | - /etc/kubernetes/ssl/kube-node.pem 58 | - --key 59 | - /etc/kubernetes/ssl/kube-node-key.pem 60 | - --s3-backup=true 61 | - --creation=12h 62 | - --retention=72h 63 | envFrom: 64 | - secretRef: 65 | name: etcd-s3 66 | optional: false 67 | image: rancher/rke-tools:v0.1.66 68 | imagePullPolicy: IfNotPresent 69 | name: rolling-etcd-snapshots 70 | volumeMounts: 71 | - mountPath: /backup 72 | name: rke-tools 73 | - mountPath: /etc/kubernetes 74 | name: k8s-certs 75 | hostNetwork: true 76 | tolerations: 77 | - effect: NoExecute 78 | key: node-role.kubernetes.io/etcd 79 | operator: Equal 80 | value: "true" 81 | volumes: 82 | - hostPath: 83 | path: /opt/rke/etcd-snapshots 84 | type: "" 85 | name: rke-tools 86 | - hostPath: 87 | path: /etc/kubernetes 88 | type: "" 89 | name: k8s-certs 90 | -------------------------------------------------------------------------------- /cleanup-etcd-part-files/delete-part-files.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: DaemonSet 3 | metadata: 4 | name: cleanup-etcd 5 | namespace: kube-system 6 | labels: 7 | app: cleanup-etcd 8 | spec: 9 | selector: 10 | matchLabels: 11 | app: cleanup-etcd 12 | template: 13 | metadata: 14 | labels: 15 | app: cleanup-etcd 16 | spec: 17 | affinity: 18 | nodeAffinity: 19 | requiredDuringSchedulingIgnoredDuringExecution: 20 | nodeSelectorTerms: 21 | - matchExpressions: 22 | - key: beta.kubernetes.io/os 23 | operator: NotIn 24 | values: 25 | - windows 26 | - key: node-role.kubernetes.io/etcd 27 | operator: In 28 | values: 29 | - "true" 30 | 
containers: 31 | - args: 32 | - while true; 33 | do 34 | echo "Before cleanup"; 35 | ls -lh /opt/rke/etcd-snapshots/; 36 | find /opt/rke/etcd-snapshots/ -mindepth 1 -type f -name *.* -mmin +15 | grep -v '.zip' | xargs rm -f; 37 | echo "Post cleanup"; 38 | ls -lh /opt/rke/etcd-snapshots/; 39 | echo "Sleeping..."; 40 | sleep 360; 41 | done; 42 | command: 43 | - /bin/sh 44 | - -c 45 | image: busybox 46 | name: cleanup-etcd 47 | volumeMounts: 48 | - mountPath: /opt/rke 49 | name: rke 50 | tolerations: 51 | - effect: NoExecute 52 | key: node-role.kubernetes.io/etcd 53 | operator: Equal 54 | value: "true" 55 | volumes: 56 | - hostPath: 57 | path: /opt/rke 58 | type: "" 59 | name: rke 60 | -------------------------------------------------------------------------------- /cleanup-evicted-pods/README.md: -------------------------------------------------------------------------------- 1 | # Cleanup evicted pods left behind after disk pressure 2 | When a node starts to evict pods under disk pressure, the evicted pods are left behind. All the resources like volumes, IP, containers, etc will be cleaned up and delete. But the pod object will be left behind in "evicted" status. Per upstream this is [intentional](https://github.com/kubernetes/kubernetes/issues/54525#issuecomment-340035375) 3 | 4 | ## Workaround 5 | 6 | ### Manual cleanup 7 | NOTE: This script is designed to work on Linux machines. 8 | ```bash 9 | kubectl get pods --all-namespaces -ojson | jq -r '.items[] | select(.status.reason!=null) | select(.status.reason | contains("Evicted")) | .metadata.name + " " + .metadata.namespace' | xargs -n2 -l bash -c 'kubectl delete pods $0 --namespace=$1' 10 | ``` 11 | 12 | ### Automatic cleanup 13 | This is a cronjob that runs every 30 mins inside the cluster that will find and remove any pods with the status of "Evicted." 14 | 15 | ```bash 16 | kubectl apply -f deploy.yaml 17 | ``` 18 | 19 | NOTE: This YAML uses the image `rancherlabs/swiss-army-knife`. 20 | -------------------------------------------------------------------------------- /cleanup-evicted-pods/deploy.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: batch/v1 2 | kind: CronJob 3 | metadata: 4 | name: cleanup-evicted-pods 5 | namespace: kube-system 6 | labels: 7 | app: cleanup-evicted-pods 8 | spec: 9 | schedule: "*/30 * * * *" 10 | concurrencyPolicy: Forbid 11 | jobTemplate: 12 | spec: 13 | template: 14 | spec: 15 | containers: 16 | - name: cleanup-evicted-pods 17 | image: rancherlabs/swiss-army-knife 18 | imagePullPolicy: IfNotPresent 19 | command: ["sh", "-c", "kubectl get pods --all-namespaces --field-selector 'status.phase==Failed' -o json | kubectl delete -f -"] 20 | restartPolicy: OnFailure 21 | serviceAccount: rke-job-deployer 22 | serviceAccountName: rke-job-deployer 23 | -------------------------------------------------------------------------------- /cleanup-evicted-pods/serviceaccount.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ServiceAccount 3 | metadata: 4 | name: rke-job-deployer 5 | namespace: kube-system 6 | -------------------------------------------------------------------------------- /collection/rancher/v1.6/logs-collector/README.md: -------------------------------------------------------------------------------- 1 | # rancher-logs-collector 2 | 3 | The script needs to be downloaded and run directly on the host using the `root` user or using `sudo`. 
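If the host has internet access, the script can be fetched directly from this repository before following the steps below; a sketch, with the URL assumed from this repository's layout:

```bash
# Download the Rancher v1.6 log collector (URL assumed from the repo layout)
curl -OLs https://raw.githubusercontent.com/rancherlabs/support-tools/master/collection/rancher/v1.6/logs-collector/rancher16_logs_collector.sh
```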
4 | 5 | ## How to use 6 | 7 | * Download the script and save as: `rancher16_logs_collector.sh` 8 | * Make sure the script is executable: `chmod +x rancher16_logs_collector.sh` 9 | * Run the script: `./rancher16_logs_collector.sh` 10 | -------------------------------------------------------------------------------- /collection/rancher/v1.6/logs-collector/rancher16_logs_collector.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #Check if we're running as root. 3 | if [[ $EUID -ne 0 ]]; then 4 | echo "This script must be run as root" 5 | exit 1 6 | fi 7 | 8 | # Create temp directory 9 | TMPDIR=$(mktemp -d) 10 | 11 | #Set TIMEOUT in seconds for select commands 12 | TIMEOUT=60 13 | 14 | function timeout_start_msg() { 15 | TIMEOUT_CMD=$1 16 | TIMEOUT_EXCEEDED_MSG="$TIMEOUT_CMD command timed out, killing process to prevent hanging." 17 | echo "Executing $TIMEOUT_CMD with a timeout of $TIMEOUT seconds." 18 | } 19 | function timeout_done_msg() { 20 | echo "Execution of $TIMEOUT_CMD has finished." 21 | echo 22 | } 23 | function timeout_cmd() { 24 | WPID=$!; sleep $TIMEOUT && if kill -0 $WPID > /dev/null 2>&1; then echo $TIMEOUT_EXCEEDED_MSG; kill $WPID &> /dev/null; fi & KPID=$!; wait $WPID 25 | } 26 | 27 | # System info 28 | mkdir -p $TMPDIR/systeminfo 29 | hostname > $TMPDIR/systeminfo/hostname 2>&1 30 | hostname -f > $TMPDIR/systeminfo/hostnamefqdn 2>&1 31 | cat /etc/hosts > $TMPDIR/systeminfo/etchosts 2>&1 32 | cat /etc/resolv.conf > $TMPDIR/systeminfo/etcresolvconf 2>&1 33 | date > $TMPDIR/systeminfo/date 2>&1 34 | free -m > $TMPDIR/systeminfo/freem 2>&1 35 | uptime > $TMPDIR/systeminfo/uptime 2>&1 36 | dmesg -T > $TMPDIR/systeminfo/dmesg 2>&1 37 | df -h > $TMPDIR/systeminfo/dfh 2>&1 38 | if df -i >/dev/null 2>&1; then 39 | df -i > $TMPDIR/systeminfo/dfi 2>&1 40 | fi 41 | lsmod > $TMPDIR/systeminfo/lsmod 2>&1 42 | mount > $TMPDIR/systeminfo/mount 2>&1 43 | ps aux > $TMPDIR/systeminfo/psaux 2>&1 44 | 45 | timeout_start_msg "lsof" 46 | lsof -Pn >$TMPDIR/systeminfo/lsof 2>&1 & timeout_cmd 47 | timeout_done_msg 48 | 49 | if $(command -v sysctl >/dev/null 2>&1); then 50 | sysctl -a > $TMPDIR/systeminfo/sysctla 2>/dev/null 51 | fi 52 | # OS: Ubuntu 53 | if $(command -v ufw >/dev/null 2>&1); then 54 | ufw status > $TMPDIR/systeminfo/ubuntu-ufw 2>&1 55 | fi 56 | if $(command -v apparmor_status >/dev/null 2>&1); then 57 | apparmor_status > $TMPDIR/systeminfo/ubuntu-apparmorstatus 2>&1 58 | fi 59 | # OS: RHEL 60 | if [ -f /etc/redhat-release ]; then 61 | systemctl status NetworkManager > $TMPDIR/systeminfo/rhel-statusnetworkmanager 2>&1 62 | systemctl status firewalld > $TMPDIR/systeminfo/rhel-statusfirewalld 2>&1 63 | if $(command -v getenforce >/dev/null 2>&1); then 64 | getenforce > $TMPDIR/systeminfo/rhel-getenforce 2>&1 65 | fi 66 | fi 67 | 68 | # Docker 69 | mkdir -p $TMPDIR/docker 70 | timeout_start_msg "docker info" 71 | docker info >$TMPDIR/docker/dockerinfo 2>&1 & timeout_cmd 72 | timeout_done_msg 73 | 74 | timeout_start_msg "docker ps -a" 75 | docker ps -a >$TMPDIR/docker/dockerpsa 2>&1 76 | timeout_done_msg 77 | 78 | timeout_start_msg "docker stats" 79 | docker stats -a --no-stream >$TMPDIR/docker/dockerstats 2>&1 & timeout_cmd 80 | timeout_done_msg 81 | 82 | if [ -f /etc/docker/daemon.json ]; then 83 | cat /etc/docker/daemon.json > $TMPDIR/docker/etcdockerdaemon.json 84 | fi 85 | 86 | # Networking 87 | mkdir -p $TMPDIR/networking 88 | iptables-save > $TMPDIR/networking/iptablessave 2>&1 89 | cat /proc/net/xfrm_stat > 
$TMPDIR/networking/procnetxfrmstat 2>&1 90 | if $(command -v ip >/dev/null 2>&1); then 91 | ip addr show > $TMPDIR/networking/ipaddrshow 2>&1 92 | ip route > $TMPDIR/networking/iproute 2>&1 93 | fi 94 | if $(command -v ifconfig >/dev/null 2>&1); then 95 | ifconfig -a > $TMPDIR/networking/ifconfiga 96 | fi 97 | 98 | # System logging 99 | mkdir -p $TMPDIR/systemlogs 100 | cp /var/log/syslog* /var/log/messages* /var/log/kern* /var/log/docker* /var/log/system-docker* /var/log/audit/* $TMPDIR/systemlogs 2>/dev/null 101 | 102 | # Rancher logging 103 | # Discover any server or agent running 104 | mkdir -p $TMPDIR/rancher/containerinspect 105 | mkdir -p $TMPDIR/rancher/containerlogs 106 | RANCHERSERVERS=$(docker ps -a | grep -E "rancher/server:|rancher/server |rancher/enterprise:|rancher/enterprise " | awk '{ print $1 }') 107 | RANCHERAGENTS=$(docker ps -a | grep -E "rancher/agent:|rancher/agent " | awk '{ print $1 }') 108 | 109 | for RANCHERSERVER in $RANCHERSERVERS; do 110 | docker inspect $RANCHERSERVER > $TMPDIR/rancher/containerinspect/server-$RANCHERSERVER 2>&1 111 | docker logs -t $RANCHERSERVER > $TMPDIR/rancher/containerlogs/server-$RANCHERSERVER 2>&1 112 | for LOGFILE in $(docker exec $RANCHERSERVER ls -1 /var/lib/cattle/logs 2>/dev/null); do 113 | mkdir -p $TMPDIR/rancher/cattlelogs/ 114 | docker cp $RANCHERSERVER:/var/lib/cattle/logs/$LOGFILE $TMPDIR/rancher/cattlelogs/$LOGFILE-$RANCHERSERVER 115 | done 116 | done 117 | 118 | for RANCHERAGENT in $RANCHERAGENTS; do 119 | docker inspect $RANCHERAGENT > $TMPDIR/rancher/containerinspect/agent-$RANCHERAGENT 2>&1 120 | docker logs -t $RANCHERAGENT > $TMPDIR/rancher/containerlogs/agent-$RANCHERAGENT 2>&1 121 | done 122 | 123 | # Infastructure/System stack containers 124 | for INFRACONTAINER in $(docker ps -a --filter label=io.rancher.container.system=true --format "{{.Names}}"); do 125 | mkdir -p $TMPDIR/infrastacks/containerlogs 126 | mkdir -p $TMPDIR/infrastacks/containerinspect 127 | docker inspect $INFRACONTAINER > $TMPDIR/infrastacks/containerinspect/$INFRACONTAINER 2>&1 128 | docker logs -t $INFRACONTAINER > $TMPDIR/infrastacks/containerlogs/$INFRACONTAINER 2>&1 129 | done 130 | 131 | # IPsec 132 | IPSECROUTERS=$(docker ps --filter label=io.rancher.stack_service.name=ipsec/ipsec/router --format "{{.Names}}") 133 | for IPSECROUTER in "${IPSECROUTERS[@]}"; do 134 | mkdir -p $TMPDIR/ipsec 135 | docker exec $IPSECROUTER bash -cx "swanctl --list-conns && swanctl --list-sas && ip -s xfrm state && ip -s xfrm policy && cat /proc/net/xfrm_stat && sysctl -a" > $TMPDIR/ipsec/ipsec.info.${IPSECROUTER}.log 2>&1 136 | done 137 | 138 | # Networkmanager 139 | NETWORKMANAGERS=$(docker ps --filter label=io.rancher.stack_service.name=network-services/network-manager --format "{{.Names}}") 140 | for NETWORKMANAGER in "${NETWORKMANAGERS[@]}"; do 141 | mkdir -p $TMPDIR/networkmanager 142 | docker exec $NETWORKMANAGER bash -cx "ip link && ip addr && ip neighbor && ip route && conntrack -L && iptables-save && sysctl -a && cat /etc/resolv.conf && uname -a" > $TMPDIR/networkmanager/nm.network.info.${NETWORKMANAGER}.log 2>&1 143 | done 144 | 145 | # System pods 146 | SYSTEMNAMESPACES=(kube-system) 147 | for SYSTEMNAMESPACE in "${SYSTEMNAMESPACES[@]}"; do 148 | CONTAINERS=$(docker ps -a --filter name=$SYSTEMNAMESPACE --format "{{.Names}}") 149 | for CONTAINER in $CONTAINERS; do 150 | mkdir -p $TMPDIR/k8s/podlogs 151 | mkdir -p $TMPDIR/k8s/podinspect 152 | docker inspect $CONTAINER > $TMPDIR/k8s/podinspect/$CONTAINER 2>&1 153 | docker logs -t $CONTAINER > 
$TMPDIR/k8s/podlogs/$CONTAINER 2>&1 154 | done 155 | done 156 | 157 | # etcd 158 | ETCDCONTAINERS=$(docker ps --filter label=io.rancher.stack_service.name=kubernetes/etcd --format "{{.Names}}") 159 | for ETCDCONTAINER in $ETCDCONTAINERS; do 160 | mkdir -p $TMPDIR/etcd 161 | docker exec $ETCDCONTAINER etcdctl cluster-health > $TMPDIR/etcd/cluster-health-${ETCDCONTAINER} 2>&1 162 | find $(docker inspect $ETCDCONTAINER --format '{{ range .Mounts }}{{ if eq .Destination "/pdata" }}{{ .Source }}{{ end }}{{ end }}') -type f -exec ls -la {} \; > $TMPDIR/etcd/findetcddata 2>&1 163 | done 164 | 165 | FILENAME="$(hostname)-$(date +'%Y-%m-%d_%H_%M_%S').tar" 166 | tar cf /tmp/$FILENAME -C ${TMPDIR}/ . 167 | 168 | if $(command -v gzip >/dev/null 2>&1); then 169 | gzip /tmp/${FILENAME} 170 | FILENAME="${FILENAME}.gz" 171 | fi 172 | 173 | echo "Created /tmp/${FILENAME}" 174 | echo "You can now remove ${TMPDIR}" 175 | -------------------------------------------------------------------------------- /collection/rancher/v2.x/RBAC-role-collector/README.md: -------------------------------------------------------------------------------- 1 | # Rancher 2.x RBAC role collector 2 | 3 | This project was created to collect RBAC roles in a Kubernetes cluster to assist with troubleshooting. 4 | 5 | ## Usage 6 | 7 | 1. Download the script to a location from where you can run `kubectl` against the intended cluster, and save it as: `role-dump.sh` 8 | `curl -OLs https://raw.githubusercontent.com/rancherlabs/support-tools/master/collection/rancher/v2.x/RBAC-role-collector/role-dump.sh` 9 | 2. Set the kubectl context to the cluster where you see the issue you are investigating. You will likely want to run this against the Rancher local cluster as well as the downstream cluster where you see the issues. 10 | 3. Run the script: `bash ./role-dump.sh` 11 | 12 | ### What is collected 13 | 14 | The output will contain: 15 | 16 | - JSON files for each role type (in the following list) containing all the roles in the cluster 17 | - A listing (`rolebindings.list`) of all the rolebindings ordered by type 18 | - A tar.gz file that can be provided to support; an uncompressed directory will remain with all the data gathered for your inspection 19 | 20 | Having this information and a list of the user IDs of any users affected by the issue can help in troubleshooting. 21 | 22 | #### CRDs collected: 23 | 24 | ``` 25 | clusterroletemplatebindings 26 | globalrolebindings 27 | globalroles 28 | projectroletemplatebindings 29 | roletemplates.management.cattle.io 30 | roletemplatebindings 31 | clusterrolebindings 32 | clusterroles 33 | roletemplates.rancher.cattle.io 34 | rolebindings 35 | roles 36 | ``` -------------------------------------------------------------------------------- /collection/rancher/v2.x/RBAC-role-collector/role-dump.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | # Troubleshooting Bash settings in case of unexpected failure 4 | # set -o errexit # Set to exit on error.
Do not enable this unless running against upstream Rancher cluster 5 | # set -o xtrace # Set to output every line Bash runs as it runs the script 6 | 7 | # Unset variables used in the script to be safe 8 | unset crd cluster wd dir file role i 9 | 10 | _declare_variables () { 11 | # Role types to collect 12 | crd=(\ 13 | clusterroletemplatebindings \ 14 | globalrolebindings \ 15 | globalroles \ 16 | projectroletemplatebindings \ 17 | roletemplates.management.cattle.io \ 18 | roletemplatebindings \ 19 | clusterrolebindings \ 20 | clusterroles \ 21 | roletemplates.rancher.cattle.io \ 22 | rolebindings \ 23 | roles 24 | ) 25 | 26 | # Store filename friendly cluster name 27 | cluster=$(_slugify "$(kubectl config current-context)") # 28 | 29 | # Working directory 30 | wd="$cluster"_role-bindings_$(date +"%Y-%m-%d_%H_%M_%S") 31 | } 32 | 33 | 34 | # Slugify strings (replace any special characters with `-`) 35 | _slugify () { 36 | echo "$1" | 37 | iconv -t ascii//TRANSLIT | 38 | sed -r s/[^a-zA-Z0-9]+/-/g | 39 | sed -r s/^-+\|-+$//g | 40 | tr A-Z a-z 41 | } 42 | 43 | # Generate a list (`rolebindings.list`) of all the role bindings and template bindings in the cluster 44 | _list_rolebindings () { 45 | for i in ${crd[*]} ; do 46 | echo "Listing $i" 47 | printf "\n\n# $i\n" >> "$wd"/rolebindings.list 48 | kubectl get $i -A >> "$wd"/rolebindings.list 49 | done 50 | } 51 | 52 | # Generate a JSON per role type containing all the rolebindings 53 | _get_rolebindings () { 54 | for i in ${crd[*]} ; do 55 | echo "Getting $i JSON" 56 | file=$(_slugify "$i") 57 | kubectl get "$i" -A -o json > "$wd"/"$file".json 58 | done 59 | } 60 | 61 | # Archive and compress the report 62 | _tarball_wd () { 63 | echo "Compressing $wd" 64 | tar -czvf "$wd".tar.gz "$wd" 65 | } 66 | 67 | 68 | # Runs all the things 69 | main () { 70 | _declare_variables 71 | # Create working directory 72 | if [[ ! -e "$wd" ]]; then 73 | mkdir "$wd" 74 | fi 75 | _list_rolebindings >& "$wd"/list.log 76 | _get_rolebindings >& "$wd"/rolebindings.log 77 | _tarball_wd 78 | } 79 | 80 | # ACTUALLY run all the things 81 | main 82 | -------------------------------------------------------------------------------- /collection/rancher/v2.x/logs-collector/README.md: -------------------------------------------------------------------------------- 1 | # Rancher v2.x logs-collector 2 | 3 | This logs collector project was created to collect logs from Linux Kubernetes nodes. It is designed to be used in the following environments for troubleshooting support cases: 4 | - [RKE2 clusters](https://docs.rke2.io/) 5 | - [RKE1 clusters](https://rancher.com/docs/rke/latest/en/) 6 | - [K3s clusters](https://docs.k3s.io/) 7 | - [Custom clusters](https://docs.ranchermanager.rancher.io/pages-for-subheaders/use-existing-nodes) 8 | - [Infrastructure provider clusters](https://ranchermanager.docs.rancher.com/how-to-guides/new-user-guides/launch-kubernetes-with-rancher/use-new-nodes-in-an-infra-provider) 9 | - [Kubeadm clusters](https://kubernetes.io/docs/reference/setup-tools/kubeadm/) 10 | 11 | > Note: This script may not collect all necessary information when run on nodes in a [Hosted Kubernetes Provider](https://ranchermanager.docs.rancher.com/how-to-guides/new-user-guides/kubernetes-clusters-in-rancher-setup/set-up-clusters-from-hosted-kubernetes-providers) cluster. 12 | 13 | ## Usage 14 | 15 | The script needs to be downloaded and run directly on the node, using the `root` user or `sudo`. 
16 | 17 | Output will be written to `/tmp` as a tar.gz archive named `-.tar.gz`, the default output directory can be changed with the `-d` flag. 18 | 19 | ### Download and run the script 20 | * Save the script as: `rancher2_logs_collector.sh` 21 | 22 | Using `wget`: 23 | ```bash 24 | wget https://raw.githubusercontent.com/rancherlabs/support-tools/master/collection/rancher/v2.x/logs-collector/rancher2_logs_collector.sh 25 | ``` 26 | Using `curl`: 27 | ```bash 28 | curl -OLs https://raw.githubusercontent.com/rancherlabs/support-tools/master/collection/rancher/v2.x/logs-collector/rancher2_logs_collector.sh 29 | ``` 30 | 31 | * Run the script: 32 | ```bash 33 | sudo bash rancher2_logs_collector.sh 34 | ``` 35 | 36 | ### Optional: Download and run the script in one command 37 | ```bash 38 | curl -Ls rnch.io/rancher2_logs | sudo bash 39 | ``` 40 | > Note: This command requires `curl` to be installed, and internet access from the node. 41 | 42 | ## Flags 43 | 44 | ``` 45 | Rancher 2.x logs-collector 46 | Usage: rancher2_logs_collector.sh [ -d -s -e -r -p -f ] 47 | 48 | All flags are optional 49 | 50 | -c Custom data-dir for RKE2 (ex: -c /opt/rke2) 51 | -d Output directory for temporary storage and .tar.gz archive (ex: -d /var/tmp) 52 | -s Start day of journald and docker log collection, # of days relative to the current day (ex: -s 7) 53 | -e End day of journald and docker log collection, # of days relative to the current day (ex: -e 5) 54 | -S Start date of journald and docker log collection. (ex: -S 2022-12-05) 55 | -E End date of journald and docker log collection. (ex: -E 2022-12-07) 56 | -r Override k8s distribution if not automatically detected (rke|k3s|rke2|kubeadm) 57 | -p When supplied runs with the default nice/ionice priorities, otherwise use the lowest priorities 58 | -f Force log collection if the minimum space isn't available 59 | -o Obfuscate IP addresses and hostnames 60 | ``` 61 | 62 | ## Scope of collection 63 | 64 | Collection includes the following areas, the logs collector is designed to gather necessary diagnostic information while respecting privacy and security concerns. A detailed list is maintained in [collection-details.md](./collection-details.md). 65 | 66 | - Related OS logs and configuration: 67 | - Network configuration - interfaces, iptables 68 | - Disk configuration - devices, filesystems, utilization 69 | - Performance - resource usage, tuning 70 | - OS release and logs - versions, messages/syslog 71 | - Related Kubernetes object output, kubectl commands, and pod logs 72 | - Related CRD objects 73 | - Output from kubectl for troubleshooting 74 | - Pod logs from related namespaces 75 | 76 | The scope of collection is intentionally limited to avoid sensitive data, use minimal resources and disk space, and focus on the core areas needed for troubleshooting. 77 | 78 | IP addresses and hostnames are collected and can assist with troubleshooting, however these can be obfuscated when adding the `-o` flag for the log collection script. 79 | 80 | Note, if additional verbosity, debug, or audit logging is enabled for the related Kubernetes and OS components, these logs can be included and may contain sensitive output. 
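For instance, a collection that writes the archive to `/var/tmp`, limits journald and docker logs to a fixed date range, and obfuscates IP addresses and hostnames could be invoked as follows (the dates and output directory are illustrative only):

```bash
# Illustrative only: bounded date range, custom output directory, obfuscated IPs/hostnames
sudo bash rancher2_logs_collector.sh -d /var/tmp -S 2022-12-05 -E 2022-12-07 -o
```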
-------------------------------------------------------------------------------- /collection/rancher/v2.x/logs-collector/collection-details.md: -------------------------------------------------------------------------------- 1 | # Collection Details 2 | 3 | ## Overview 4 | This document provides transparency about the output collected when running the logs collector script. The collection is designed to gather necessary troubleshooting information while respecting privacy and security concerns 5 | 6 | Where possible output from the collection is sanitized, however we recommend you check a log collection and remove or edit any sensitive data 7 | 8 | ### Node-level collection 9 | 10 | Output that is collected only from the node where the logs collector script is run 11 | 12 | #### Operating System 13 | - General OS configuration, for example: the hostname, resources, process list, service list, packages, limits and tunables 14 | - Networking, iptables, netstat, interfaces, CNI configuration 15 | - Journalctl output for related services if available, a list of services is listed in [the `JOURNALD_LOGS` variable](https://github.com/rancherlabs/support-tools/blob/master/collection/rancher/v2.x/logs-collector/rancher2_logs_collector.sh#L12) 16 | - OS logs from /var/logs, a list of log files is listed in [the `VAR_LOG_FILES` variable](https://github.com/rancherlabs/support-tools/blob/master/collection/rancher/v2.x/logs-collector/rancher2_logs_collector.sh#L15) 17 | 18 | #### Kubernetes 19 | - Distribution logs, for example rke2 and k3s agent/server journalctl logs 20 | - Distribution configuration, rke2 and k3s configuration files, static pod manifests 21 | - Container runtime logs and configuration, containerd or docker 22 | 23 | ### Cluster-level collection 24 | 25 | Output that is collected from the cluster 26 | 27 | Note, pod logs from other nodes and additional kubectl output can only be collected when running on a control plane/server node 28 | 29 | #### Kubernetes 30 | - Kubernetes control plane and worker component configuration and logs, for example: kubelet etcd, kube-apiserver 31 | - Kubernetes pod logs from related namespaces, a list of namespaces is listed in [the `SYSTEM_NAMESPACE` variable](https://github.com/rancherlabs/support-tools/blob/master/collection/rancher/v2.x/logs-collector/rancher2_logs_collector.sh#L6) located in the script 32 | - Directory listings, for example: rke2 manifests directory, SSL certificates, etcd snapshots 33 | 34 | #### Kubectl output 35 | - Kubectl list of nodes, pods, services, RBAC roles, persistent volumes, events, ingress and deployments 36 | - Cluster provisioning CRD objects -------------------------------------------------------------------------------- /collection/rancher/v2.x/profile-collector/README.md: -------------------------------------------------------------------------------- 1 | # Rancher v2.x profiles-collector 2 | 3 | This profiles collector project was created to collect: 4 | - [Golang profiles](https://github.com/pkg/profile) for [Rancher Manager](https://github.com/rancher/rancher/), Rancher Cluster Agent, Fleet Controller and Fleet Agent 5 | - Rancher debug or trace logs when collecting Rancher profiles 6 | - Rancher audit logs when available 7 | - Events from the cattle-system namespace 8 | - metrics with kubectl top from pods and nodes 9 | - Rancher metrics exposed on /metrics 10 | 11 | ## Usage 12 | 13 | The script needs to be downloaded and run with a kubeconfig file for the Rancher Management (local) cluster, or a 
downstream cluster where cattle-cluster-agent pods are running 14 | 15 | ### Download and run the script 16 | - Save the script as: `continuous_profiling.sh` 17 | 18 | Using `wget`: 19 | ```bash 20 | wget https://raw.githubusercontent.com/rancherlabs/support-tools/master/collection/rancher/v2.x/profile-collector/continuous_profiling.sh 21 | ``` 22 | Using `curl`: 23 | ```bash 24 | curl -OLs https://raw.githubusercontent.com/rancherlabs/support-tools/master/collection/rancher/v2.x/profile-collector/continuous_profiling.sh 25 | ``` 26 | 27 | - Run the script: 28 | ```bash 29 | bash continuous_profiling.sh 30 | ``` 31 | The script will run until it receives a SIGKILL (Ctrl-C) 32 | A tarball will be generated at the same folder where the script is running. Please share that file with Rancher support. 33 | 34 | ## Flags 35 | 36 | ``` 37 | Rancher 2.x profile-collector 38 | Usage: profile-collector.sh [-a rancher -p goroutine,heap ] 39 | 40 | All flags are optional 41 | 42 | -a Application, rancher, cattle-cluster-agent, fleet-controller, fleet-agent 43 | -p Profiles to be collected (comma separated): goroutine,heap,threadcreate,block,mutex,profile 44 | -s Sleep time between loops in seconds 45 | -t Time of CPU profile collections 46 | -l Log level of the Rancher pods: debug or trace 47 | -h This help 48 | ``` 49 | 50 | ## Examples 51 | - The default collection is equivalent of: 52 | ```bash continuous_profiling -a rancher -p goroutine,heap,profile -s 120 -t 30``` 53 | 54 | - Collecting Upstream Rancher profiles every 30 minutes, and collect trace level logs 55 | ```bash continuous_profiling -s 1800 -l trace``` 56 | 57 | - Collecting cattle-cluster-agent heap and profile 58 | ```bash continuous_profiling -a cattle-cluster-agent -p heap,profile ``` 59 | 60 | - Collecting fleet-agent profile profile (cpu) over a minute 61 | ```bash continuous_profiling -a fleet-agent -t 60``` 62 | -------------------------------------------------------------------------------- /collection/rancher/v2.x/rancher-pod-collector/README.md: -------------------------------------------------------------------------------- 1 | # Rancher v2.x rancher-pod-collector 2 | 3 | This project was created to collect output for the Rancher installation in a Rancher Management (local) cluster when troubleshooting support cases 4 | 5 | This script needs to be downloaded and run on one of the following locations: 6 | 7 | - A server or workstation with kubectl access to the Rancher Management (local) cluster 8 | - Directly on one of the cluster nodes using the `root` user or using `sudo` 9 | - As a k8s deployment on the local cluster 10 | 11 | ## Usage 12 | 13 | - Download the script and save as: `rancher-pod-collector.sh` 14 | - Make sure the script is executable: `chmod +x rancher-pod-collector.sh` 15 | - Run the script: `./rancher-pod-collector.sh` 16 | 17 | Output will be written to `/tmp` as a tar.gz archive named `-.tar.gz`, the default output directory can be changed with the `-d` flag. 18 | 19 | ## Flags 20 | 21 | ``` 22 | Rancher Pod Collector 23 | Usage: rancher-pod-collector.sh [ -d -k KUBECONFIG -t -w -f ] 24 | 25 | All flags are optional. 26 | 27 | -d Output directory for temporary storage and .tar.gz archive (ex: -d /var/tmp) 28 | -k Override the kubeconfig (ex: ~/.kube/custom) 29 | -t Enable trace logs 30 | -w Live tailing Rancher logs 31 | -f Force log collection if the minimum space isn't available." 32 | ``` 33 | 34 | ## Important disclaimer 35 | 36 | The flag `-t` will enables trace logging. 
This can capture sensitive information about your Rancher install, including but not limited to usernames, passwords, encryption keys, etc. 37 | -------------------------------------------------------------------------------- /collection/rancher/v2.x/rancher-pod-collector/rancher-pod-collector.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Minimum space needed to run the script (MB) 4 | SPACE="512" 5 | 6 | setup() { 7 | 8 | TMPDIR=$(mktemp -d $MKTEMP_BASEDIR) 9 | techo "Created ${TMPDIR}" 10 | 11 | } 12 | 13 | disk-space() { 14 | 15 | AVAILABLE=$(df -m ${TMPDIR} | tail -n 1 | awk '{ print $4 }') 16 | if [ "${AVAILABLE}" -lt "${SPACE}" ] 17 | then 18 | techo "${AVAILABLE} MB space free, minimum needed is ${SPACE} MB." 19 | DISK_FULL=1 20 | fi 21 | 22 | } 23 | 24 | verify-access() { 25 | 26 | techo "Verifying cluster access" 27 | if [[ ! -z $OVERRIDE_KUBECONFIG ]]; 28 | then 29 | ## Just use the kubeconfig that was set by the user 30 | KUBECTL_CMD="kubectl --kubeconfig $OVERRIDE_KUBECONFIG" 31 | elif [[ ! -z $KUBECONFIG ]]; 32 | then 33 | KUBECTL_CMD="kubectl" 34 | elif [[ ! -z $KUBERNETES_PORT ]]; 35 | then 36 | ## We are inside the k8s cluster or we're using the local kubeconfig 37 | RANCHER_POD=$(kubectl -n cattle-system get pods -l app=rancher --no-headers -o custom-columns=id:metadata.name | head -n1) 38 | KUBECTL_CMD="kubectl -n cattle-system exec -c rancher ${RANCHER_POD} -- kubectl" 39 | elif $(command -v k3s >/dev/null 2>&1) 40 | then 41 | ## We are on k3s node 42 | KUBECTL_CMD="k3s kubectl" 43 | elif $(command -v docker >/dev/null 2>&1) 44 | then 45 | DOCKER_ID=$(docker ps | grep "k8s_rancher_rancher" | cut -d' ' -f1 | head -1) 46 | KUBECTL_CMD="docker exec ${DOCKER_ID} kubectl" 47 | else 48 | ## Giving up 49 | techo "Could not find a kubeconfig" 50 | fi 51 | if ! 
${KUBECTL_CMD} cluster-info >/dev/null 2>&1 52 | then 53 | techo "Can not access cluster" 54 | exit 1 55 | else 56 | techo "Cluster access has been verified" 57 | fi 58 | } 59 | 60 | cluster-info() { 61 | 62 | techo "Collecting cluster info" 63 | mkdir -p $TMPDIR/clusterinfo 64 | ${KUBECTL_CMD} cluster-info > $TMPDIR/clusterinfo/cluster-info 2>&1 65 | ${KUBECTL_CMD} get nodes -o wide > $TMPDIR/clusterinfo/get-node-wide 2>&1 66 | ${KUBECTL_CMD} cluster-info dump -o yaml -n cattle-system --log-file-max-size 200 --output-directory $TMPDIR/clusterinfo/cluster-info-dump 67 | ## Grabbing cattle-system items 68 | mkdir -p $TMPDIR/cattle-system/ 69 | ${KUBECTL_CMD} get endpoints -n cattle-system -o wide > $TMPDIR/cattle-system/get-endpoints 2>&1 70 | ${KUBECTL_CMD} get ingress -n cattle-system -o yaml > $TMPDIR/cattle-system/get-ingress.yaml 2>&1 71 | ${KUBECTL_CMD} get pods -n cattle-system -o wide > $TMPDIR/cattle-system/get-pods 2>&1 72 | ${KUBECTL_CMD} get svc -n cattle-system -o yaml > $TMPDIR/cattle-system/get-svc.yaml 2>&1 73 | ## Grabbing kube-system items 74 | mkdir -p $TMPDIR/kube-system/ 75 | ${KUBECTL_CMD} get configmap -n kube-system cattle-controllers -o yaml > $TMPDIR/kube-system/get-configmap-cattle-controllers.yaml 2>&1 76 | ## Grabbing cluster configuration 77 | mkdir -p $TMPDIR/clusters 78 | ${KUBECTL_CMD} get clusters.management.cattle.io -A > $TMPDIR/clusters/clusters 2>&1 79 | ${KUBECTL_CMD} get clusters.management.cattle.io -A -o yaml > $TMPDIR/clusters/clusters.yaml 2>&1 80 | 81 | } 82 | 83 | enable-debug() { 84 | 85 | techo "Enabling debug for Rancher pods" 86 | for POD in $(${KUBECTL_CMD} get pods -n cattle-system -l app=rancher --no-headers | awk '{print $1}'); 87 | do 88 | if [ ! -z "${TRACE}" ] 89 | then 90 | techo "Pod: $POD `${KUBECTL_CMD} exec -n cattle-system -c rancher $POD -- loglevel --set trace`" 91 | else 92 | techo "Pod: $POD `${KUBECTL_CMD} exec -n cattle-system -c rancher $POD -- loglevel --set debug`" 93 | fi 94 | done 95 | 96 | } 97 | 98 | disable-debug() { 99 | 100 | techo "Disabling debug for Rancher pods" 101 | for POD in $(${KUBECTL_CMD} get pods -n cattle-system -l app=rancher --no-headers | awk '{print $1}'); 102 | do 103 | techo "Pod: $POD `${KUBECTL_CMD} exec -n cattle-system -c rancher $POD -- loglevel --set info`" 104 | done 105 | 106 | } 107 | 108 | watch-logs() { 109 | 110 | techo "Live tailing debug logs from Rancher pods" 111 | techo "Please use Ctrl+C to finish tailing" 112 | mkdir -p $TMPDIR/rancher-logs/ 113 | ${KUBECTL_CMD} -n cattle-system logs -f -l app=rancher -c rancher | tee $TMPDIR/rancher-logs/live-logs 114 | 115 | } 116 | 117 | 118 | pause() { 119 | 120 | read -n1 -rsp $'Press any key once finished logging with debug loglevel, or Ctrl+C to exit and leave debug loglevel enabled... \n' 121 | 122 | } 123 | 124 | archive() { 125 | 126 | FILEDIR=$(dirname $TMPDIR) 127 | FILENAME="$(kubectl config view -o jsonpath='{.current-context}')-$(date +'%Y-%m-%d_%H_%M_%S').tar" 128 | tar --create --file ${FILEDIR}/${FILENAME} --directory ${TMPDIR}/ . 
129 | ## gzip separately for Rancher OS 130 | gzip ${FILEDIR}/${FILENAME} 131 | 132 | techo "Created ${FILEDIR}/${FILENAME}.gz" 133 | 134 | } 135 | 136 | cleanup() { 137 | 138 | techo "Removing ${TMPDIR}" 139 | rm -r -f "${TMPDIR}" >/dev/null 2>&1 140 | 141 | } 142 | 143 | help() { 144 | 145 | echo "Rancher Pod Collector 146 | Usage: rancher-pod-collector.sh [ -d -k KUBECONFIG -t -w -f ] 147 | 148 | All flags are optional 149 | 150 | -d Output directory for temporary storage and .tar.gz archive (ex: -d /var/tmp) 151 | -k Override the kubeconfig (ex: ~/.kube/custom) 152 | -t Enable trace logs 153 | -w Live tailing Rancher logs 154 | -f Force log collection if the minimum space isn't available" 155 | 156 | } 157 | 158 | timestamp() { 159 | 160 | date "+%Y-%m-%d %H:%M:%S" 161 | 162 | } 163 | 164 | techo() { 165 | 166 | echo "$(timestamp): $*" 167 | 168 | } 169 | 170 | while getopts ":d:k:ftwh" opt; do 171 | case $opt in 172 | d) 173 | MKTEMP_BASEDIR="${OPTARG}/temp.XXXX" 174 | ;; 175 | k) 176 | OVERRIDE_KUBECONFIG="${OPTARG}" 177 | ;; 178 | f) 179 | FORCE=1 180 | ;; 181 | t) 182 | TRACE=1 183 | ;; 184 | w) 185 | WATCH=1 186 | ;; 187 | h) 188 | help && exit 0 189 | ;; 190 | :) 191 | techo "Option -$OPTARG requires an argument." 192 | exit 1 193 | ;; 194 | *) 195 | help && exit 0 196 | esac 197 | done 198 | 199 | setup 200 | disk-space 201 | if [ -n "${DISK_FULL}" ] 202 | then 203 | if [ -z "${FORCE}" ] 204 | then 205 | techo "Cleaning up and exiting" 206 | cleanup 207 | exit 1 208 | else 209 | techo "-f (force) used, continuing" 210 | fi 211 | fi 212 | 213 | if [ ! -z "${TRACE}" ] 214 | then 215 | techo "WARNING: Trace logging has been set. Please confirm that you understand this may capture sensitive information." 216 | pause 217 | fi 218 | verify-access 219 | enable-debug 220 | if [ ! 
-z "${WATCH}" ] 221 | then 222 | watch-logs 223 | else 224 | techo "Debug loglevel has been set" 225 | pause 226 | fi 227 | disable-debug 228 | cluster-info 229 | archive 230 | cleanup 231 | -------------------------------------------------------------------------------- /collection/rancher/v2.x/supportability-review/collect.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ "${DEBUG}" == "true" ]; then 4 | set -x 5 | fi 6 | 7 | HELP_MENU() { 8 | echo "Supportability Review 9 | Usage: collect.sh [ -h ] 10 | 11 | All flags are optional 12 | 13 | -h Print help menu for Supportability Review 14 | 15 | Environment variables: 16 | 17 | RANCHER_URL: Specify Rancher Server URL (Ex: https://rancher.example.com) 18 | RANCHER_TOKEN: Specify Rancher Token to connect to Rancher Server 19 | SR_IMAGE: Use this variable to point to custom container image of Supportability Review 20 | " 21 | } 22 | 23 | SR_IMAGE=${SR_IMAGE:-"rancher/supportability-review:latest"} 24 | 25 | if [ "${CONTAINER_RUNTIME}" == "" ]; then 26 | if command -v docker &> /dev/null; then 27 | echo "setting CONTAINER_RUNTIME=docker" 28 | CONTAINER_RUNTIME="docker" 29 | elif command -v nerdctl &> /dev/null; then 30 | echo "setting CONTAINER_RUNTIME=nerdctl" 31 | CONTAINER_RUNTIME="nerdctl" 32 | elif command -v podman &> /dev/null; then 33 | echo "setting CONTAINER_RUNTIME=podman" 34 | CONTAINER_RUNTIME="podman" 35 | else 36 | echo "error: couldn't detect CONTAINER_RUNTIME" 37 | exit 1 38 | fi 39 | else 40 | supported_runtime=false 41 | for runtime in docker nerdctl podman; do 42 | if [ "${CONTAINER_RUNTIME}" == ${runtime} ]; then 43 | supported_runtime=true 44 | break 45 | fi 46 | done 47 | if [ "${supported_runtime}" == false ]; then 48 | echo "error: unsupported CONTAINER_RUNTIME. Use docker|nerdctl|podman." 49 | exit 1 50 | fi 51 | fi 52 | 53 | if [[ "$SR_IMAGE" != *":dev" ]]; then 54 | echo "pulling image: ${SR_IMAGE}" 55 | $CONTAINER_RUNTIME pull "${SR_IMAGE}" 56 | fi 57 | 58 | CONTAINER_RUNTIME_ARGS="" 59 | COLLECT_INFO_FROM_RANCHER_SETUP_ARGS="" 60 | 61 | if [ "$ENABLE_PRIVILEGED" = "true" ]; then 62 | CONTAINER_RUNTIME_ARGS="$CONTAINER_RUNTIME_ARGS --privileged" 63 | fi 64 | 65 | if [ "${SONOBUOY_TOLERATION_FILE}" != "" ]; then 66 | if [ ! -f "${SONOBUOY_TOLERATION_FILE}" ]; then 67 | echo "error: SONOBUOY_TOLERATION_FILE=${SONOBUOY_TOLERATION_FILE} specified, but cannot access that file" 68 | exit 1 69 | fi 70 | CONTAINER_RUNTIME_ARGS="$CONTAINER_RUNTIME_ARGS -v ${SONOBUOY_TOLERATION_FILE}:/tmp/sonobuoy_toleration.yml" 71 | COLLECT_INFO_FROM_RANCHER_SETUP_ARGS="$COLLECT_INFO_FROM_RANCHER_SETUP_ARGS --sonobuoy-toleration-file /tmp/sonobuoy_toleration.yml" 72 | fi 73 | 74 | if [ "${KUBECONFIG}" == "" ]; then 75 | if [ "${RANCHER_URL}" == "" ]; then 76 | echo "error: RANCHER_URL is not set" 77 | exit 1 78 | fi 79 | 80 | if [ "${RANCHER_TOKEN}" == "" ]; then 81 | echo "error: RANCHER_TOKEN is not set" 82 | exit 1 83 | fi 84 | 85 | if [ "$1" == "-h" ]; then 86 | HELP_MENU 87 | fi 88 | 89 | CONTAINER_RUNTIME_ARGS="$CONTAINER_RUNTIME_ARGS -e RANCHER_URL="${RANCHER_URL}"" 90 | CONTAINER_RUNTIME_ARGS="$CONTAINER_RUNTIME_ARGS -e RANCHER_TOKEN="${RANCHER_TOKEN}"" 91 | CONTAINER_RUNTIME_ARGS="$CONTAINER_RUNTIME_ARGS -e RANCHER_VERIFY_SSL_CERTS="${RANCHER_VERIFY_SSL_CERTS}"" 92 | else 93 | # TODO: Check if it's absolute path 94 | # TODO: Check if the file exists and it's readable 95 | echo "KUBECONFIG specified: ${KUBECONFIG}" 96 | 97 | if [ ! 
-f "${KUBECONFIG}" ]; then 98 | echo "error: KUBECONFIG=${KUBECONFIG} specified, but cannot access that file" 99 | exit 1 100 | fi 101 | 102 | CONTAINER_RUNTIME_ARGS="$CONTAINER_RUNTIME_ARGS -v ${KUBECONFIG}:/tmp/kubeconfig.yml" 103 | COLLECT_INFO_FROM_RANCHER_SETUP_ARGS="$COLLECT_INFO_FROM_RANCHER_SETUP_ARGS --kubeconfig /tmp/kubeconfig.yml" 104 | 105 | if [ -d "$HOME/.aws" ]; then 106 | CONTAINER_RUNTIME_ARGS="$CONTAINER_RUNTIME_ARGS -v $HOME/.aws:/root/.aws" 107 | fi 108 | if [ -n "$AWS_ACCESS_KEY_ID" ] && [ -n "$AWS_SECRET_ACCESS_KEY" ]; then 109 | CONTAINER_RUNTIME_ARGS="$CONTAINER_RUNTIME_ARGS -e AWS_ACCESS_KEY_ID="${AWS_ACCESS_KEY_ID}" -e AWS_SECRET_ACCESS_KEY="${AWS_SECRET_ACCESS_KEY}"" 110 | if [ -n "$AWS_SESSION_TOKEN" ]; then 111 | CONTAINER_RUNTIME_ARGS="$CONTAINER_RUNTIME_ARGS -e AWS_SESSION_TOKEN="${AWS_SESSION_TOKEN}"" 112 | fi 113 | fi 114 | 115 | fi 116 | 117 | CONTAINER_RUNTIME_ARGS="$CONTAINER_RUNTIME_ARGS -e DB_HOST_NAME="${DB_HOST_NAME}"" 118 | CONTAINER_RUNTIME_ARGS="$CONTAINER_RUNTIME_ARGS -e DB_PORT_NUMBER="${DB_PORT_NUMBER}"" 119 | CONTAINER_RUNTIME_ARGS="$CONTAINER_RUNTIME_ARGS -e DB_KEY="${DB_KEY}"" 120 | 121 | CONTAINER_RUNTIME_ARGS="$CONTAINER_RUNTIME_ARGS ${SR_IMAGE}" 122 | 123 | $CONTAINER_RUNTIME run --rm \ 124 | -it \ 125 | --network host \ 126 | -v `pwd`:/data \ 127 | $CONTAINER_RUNTIME_ARGS \ 128 | collect_info_from_rancher_setup.py $COLLECT_INFO_FROM_RANCHER_SETUP_ARGS "$@" 129 | -------------------------------------------------------------------------------- /collection/rancher/v2.x/supportability-review/collection-details.md: -------------------------------------------------------------------------------- 1 | # Rancher Supportability Review Collection Details 2 | 3 | ## Overview 4 | This document provides transparency about the data collected during a Rancher supportability review. The collection is designed to gather necessary diagnostic information while respecting privacy and security concerns. 
5 | 6 | ## Cluster-Level Collection 7 | 8 | ### Kubernetes Components 9 | - API server configuration 10 | - Controller manager settings 11 | - Scheduler configuration 12 | - etcd status and metrics 13 | - Kubelet configuration 14 | - Container runtime status 15 | 16 | ### Workload Information 17 | - Pod status and configuration 18 | - Deployment configurations 19 | - StatefulSet configurations 20 | - DaemonSet configurations 21 | - Service configurations 22 | - Ingress configurations 23 | 24 | ### Cluster Resources 25 | - Namespace listing 26 | - Resource quotas 27 | - Limit ranges 28 | - Network policies 29 | - Storage classes and PV/PVC status 30 | 31 | ### Custom Resources 32 | - Rancher-specific CRDs status 33 | - Cluster configuration CRs 34 | - Helm releases 35 | 36 | ## Node-Level Collection 37 | 38 | ### System Information 39 | - OS version and distribution 40 | - Kernel parameters 41 | - System resources (CPU, memory, disk) 42 | - Network configuration 43 | 44 | ### Container Runtime 45 | - Docker/containerd version 46 | - Runtime configuration 47 | - Container logs 48 | - Image list 49 | 50 | ### Kubernetes Components 51 | - Kubelet status 52 | - Proxy configuration 53 | - CNI configuration 54 | - Container runtime logs 55 | 56 | ### System Logs 57 | - Kubernetes component logs 58 | - System service logs related to container runtime 59 | - Kernel logs related to container operations 60 | 61 | ## What is NOT Collected 62 | 63 | ### Excluded Data 64 | - Application data and logs 65 | - Secrets and sensitive configurations 66 | - User data 67 | - Database contents 68 | - Custom application configurations 69 | - SSL private keys 70 | - Authentication tokens 71 | - Password hashes 72 | 73 | ### Storage 74 | - Application persistent volumes content 75 | - User uploaded files 76 | - Backup files 77 | 78 | ### Network 79 | - Raw network traffic 80 | - Packet captures 81 | - Private network configurations 82 | - VPN configurations 83 | 84 | ## Data Handling 85 | 86 | ### Collection Process 87 | 1. Data is collected using Sonobuoy plugins 88 | 2. Information is aggregated at cluster level 89 | 3. Results are bundled into a single archive 90 | 91 | ### Security Measures 92 | - All collection is read-only 93 | - No modifications are made to cluster configuration 94 | - Collection runs with minimal required permissions 95 | - Data transfer is encrypted 96 | - Generated bundles are encoded and compressed 97 | 98 | ## Usage of Collected Data 99 | 100 | The collected information is used for: 101 | - Identifying potential system issues 102 | - Validating configurations 103 | - Ensuring compliance with best practices 104 | - Troubleshooting reported problems 105 | - Providing optimization recommendations 106 | 107 | The data is analyzed by SUSE Rancher Support to: 108 | - Verify system health 109 | - Identify potential improvements 110 | - Ensure security compliance 111 | - Provide targeted recommendations 112 | - Support issue resolution 113 | 114 | ## Questions or Concerns 115 | 116 | If you have questions about data collection or need to exclude certain types of information, please contact SUSE Rancher Support before running the collection tool. 
We can provide guidance on: 117 | - Customizing collection scope 118 | - Excluding sensitive namespaces 119 | - Modifying collection parameters 120 | - Reviewing collection results -------------------------------------------------------------------------------- /collection/rancher/v2.x/supportability-review/security-policies.md: -------------------------------------------------------------------------------- 1 | # Security Policy Configuration Guide 2 | 3 | ## Overview 4 | This guide provides detailed configuration examples for running the Rancher Supportability Review tool in environments with various security policies. 5 | 6 | ## Kyverno Policies 7 | 8 | ### Required Exclusions 9 | ```yaml 10 | apiVersion: kyverno.io/v1 11 | kind: ClusterPolicy 12 | metadata: 13 | name: privilege-policy 14 | spec: 15 | validationFailureAction: Enforce 16 | background: true 17 | rules: 18 | - name: privilege-escalation 19 | match: 20 | any: 21 | - resources: 22 | kinds: 23 | - Pod 24 | exclude: 25 | any: 26 | - resources: 27 | namespaces: 28 | - sonobuoy 29 | validate: 30 | message: "Privilege escalation is disallowed..." 31 | ``` 32 | 33 | ### Common Kyverno Policies Requiring Modification 34 | - Privilege escalation policies 35 | - Container security policies 36 | - Resource quota policies 37 | - Host path mounting policies 38 | 39 | ## Pod Security Policies 40 | 41 | ### Required Permissions 42 | ```yaml 43 | apiVersion: policy/v1beta1 44 | kind: PodSecurityPolicy 45 | metadata: 46 | name: sonobuoy-psp 47 | spec: 48 | privileged: true 49 | allowPrivilegeEscalation: true 50 | volumes: 51 | - hostPath 52 | - configMap 53 | - emptyDir 54 | hostNetwork: true 55 | hostPID: true 56 | hostIPC: true 57 | runAsUser: 58 | rule: RunAsAny 59 | seLinux: 60 | rule: RunAsAny 61 | supplementalGroups: 62 | rule: RunAsAny 63 | fsGroup: 64 | rule: RunAsAny 65 | ``` 66 | 67 | ## Network Policies 68 | 69 | ### Sonobuoy Aggregator Access 70 | ```yaml 71 | apiVersion: networking.k8s.io/v1 72 | kind: NetworkPolicy 73 | metadata: 74 | name: allow-sonobuoy 75 | namespace: sonobuoy 76 | spec: 77 | podSelector: {} 78 | policyTypes: 79 | - Ingress 80 | - Egress 81 | ingress: 82 | - from: 83 | - namespaceSelector: 84 | matchLabels: 85 | kubernetes.io/metadata.name: sonobuoy 86 | egress: 87 | - to: 88 | - namespaceSelector: {} 89 | ``` 90 | 91 | ## Image Pull Policies 92 | 93 | ### Required Registry Access 94 | ```yaml 95 | apiVersion: operator.openshift.io/v1alpha1 96 | kind: ImageContentSourcePolicy 97 | metadata: 98 | name: sonobuoy-repo 99 | spec: 100 | repositoryDigestMirrors: 101 | - mirrors: 102 | - registry.example.com/supportability-review 103 | source: rancher/supportability-review 104 | - mirrors: 105 | - registry.example.com/sonobuoy 106 | source: rancher/mirrored-sonobuoy-sonobuoy 107 | ``` 108 | 109 | ## OPA Exempting Namespaces 110 | 111 | ### Required Exemption 112 | ```yaml 113 | apiVersion: config.gatekeeper.sh/v1alpha1 114 | kind: Config 115 | metadata: 116 | name: config 117 | namespace: "gatekeeper-system" 118 | spec: 119 | match: 120 | - excludedNamespaces: ["sonobuoy"] 121 | processes: ["*"] 122 | ``` 123 | 124 | 125 | ## Troubleshooting Security Policies 126 | 127 | ### Common Issues and Solutions 128 | 129 | #### 1. Privilege Escalation Blocked 130 | ```yaml 131 | # Error: 132 | validation error: privileged containers are not allowed 133 | 134 | # Solution: 135 | Add namespace exclusion for sonobuoy namespace in your policy 136 | ``` 137 | 138 | #### 2. 
Host Path Mounting Blocked 139 | ```yaml 140 | # Error: 141 | hostPath volumes are not allowed 142 | 143 | # Solution: 144 | Modify PSP to allow hostPath volume types for sonobuoy namespace 145 | ``` 146 | 147 | #### 3. Network Policy Blocks 148 | ```yaml 149 | # Error: 150 | unable to connect to sonobuoy aggregator 151 | 152 | # Solution: 153 | Ensure NetworkPolicy allows pod-to-pod communication in sonobuoy namespace 154 | ``` 155 | 156 | ## Best Practices 157 | 158 | ### Security Policy Configuration 159 | 1. Use namespace-specific exclusions 160 | 2. Avoid blanket exemptions 161 | 3. Monitor policy audit logs 162 | 4. Regular policy review 163 | 164 | ### Deployment Considerations 165 | 1. Use dedicated service accounts 166 | 2. Implement least-privilege access 167 | 3. Regular security audits 168 | 4. Documentation of exceptions 169 | 170 | ## Support 171 | For additional assistance with security policy configuration, contact SUSE Rancher Support with: 172 | 1. Current policy configurations 173 | 2. Error messages 174 | 3. Cluster configuration details 175 | -------------------------------------------------------------------------------- /collection/rancher/v2.x/systems-information-v2/README.md: -------------------------------------------------------------------------------- 1 | # Rancher 2.x Systems Summary v2 2 | 3 | The script runs as a pod in the Rancher Management (local) cluster and collects information about the clusters managed by Rancher. The script collects the following information: 4 | 5 | - Rancher server version and installation UUID 6 | - Details of all clusters managed by Rancher, including: 7 | - Cluster ID and name 8 | - Kubernetes version 9 | - Provider type 10 | - Creation timestamp 11 | - Nodes associated with each cluster 12 | - For each cluster, detailed information about each node, including: 13 | - Node ID and address 14 | - Role within the cluster 15 | - CPU and RAM capacity 16 | - Operating system and Docker version 17 | - Creation timestamp 18 | - Total count of nodes across all clusters 19 | 20 | ## How to use 21 | 22 | Run the following command to deploy the script as a pod in the Rancher Management (local) cluster: 23 | 24 | ```bash 25 | # Deploy the pod in the cluster 26 | kubectl apply -f https://raw.githubusercontent.com/rancherlabs/support-tools/master/collection/rancher/v2.x/systems-information-v2/deploy.yaml 27 | 28 | # Wait for the pod to reach Succeeded status 29 | while [[ $(kubectl get pod rancher-systems-summary-pod -n cattle-system -o 'jsonpath={..status.phase}') != "Succeeded" ]]; do 30 | echo "Waiting for rancher-systems-summary-pod to complete..." 31 | sleep 5 32 | done 33 | 34 | # Follow the logs from the pod 35 | kubectl logs -f pod/rancher-systems-summary-pod -n cattle-system 36 | 37 | # Clean up the pod 38 | kubectl delete pod/rancher-systems-summary-pod -n cattle-system 39 | ``` 40 | 41 | > Note: It might take a few minutes for the pod to collect the information and display it in the logs. 
The script will exit after displaying the information, you should see `Total node count` at the end of the log output 42 | 43 | Example output: 44 | 45 | ```bash 46 | Rancher Systems Summary Report 47 | ============================== 48 | Run on Mon Aug 12 16:46:44 UTC 2024 49 | 50 | NAME READY STATUS RESTARTS AGE 51 | rancher-747c5647d7-5fmh7 2/2 Running 3 (63m ago) 94m 52 | rancher-747c5647d7-76hjr 2/2 Running 5 (61m ago) 101m 53 | rancher-747c5647d7-sfmlc 2/2 Running 2 (35m ago) 92m 54 | Rancher version: v2.9.0 55 | Rancher id: b82b0b06-6f0b-4052-9f17-3602499f07dc 56 | 57 | Cluster Id Name K8s Version Provider Created Nodes 58 | c-m-mfc8m8z5 a1-ops-prd v1.30.2+rke2r1 imported 2024-01-27T20:16:15Z 59 | c-m-tncnvhrs a1-harvester-prd v1.27.13+rke2r1 rke2 2023-12-11T00:52:36Z 60 | local a1-rancher-prd v1.30.2+rke2r1 rke2 2023-08-13T08:46:40Z 61 | 62 | -------------------------------------------------------------------------------- 63 | Cluster: a1-ops-prd (c-m-mfc8m8z5) 64 | Node Id Address Role CPU RAM OS Docker Version Created 65 | machine-4m5rd 172.28.2.217,a1-ops-prd-medium-7962bbf5-wrc2t 8 16273392Ki 2024-07-10T18:28:25Z 66 | machine-4tvh7 172.28.2.142,a1-ops-prd-mgmt-105e966c-xvlg7 8 16273396Ki 2024-07-09T13:19:54Z 67 | machine-5dnpc 172.28.2.234,a1-ops-prd-large-ba0dc7eb-tpmh8 12 49228384Ki 2024-07-12T06:33:51Z 68 | machine-bpmld 172.28.2.235,a1-ops-prd-large-ba0dc7eb-2xzfv 12 49228376Ki 2024-07-12T06:39:50Z 69 | machine-hnhqb 172.28.2.185,a1-ops-prd-mgmt-105e966c-b68bx 8 16273400Ki 2024-07-08T05:36:20Z 70 | machine-j7ckv 172.28.2.220,a1-ops-prd-medium-7962bbf5-sptzb 8 16273412Ki 2024-07-10T18:34:02Z 71 | machine-lvljm 172.28.2.218,a1-ops-prd-small-8918c748-9hjl7 4 8029568Ki 2024-07-10T18:32:48Z 72 | machine-q8blw 172.28.2.205,a1-ops-prd-small-8918c748-5wz8n 4 8029568Ki 2024-07-10T17:58:51Z 73 | machine-rslml 172.28.2.222,a1-ops-prd-small-8918c748-rs7tf 4 8029564Ki 2024-07-10T21:55:58Z 74 | machine-sv2n2 172.28.2.167,a1-ops-prd-mgmt-105e966c-fbtdz 8 16273400Ki 2024-07-08T13:29:51Z 75 | machine-v5mxt 172.28.2.219,a1-ops-prd-small-8918c748-r9knc 4 8029556Ki 2024-07-10T18:33:35Z 76 | machine-vs9tn 172.28.2.223,a1-ops-prd-medium-7962bbf5-lqfwj 8 16273400Ki 2024-07-10T21:54:43Z 77 | machine-xjwjv 172.28.2.236,a1-ops-prd-large-ba0dc7eb-sbrfm 12 49228388Ki 2024-07-12T06:47:55Z 78 | machine-z674w 172.28.2.221,a1-ops-prd-small-8918c748-tlzvx 4 8029560Ki 2024-07-10T21:06:23Z 79 | Node count: 14 80 | 81 | -------------------------------------------------------------------------------- 82 | Cluster: a1-harvester-prd (c-m-tncnvhrs) 83 | Node Id Address Role CPU RAM OS Docker Version Created 84 | machine-4rbqg 172.28.2.22,a1hrr720p02 24 396150564Ki 2023-12-11T01:32:03Z 85 | machine-f864m 172.28.2.24,a1hrr720p04 24 264029632Ki 2024-02-10T00:54:14Z 86 | machine-p5lqp 172.28.2.21,a1hrr720p01 24 264030104Ki 2023-12-11T00:54:08Z 87 | machine-srwm6 172.28.2.23,a1hrr720p03 24 396150588Ki 2023-12-11T03:12:46Z 88 | machine-wfv9d 172.28.2.25,a1hrr720p05 24 264049860Ki 2024-02-10T01:01:46Z 89 | Node count: 5 90 | 91 | -------------------------------------------------------------------------------- 92 | Cluster: a1-rancher-prd (local) 93 | Node Id Address Role CPU RAM OS Docker Version Created 94 | machine-5xwg6 172.28.4.191,a1ubranvp-02 16 32761048Ki 2024-07-07T09:03:53Z 95 | machine-kplk9 172.28.4.116,a1ubranvp-03 16 32761056Ki 2024-07-07T08:55:21Z 96 | machine-tgqhj 172.28.4.160,a1ubranvp-01 16 32761060Ki 2024-07-07T09:03:53Z 97 | Node count: 3 98 | 
-------------------------------------------------------------------------------- 99 | Total node count: 22 100 | ``` 101 | -------------------------------------------------------------------------------- /collection/rancher/v2.x/systems-information-v2/deploy.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Pod 3 | metadata: 4 | name: rancher-systems-summary-pod 5 | namespace: cattle-system 6 | spec: 7 | serviceAccountName: rancher 8 | containers: 9 | - name: rancher-systems-summary 10 | image: rancherlabs/swiss-army-knife 11 | command: ["/bin/bash", "-c"] 12 | args: 13 | - | 14 | echo 'Rancher Systems Summary Report'; 15 | echo '=============================='; 16 | echo "Run on $(date)"; 17 | echo; 18 | kubectl -n cattle-system get pods -l app=rancher; 19 | echo "Rancher version: $(kubectl get settings.management.cattle.io server-version --no-headers -o custom-columns=version:value)"; 20 | echo "Rancher id: $(kubectl get settings.management.cattle.io install-uuid --no-headers -o custom-columns=id:value)"; 21 | echo; 22 | kubectl get clusters.management.cattle.io -o custom-columns=Cluster\ Id:metadata.name,Name:spec.displayName,K8s\ Version:status.version.gitVersion,Provider:status.provider,Created:metadata.creationTimestamp,Nodes:status.appliedSpec.rancherKubernetesEngineConfig.nodes[*].address; 23 | CLUSTER_IDS=$(kubectl get cluster.management.cattle.io --no-headers -o custom-columns=id:metadata.name); 24 | for ID in $CLUSTER_IDS; do 25 | CLUSTER_NAME=$(kubectl get cluster.management.cattle.io ${ID} --no-headers -o custom-columns=name:spec.displayName); 26 | NODE_COUNT=$(kubectl get nodes.management.cattle.io -n ${ID} --no-headers 2>/dev/null | wc -l ); 27 | ((TOTAL_NODE_COUNT += NODE_COUNT)); 28 | echo; 29 | echo '--------------------------------------------------------------------------------'; 30 | echo "Cluster: ${CLUSTER_NAME} (${ID})"; 31 | kubectl get nodes.management.cattle.io -n ${ID} -o custom-columns=Node\ Id:metadata.name,Address:status.internalNodeStatus.addresses[*].address,etcd:spec.etcd,Control\ Plane:spec.controlPlane,Worker:spec.worker,CPU:status.internalNodeStatus.capacity.cpu,RAM:status.internalNodeStatus.capacity.memory,OS:status.internalNodeStatus.nodeInfo.osImage,Container\ Runtime\ Version:status.internalNodeStatus.nodeInfo.containerRuntimeVersion,Created:metadata.creationTimestamp; 32 | echo "Node count: ${NODE_COUNT}"; 33 | done; 34 | echo '--------------------------------------------------------------------------------'; 35 | echo "Total node count: ${TOTAL_NODE_COUNT}"; 36 | restartPolicy: Never 37 | -------------------------------------------------------------------------------- /collection/rancher/v2.x/systems-information/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:18.04 2 | MAINTAINER Rancher Support support@rancher.com 3 | ENV DEBIAN_FRONTEND=noninteractive 4 | 5 | RUN apt-get update && apt-get install -yq --no-install-recommends \ 6 | curl \ 7 | msmtp \ 8 | && apt-get clean && rm -rf /var/lib/apt/lists/* 9 | 10 | ##Installing kubectl 11 | RUN curl -k -LO https://storage.googleapis.com/kubernetes-release/release/`curl -k -s https://storage.googleapis.com/kubernetes-release/release/stable.txt`/bin/linux/amd64/kubectl && mv kubectl /bin/kubectl && chmod +x /bin/kubectl 12 | 13 | ADD *.sh /usr/bin/ 14 | RUN chmod +x /usr/bin/*.sh 15 | 16 | WORKDIR /root 17 | CMD /usr/bin/run.sh 18 | 
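This Dockerfile appears to produce the image consumed by the `charts/systems-information` chart, which references `docker.io/rancher/systems-info:v0.0.1` in its `values.yaml`. A minimal sketch of building the image from this directory and pushing it to a private registry instead (the registry name is an assumption):

```bash
# Build the systems-information image and push it to a private registry (registry name assumed)
docker build -t registry.example.com/systems-info:v0.0.1 .
docker push registry.example.com/systems-info:v0.0.1
```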
-------------------------------------------------------------------------------- /collection/rancher/v2.x/systems-information/README.md: -------------------------------------------------------------------------------- 1 | # Rancher 2.x Systems Summary 2 | 3 | The script needs to be downloaded and run directly on a host running a Rancher server container, either as a single node install or a Rancher Pod as part of a High Availability install. The script needs to be run by a user with access to the Docker socket or using `sudo`. 4 | 5 | ## How to use 6 | 7 | * Download the script and save as: `rancher2_systems_information.sh` 8 | * Make sure the script is executable: `chmod +x rancher2_systems_information.sh` 9 | * Run the script: `./rancher2_systems_information.sh` 10 | -------------------------------------------------------------------------------- /collection/rancher/v2.x/systems-information/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | if [[ -z $smtp_user ]] 5 | then 6 | cat << EOF > /etc/msmtprc 7 | account default 8 | host ${smtp_host} 9 | port ${smtp_port} 10 | from ${from_address} 11 | logfile /var/log/msmtp.log 12 | EOF 13 | 14 | else 15 | cat << EOF > /etc/msmtprc 16 | account default 17 | host ${smtp_host} 18 | port ${smtp_port} 19 | tls on 20 | tls_starttls on 21 | tls_certcheck off 22 | auth on 23 | user ${smtp_user} 24 | password ${smtp_pass} 25 | from ${from_address} 26 | logfile /var/log/msmtp.log 27 | EOF 28 | fi 29 | chmod 600 /etc/msmtprc 30 | 31 | echo "Running Summary Report..." 32 | /usr/bin/systems_summary.sh | tee report.txt 33 | 34 | echo "To: ${to_address}" > email.txt 35 | if [[ "$send_to_support" == "true" ]] 36 | then 37 | echo "CC: support@support.tools" >> email.txt 38 | fi 39 | echo "From: ${from_address}" >> email.txt 40 | echo "Subject: Rancher Systems Summary Report - ${rancher_name}" >> email.txt 41 | cat report.txt >> email.txt 42 | cat email.txt | msmtp -a default ${to_address} 43 | -------------------------------------------------------------------------------- /collection/rancher/v2.x/systems-information/systems_summary.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo "Rancher Systems Summary Report" 4 | echo "==============================" 5 | echo "Run on `date`" 6 | echo 7 | 8 | if [[ ! -z $KUBERNETES_PORT ]]; 9 | then 10 | RANCHER_POD=$(kubectl -n cattle-system get pods -l app=rancher --no-headers -o custom-columns=id:metadata.name --field-selector status.phase=Running | head -n1) 11 | KUBECTL_CMD="kubectl -n cattle-system exec ${RANCHER_POD} -c rancher -- kubectl" 12 | else 13 | if $(command -v rke2 >/dev/null 2>&1) 14 | then 15 | KUBECTL_CMD="/var/lib/rancher/rke2/bin/kubectl --kubeconfig=/etc/rancher/rke2/rke2.yaml" 16 | elif $(command -v k3s >/dev/null 2>&1) 17 | then 18 | KUBECTL_CMD="k3s kubectl" 19 | else 20 | # Get docker id for rancher single node install 21 | DOCKER_ID=$(docker ps | grep "rancher/rancher:" | cut -d' ' -f1) 22 | if [ -z "${DOCKER_ID}" ] 23 | then 24 | # Get docker id for rancher ha install 25 | DOCKER_ID=$(docker ps | grep "k8s_rancher_rancher" | cut -d' ' -f1 | head -1) 26 | if [ -z "${DOCKER_ID}" ] 27 | then 28 | echo "Could not find Rancher 2 container, exiting..." 
29 | exit -1 30 | fi 31 | fi 32 | KUBECTL_CMD="docker exec ${DOCKER_ID} kubectl" 33 | fi 34 | fi 35 | 36 | echo "Rancher version: $(${KUBECTL_CMD} get settings.management.cattle.io server-version --no-headers -o custom-columns=version:value)" 37 | echo "Rancher id: $(${KUBECTL_CMD} get settings.management.cattle.io install-uuid --no-headers -o custom-columns=id:value)" 38 | echo 39 | 40 | ${KUBECTL_CMD} get clusters.management.cattle.io -o custom-columns=Cluster\ Id:metadata.name,Name:spec.displayName,K8s\ Version:status.version.gitVersion,Provider:status.driver,Created:metadata.creationTimestamp,Nodes:status.appliedSpec.rancherKubernetesEngineConfig.nodes[*].address 41 | 42 | CLUSTER_IDS=$(${KUBECTL_CMD} get cluster.management.cattle.io --no-headers -o custom-columns=id:metadata.name) 43 | 44 | for ID in $CLUSTER_IDS 45 | do 46 | CLUSTER_NAME=$(${KUBECTL_CMD} get cluster.management.cattle.io ${ID} --no-headers -o custom-columns=name:spec.displayName) 47 | NODE_COUNT=$(${KUBECTL_CMD} get nodes.management.cattle.io -n ${ID} --no-headers 2>/dev/null | wc -l ) 48 | ((TOTAL_NODE_COUNT += NODE_COUNT)) 49 | echo 50 | echo "--------------------------------------------------------------------------------" 51 | echo "Cluster: ${CLUSTER_NAME} (${ID})" 52 | ${KUBECTL_CMD} get nodes.management.cattle.io -n ${ID} -o custom-columns=Node\ Id:metadata.name,Address:status.internalNodeStatus.addresses[*].address,Role:status.rkeNode.role[*],CPU:status.internalNodeStatus.capacity.cpu,RAM:status.internalNodeStatus.capacity.memory,OS:status.dockerInfo.OperatingSystem,Docker\ Version:status.dockerInfo.ServerVersion,Created:metadata.creationTimestamp 53 | echo "Node count: ${NODE_COUNT}" 54 | done 55 | echo "--------------------------------------------------------------------------------" 56 | echo "Total node count: ${TOTAL_NODE_COUNT}" 57 | -------------------------------------------------------------------------------- /collection/rancher/v2.x/windows-log-collector/README.md: -------------------------------------------------------------------------------- 1 | # Rancher v2.x Windows log-collector 2 | 3 | This logs collector project was created to collect logs from Windows Kubernetes nodes. It is designed to be used with RKE1 Windows clusters for troubleshooting support cases. 
4 | 5 | ## Usage 6 | 7 | - Open a new PowerShell window with Administrator Privileges (Find Windows PowerShell in Start Menu, right click, Run As Administrator) 8 | - Run the following commands in your PowerShell window 9 | 10 | ```ps1 11 | Set-ExecutionPolicy Bypass 12 | Start-BitsTransfer https://raw.githubusercontent.com/rancherlabs/support-tools/master/collection/rancher/v2.x/windows-log-collector/win-log-collect.ps1 13 | .\win-log-collect.ps1 14 | ``` 15 | 16 | ### Upon successful completion, your log bundle will be on the root of the C: drive (example below) 17 | 18 | ``` 19 | > dir C:\ 20 | d----- 11/14/2018 6:56 AM EFI 21 | d----- 6/2/2020 3:31 PM etc 22 | d----- 6/2/2020 3:31 PM opt 23 | d----- 5/13/2020 6:03 PM PerfLogs 24 | d-r--- 5/13/2020 5:25 PM Program Files 25 | d----- 6/2/2020 3:16 PM Program Files (x86) 26 | d----- 6/2/2020 7:23 PM rancher 27 | d----- 6/2/2020 4:06 PM run 28 | d-r--- 6/1/2020 6:30 PM Users 29 | d----- 6/2/2020 3:31 PM var 30 | d----- 6/1/2020 6:26 PM Windows 31 | -a---- 6/2/2020 5:07 PM 428911 rancher_EC2AMAZ-ENEJ0H8_20200602T1704290242Z.tgz 32 | ``` 33 | 34 | ### Expected output 35 | 36 | > Note: The `Unable to Collect Windows Firewall information` error is expected if it there are no Domain specific firewall rules 37 | 38 | ```ps1 39 | Running Rancher Log Collection 40 | Creating temporary directory 41 | OK 42 | Collecting System information 43 | OK 44 | Collecting PS output 45 | Collecting Disk information 46 | Collecting Volume info 47 | OK 48 | Collecting Windows Firewall info 49 | Collecting Rules for Domain profile 50 | get_firewall_info : Unable to Collect Windows Firewall information 51 | At C:\Users\Administrator\log-collect-beta.ps1:397 char:5 52 | + get_firewall_info 53 | + ~~~~~~~~~~~~~~~~~ 54 | + CategoryInfo : NotSpecified: (:) [Write-Error], WriteErrorException 55 | + FullyQualifiedErrorId : Microsoft.PowerShell.Commands.WriteErrorException,get_firewall_info 56 | 57 | Collecting installed applications list 58 | OK 59 | Collecting Services list 60 | OK 61 | Collecting Docker daemon information 62 | OK 63 | Collecting Kubernetes components config 64 | OK 65 | Collecting Windows Event logs 66 | OK 67 | Collecting Kubernetes Logs 68 | OK 69 | Collecting network Information 70 | OK 71 | Collecting group policy information 72 | Get-GPOReport is not a valid cmdlet 73 | Collecting proxy information 74 | OK 75 | Archiving Rancher log collection script data 76 | OK 77 | Done. Your log bundle is located in C:\rancher_EC2AMAZ-ENEJ0H8_20200602T1704290242Z 78 | Please supply the log bundle(s) to Rancher Support 79 | Cleaning up directory 80 | OK 81 | ``` 82 | -------------------------------------------------------------------------------- /eks-upgrade-using-api/README.md: -------------------------------------------------------------------------------- 1 | # SURE-5880 Support Script 2 | 3 | ## Purpose 4 | 5 | This script is designed to be used to upgrade EKS clusters using the Rancher API. Its been specifically designed for Rancher v2.6.10 and upgrading EKS clusters from 1.22 to 1.23 (whilst a UI issue prevents this). 6 | 7 | ## Requirements 8 | 9 | This script requires the following: 10 | 11 | - jq 12 | - cURL 13 | - Rancher API Endpoint 14 | - Rancher API Token 15 | 16 | ## Demo 17 | 18 | ![demo](demo.gif) 19 | 20 | ## Usage 21 | 22 | 1. Create an API key in Rancher.The key can be scoped per cluster or with no scope. Its easier to have no scope as you can use the same API key for all cluster upgrades. 23 | 2. 
Note down the **Bearer Token** API key 24 | 3. Note down the API Endpoint 25 | 2. Open a terminal 26 | 3. Export environment variables for the key and endpoint 27 | 28 | ```bash 29 | export RANCHER_TOKEN="" 30 | export RANCHER_API="" 31 | ``` 32 | 33 | 4. Get a list of your EKS clusters using this command 34 | 35 | ```bash 36 | # For v2 37 | ./eks-support.sh list -t $RANCHER_TOKEN --endpoint $RANCHER_API 38 | # For v1 39 | ./eks-support.sh list -t $RANCHER_TOKEN --endpoint $RANCHER_API --kev1 40 | ``` 41 | 42 | > The output will list all the found EKS clusters with their name, id, current version and state. 43 | 44 | ### Upgrading EKS Clusters 45 | 46 | 1. For each EKS cluster you want to upgrade run the following command: 47 | 48 | ```bash 49 | # For v2 50 | ./eks-support.sh upgrade -t $RANCHER_TOKEN --endpoint $RANCHER_API --from 1.22 --to 1.23 --name 51 | # For v1 52 | ./eks-support.sh upgrade -t $RANCHER_TOKEN --endpoint $RANCHER_API --from 1.22 --to 1.23 --name --aws-secret-key "" --kev1 53 | ``` 54 | 55 | > Replace the values of --from, --to and --name with your values. 56 | 57 | 2. The cluster will start to upgrade. You can check the status of a specific cluster using this command: 58 | 59 | ```bash 60 | ./eks-support.sh status -t $RANCHER_TOKEN --endpoint $RANCHER_API --name richtest1 61 | ``` 62 | 63 | ### Unsetting Node Groups as managed fields for imported EKS Clusters (only for KEv2) 64 | 65 | ```bash 66 | # For v2 67 | ./eks-support.sh unset_nodegroups -t $RANCHER_TOKEN --endpoint $RANCHER_API --name 68 | ``` 69 | 70 | -------------------------------------------------------------------------------- /eks-upgrade-using-api/common.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # This containers common functions for shell scripts. Its 4 | # meant to be source included into another script. 5 | 6 | ## HELPER FUNCS 7 | 8 | # Send a green message to stdout, followed by a new line 9 | say() { 10 | [ -t 1 ] && [ -n "$TERM" ] && 11 | echo "$(tput setaf 2)[$MY_NAME]$(tput sgr0) $*" || 12 | echo "[$MY_NAME] $*" 13 | } 14 | 15 | # Send a green message to stdout, without a trailing new line 16 | say_noln() { 17 | [ -t 1 ] && [ -n "$TERM" ] && 18 | echo -n "$(tput setaf 2)[$MY_NAME]$(tput sgr0) $*" || 19 | echo "[$MY_NAME] $*" 20 | } 21 | 22 | # Send a red message to stdout, followed by a new line 23 | say_err() { 24 | [ -t 2 ] && [ -n "$TERM" ] && 25 | echo -e "$(tput setaf 1)[$MY_NAME] $*$(tput sgr0)" 1>&2 || 26 | echo -e "[$MY_NAME] $*" 1>&2 27 | } 28 | 29 | # Send a yellow message to stdout, followed by a new line 30 | say_warn() { 31 | [ -t 1 ] && [ -n "$TERM" ] && 32 | echo "$(tput setaf 3)[$MY_NAME] $*$(tput sgr0)" || 33 | echo "[$MY_NAME] $*" 34 | } 35 | 36 | # Send a yellow message to stdout, without a trailing new line 37 | say_warn_noln() { 38 | [ -t 1 ] && [ -n "$TERM" ] && 39 | echo -n "$(tput setaf 3)[$MY_NAME] $*$(tput sgr0)" || 40 | echo "[$MY_NAME] $*" 41 | } 42 | 43 | # Exit with an error message and (optional) code 44 | # Usage: die [-c ] 45 | die() { 46 | code=1 47 | [[ "$1" = "-c" ]] && { 48 | code="$2" 49 | shift 2 50 | } 51 | say_err "$@" 52 | exit "$code" 53 | } 54 | 55 | # Exit with an error message if the last exit code is not 0 56 | ok_or_die() { 57 | code=$? 58 | [[ $code -eq 0 ]] || die -c $code "$@" 59 | } 60 | 61 | ## MAIN 62 | main() { 63 | if [ $# = 0 ]; then 64 | die "No command provided. Please use \`$0 help\` for help." 65 | fi 66 | 67 | # Parse main command line args. 
68 | while [ $# -gt 0 ]; do 69 | case "$1" in 70 | -h | --help) 71 | cmd_help 72 | exit 1 73 | ;; 74 | -*) 75 | die "Unknown arg: $1. Please use \`$0 help\` for help." 76 | ;; 77 | *) 78 | break 79 | ;; 80 | esac 81 | shift 82 | done 83 | 84 | # $1 is now a command name. Check if it is a valid command and, if so, 85 | # run it. 86 | # 87 | declare -f "cmd_$1" >/dev/null 88 | ok_or_die "Unknown command: $1. Please use \`$0 help\` for help." 89 | 90 | cmd=cmd_$1 91 | shift 92 | 93 | # $@ is now a list of command-specific args 94 | # 95 | $cmd "$@" 96 | } -------------------------------------------------------------------------------- /eks-upgrade-using-api/demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rancherlabs/support-tools/2fa26cd1c13cc329f5553f88adfe693ee978848e/eks-upgrade-using-api/demo.gif -------------------------------------------------------------------------------- /eks-upgrade-using-kubectl/README.md: -------------------------------------------------------------------------------- 1 | # SURE-5880 Support Script 2 | 3 | ## Purpose 4 | 5 | This script is designed to be used to upgrade EKS clusters using kubectl. Its been specifically designed for Rancher v2.6.10 and upgrading EKS clusters from 1.22 to 1.23 (whilst a UI issue prevents this). 6 | 7 | ## Requirements 8 | 9 | This script requires the following: 10 | 11 | - jq 12 | - kubectl 13 | 14 | ## Usage 15 | 16 | 1. Open a terminal 17 | 2. Export environment variables for the path to the kubeconfig for your Rancher cluster 18 | 19 | ```bash 20 | export RANCHER_KUBE="" 21 | ``` 22 | 23 | ### Upgrading EKS Clusters 24 | 25 | 1. Get a list of your EKS clusters using this command 26 | 27 | ```bash 28 | # For v2 29 | ./eks-support.sh list -k $RANCHER_KUBE 30 | # For v1 31 | ./eks-support.sh list -k $RANCHER_KUBE --kev1 32 | ``` 33 | 34 | 2. For each EKS cluster you want to upgrade run the following command: 35 | 36 | ```bash 37 | # For v2 38 | ./eks-support.sh upgrade -k $RANCHER_KUBE --from 1.22 --to 1.23 --nname 39 | # For v1 40 | ./eks-support.sh upgrade -k $RANCHER_KUBE --from 1.22 --to 1.23 --name --kev1 41 | ``` 42 | 43 | > Replace the values of --from, --to and --name with your values. 44 | 45 | ### Unsetting Node Groups as managed fields for imported EKS Clusters (only for KEv2) 46 | 47 | ```bash 48 | # For v2 49 | ./eks-support.sh unset_nodegroups -k $RANCHER_KUBE --name 50 | ``` 51 | -------------------------------------------------------------------------------- /eks-upgrade-using-kubectl/common.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # This containers common functions for shell scripts. Its 4 | # meant to be source included into another script. 
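#
# Usage sketch (an assumption about how the sibling eks-support.sh consumes this file; only the
# say*/die helpers, the cmd_<name> dispatch convention and main() come from this script):
#
#   MY_NAME="eks-support"            # prefix printed by say(), say_err(), say_warn()
#   source "$(dirname "$0")/common.sh"
#   cmd_help() { echo "usage: $0 <list|upgrade|unset_nodegroups> ..."; }
#   cmd_list() { say "listing clusters..."; }
#   main "$@"                        # dispatches to cmd_<subcommand>, or dies with a help hint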
5 | 6 | ## HELPER FUNCS 7 | 8 | # Send a green message to stdout, followed by a new line 9 | say() { 10 | [ -t 1 ] && [ -n "$TERM" ] && 11 | echo "$(tput setaf 2)[$MY_NAME]$(tput sgr0) $*" || 12 | echo "[$MY_NAME] $*" 13 | } 14 | 15 | # Send a green message to stdout, without a trailing new line 16 | say_noln() { 17 | [ -t 1 ] && [ -n "$TERM" ] && 18 | echo -n "$(tput setaf 2)[$MY_NAME]$(tput sgr0) $*" || 19 | echo "[$MY_NAME] $*" 20 | } 21 | 22 | # Send a red message to stdout, followed by a new line 23 | say_err() { 24 | [ -t 2 ] && [ -n "$TERM" ] && 25 | echo -e "$(tput setaf 1)[$MY_NAME] $*$(tput sgr0)" 1>&2 || 26 | echo -e "[$MY_NAME] $*" 1>&2 27 | } 28 | 29 | # Send a yellow message to stdout, followed by a new line 30 | say_warn() { 31 | [ -t 1 ] && [ -n "$TERM" ] && 32 | echo "$(tput setaf 3)[$MY_NAME] $*$(tput sgr0)" || 33 | echo "[$MY_NAME] $*" 34 | } 35 | 36 | # Send a yellow message to stdout, without a trailing new line 37 | say_warn_noln() { 38 | [ -t 1 ] && [ -n "$TERM" ] && 39 | echo -n "$(tput setaf 3)[$MY_NAME] $*$(tput sgr0)" || 40 | echo "[$MY_NAME] $*" 41 | } 42 | 43 | # Exit with an error message and (optional) code 44 | # Usage: die [-c ] 45 | die() { 46 | code=1 47 | [[ "$1" = "-c" ]] && { 48 | code="$2" 49 | shift 2 50 | } 51 | say_err "$@" 52 | exit "$code" 53 | } 54 | 55 | # Exit with an error message if the last exit code is not 0 56 | ok_or_die() { 57 | code=$? 58 | [[ $code -eq 0 ]] || die -c $code "$@" 59 | } 60 | 61 | ## MAIN 62 | main() { 63 | if [ $# = 0 ]; then 64 | die "No command provided. Please use \`$0 help\` for help." 65 | fi 66 | 67 | # Parse main command line args. 68 | while [ $# -gt 0 ]; do 69 | case "$1" in 70 | -h | --help) 71 | cmd_help 72 | exit 1 73 | ;; 74 | -*) 75 | die "Unknown arg: $1. Please use \`$0 help\` for help." 76 | ;; 77 | *) 78 | break 79 | ;; 80 | esac 81 | shift 82 | done 83 | 84 | # $1 is now a command name. Check if it is a valid command and, if so, 85 | # run it. 86 | # 87 | declare -f "cmd_$1" >/dev/null 88 | ok_or_die "Unknown command: $1. Please use \`$0 help\` for help." 89 | 90 | cmd=cmd_$1 91 | shift 92 | 93 | # $@ is now a list of command-specific args 94 | # 95 | $cmd "$@" 96 | } -------------------------------------------------------------------------------- /extended-rancher-2-cleanup/README.md: -------------------------------------------------------------------------------- 1 | ## Extended Rancher 2 Cleanup 2 | 3 | This script is designed to clean a node provisioned with the RKE1 distribution using Rancher or the RKE CLI. 4 | 5 | The node will be cleaned of all state to ensure it is consistent to reuse in a cluster or other use case. 6 | 7 | For [RKE2](https://docs.rke2.io/install/uninstall) and [K3s](https://rancher.com/docs/k3s/latest/en/installation/uninstall/) nodes, use the uninstall.sh script created during installation 8 | 9 | > **Warning** this script will delete all containers, volumes, network interfaces, and directories that relate to Rancher and Kubernetes. It will also flush all iptables rules and optionally delete container images. 10 | 11 | > It is important to perform pre-checks, and backup the node as needed before proceeding with any steps below. 
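A pre-check sketch before running the cleanup (paths are RKE1 defaults; local etcd snapshots live under `/opt/rke`, which the `-s` flag removes), to review what will be deleted and copy snapshots aside:

```bash
# Review what the cleanup will remove
docker ps -a --format '{{.Names}}'
docker volume ls -q
ls /opt/rke/etcd-snapshots 2>/dev/null

# Copy local etcd snapshots somewhere safe before running the cleanup
tar -czf /root/etcd-snapshots-$(date +%F).tar.gz -C /opt/rke etcd-snapshots 2>/dev/null
```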
12 | 13 | ### Running the script 14 | 15 | #### Download the script 16 | ```bash 17 | curl -LO https://github.com/rancherlabs/support-tools/raw/master/extended-rancher-2-cleanup/extended-cleanup-rancher2.sh 18 | ``` 19 | #### Run the script as root, or prefix with sudo 20 | ```bash 21 | bash extended-cleanup-rancher2.sh 22 | ``` 23 | 24 | ### Usage 25 | 26 | ```bash 27 | # bash extended-cleanup-rancher2.sh -h 28 | Rancher 2.x extended cleanup 29 | Usage: bash extended-cleanup-rancher2.sh [ -f -i -s ] 30 | 31 | All flags are optional 32 | 33 | -f | --skip-iptables Skip flush of iptables rules 34 | -i | --delete-images Cleanup all container images 35 | -s | --delete-snapshots Cleanup all etcd snapshots 36 | -h This help menu 37 | 38 | !! Warning, this script flushes iptables rules, removes containers, and all data specific to Kubernetes and Rancher 39 | !! Docker will be restarted when flushing iptables rules 40 | !! Backup data as needed before running this script 41 | !! Use at your own risk 42 | ``` 43 | -------------------------------------------------------------------------------- /extended-rancher-2-cleanup/extended-cleanup-rancher2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Cleanup for nodes provisioned using the RKE1 distribution 4 | # Note, for RKE2 and K3s use the uninstall script deployed on the node during install. 5 | 6 | # Directories to cleanup 7 | CLEANUP_DIRS=(/etc/ceph /etc/cni /etc/kubernetes /opt/cni /run/secrets/kubernetes.io /run/calico /run/flannel /var/lib/calico /var/lib/weave /var/lib/etcd /var/lib/cni /var/lib/kubelet /var/lib/rancher/rke/log /var/log/containers /var/log/pods /var/run/calico) 8 | 9 | # Interfaces to cleanup 10 | CLEANUP_INTERFACES=(flannel.1 cni0 tunl0 weave datapath vxlan-6784) 11 | 12 | run() { 13 | 14 | CONTAINERS=$(docker ps -qa) 15 | if [[ -n ${CONTAINERS} ]] 16 | then 17 | cleanup-containers 18 | else 19 | techo "No containers exist, skipping container cleanup..." 20 | fi 21 | cleanup-dirs 22 | cleanup-interfaces 23 | VOLUMES=$(docker volume ls -q) 24 | if [[ -n ${VOLUMES} ]] 25 | then 26 | cleanup-volumes 27 | else 28 | techo "No volumes exist, skipping container volume cleanup..." 29 | fi 30 | if [[ ${DELETE_IMAGES} -eq 1 ]] 31 | then 32 | IMAGES=$(docker images -q) 33 | if [[ -n ${IMAGES} ]] 34 | then 35 | cleanup-images 36 | else 37 | techo "No images exist, skipping container image cleanup..." 38 | fi 39 | fi 40 | if [[ -z ${SKIP_FLUSH_IPTABLES} ]] 41 | then 42 | flush-iptables 43 | else 44 | techo "Skipping flush of iptables rules..." 45 | fi 46 | techo "Done!" 47 | 48 | } 49 | 50 | cleanup-containers() { 51 | 52 | techo "Removing containers..." 53 | docker rm -f $(docker ps -qa) 54 | 55 | } 56 | 57 | cleanup-dirs() { 58 | 59 | techo "Unmounting filesystems..." 60 | for mount in $(mount | grep '/var/lib/kubelet' | awk '{ print $3 }') 61 | do 62 | umount -f $mount 63 | done 64 | 65 | if [ -n "${DELETE_SNAPSHOTS}" ] 66 | then 67 | techo "Removing etcd snapshots..." 68 | rm -rf /opt/rke 69 | fi 70 | 71 | techo "Removing directories..." 72 | for DIR in "${CLEANUP_DIRS[@]}" 73 | do 74 | techo "Removing $DIR" 75 | rm -rf $DIR 76 | done 77 | 78 | } 79 | 80 | cleanup-images() { 81 | 82 | techo "Removing images..." 83 | docker rmi -f $(docker images -q) 84 | 85 | } 86 | 87 | cleanup-interfaces() { 88 | 89 | techo "Removing interfaces..." 
90 | for INTERFACE in "${CLEANUP_INTERFACES[@]}" 91 | do 92 | if $(ip link show ${INTERFACE} > /dev/null 2>&1) 93 | then 94 | techo "Removing $INTERFACE" 95 | ip link delete $INTERFACE 96 | fi 97 | done 98 | 99 | } 100 | 101 | cleanup-volumes() { 102 | 103 | techo "Removing volumes..." 104 | docker volume rm $(docker volume ls -q) 105 | 106 | } 107 | 108 | flush-iptables() { 109 | 110 | techo "Flushing iptables..." 111 | iptables -F -t nat 112 | iptables -X -t nat 113 | iptables -F -t mangle 114 | iptables -X -t mangle 115 | iptables -F 116 | iptables -X 117 | techo "Restarting Docker..." 118 | if systemctl list-units --full -all | grep -q docker.service 119 | then 120 | systemctl restart docker 121 | else 122 | /etc/init.d/docker restart 123 | fi 124 | 125 | } 126 | 127 | help() { 128 | 129 | echo "Rancher 2.x extended cleanup 130 | Usage: bash extended-cleanup-rancher2.sh [ -f -i -s ] 131 | 132 | All flags are optional 133 | 134 | -f | --skip-iptables Skip flush of iptables rules 135 | -i | --delete-images Cleanup all container images 136 | -s | --delete-snapshots Cleanup all etcd snapshots 137 | -h This help menu 138 | 139 | !! Warning, this script flushes iptables rules, removes containers, and all data specific to Kubernetes and Rancher 140 | !! Docker will be restarted when flushing iptables rules 141 | !! Backup data as needed before running this script 142 | !! Use at your own risk" 143 | 144 | } 145 | 146 | timestamp() { 147 | 148 | date "+%Y-%m-%d %H:%M:%S" 149 | 150 | } 151 | 152 | techo() { 153 | 154 | echo "$(timestamp): $*" 155 | 156 | } 157 | 158 | # Check if we're running as root. 159 | if [[ $EUID -ne 0 ]] 160 | then 161 | techo "This script must be run as root" 162 | exit 1 163 | fi 164 | 165 | while test $# -gt 0 166 | do 167 | case ${1} in 168 | -f|--skip-iptables) 169 | shift 170 | SKIP_FLUSH_IPTABLES=1 171 | ;; 172 | -i|--delete-images) 173 | shift 174 | DELETE_IMAGES=1 175 | ;; 176 | -s|--delete-snapshots) 177 | shift 178 | DELETE_SNAPSHOTS=1 179 | ;; 180 | h) 181 | help && exit 0 182 | ;; 183 | *) 184 | help && exit 0 185 | esac 186 | done 187 | 188 | # Run the cleanup 189 | run -------------------------------------------------------------------------------- /files/curl-format.txt: -------------------------------------------------------------------------------- 1 | http_code: %{http_code}\n 2 | http_connect: %{http_connect}\n 3 | time_total: %{time_total}\n 4 | time_namelookup: %{time_namelookup}\n 5 | time_connect: %{time_connect}\n 6 | time_appconnect: %{time_appconnect}\n 7 | time_pretransfer: %{time_pretransfer}\n 8 | time_redirect: %{time_redirect}\n 9 | time_starttransfer: %{time_starttransfer}\n 10 | size_download: %{size_download}\n 11 | size_upload: %{size_upload}\n 12 | size_header: %{size_header}\n 13 | size_request: %{size_request}\n 14 | speed_download: %{speed_download}\n 15 | speed_upload: %{speed_upload}\n 16 | content_type: %{content_type}\n 17 | num_connects: %{num_connects}\n 18 | num_redirects :%{num_redirects}\n 19 | -------------------------------------------------------------------------------- /fleet-delete-cluster-registration/README.md: -------------------------------------------------------------------------------- 1 | # Fleet | Registration Resource Cleanup 2 | 3 | This is a cleanup script to work around a known Fleet bug whereby patching a downstream cluster, for instance when re-deploying a Fleet agent in such a cluster, causes new resources to be created without obsolete resources being deleted. 
Ultimately, this clutters the upstream cluster. 4 | 5 | This script retrieves all cluster registration resources, orders them by cluster then by creation timestamp, and deletes all but the youngest cluster registration for each cluster. This causes obsolete cluster registrations and their child resources to be deleted. -------------------------------------------------------------------------------- /fleet-delete-cluster-registration/delete_old_resources.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | namespace=${1-fleet-default} 4 | chunk_size=${2-100} 5 | 6 | if [ "$chunk_size" -le 1 ]; then 7 | chunk_size=1 8 | fi 9 | 10 | # We output the cluster name first, then the creation timestamp, then the 11 | # resource name for ordering to work by cluster, then by ascending creation 12 | # timestamp, which is in "YYYY-MM-DDTHH:mm:SSZ" format. 13 | jsonPath='{range .items[*]}{@.status.clusterName}{"_"}{@.metadata.creationTimestamp}{"_"}{@.metadata.name}{"\n"}{end}' 14 | cluster_regs=$(kubectl get clusterregistration -o=jsonpath="$jsonPath" -n "$namespace" | sort) 15 | 16 | read -ra regs -d '' <<< "${cluster_regs}" 17 | 18 | # delete_chunk deletes cluster registrations, extracting their names from $regs 19 | # This function operates on set of indexes between first_idx (first argument) 20 | # and last_chunk_idx (second argument), both included. 21 | delete_chunk() { 22 | first_idx=$1 23 | last_idx=$2 24 | 25 | for (( i = first_idx; i < last_idx; i++ )); do 26 | IFS=_ read -r cluster_name creation_timestamp name <<< "${regs[i]}" 27 | IFS=_ read -r next_cluster_name next_creation_timestamp next_name <<< "${regs[i+1]}" 28 | 29 | if [[ "$next_cluster_name" = "$cluster_name" ]]; then 30 | # The most recent cluster registration is still ahead of us: deletion is safe. 31 | echo -n "Cluster: $cluster_name" 32 | echo -e "\t$(kubectl delete --ignore-not-found=true clusterregistration "$name" -n "$namespace")" 33 | fi 34 | done 35 | } 36 | 37 | declare -a pids 38 | 39 | # The only resource we do not want to delete for each cluster is the last 40 | # element, most recently created. 41 | last_idx=$(( ${#regs[@]} - 1 )) 42 | if [ $chunk_size -ge $last_idx ]; then 43 | chunk_size=$last_idx 44 | fi 45 | 46 | # Start an async deletion process for each chunk. 47 | for (( i = 0; i < last_idx; i+= chunk_size )); do 48 | last_chunk_idx=$(( i + chunk_size - 1 )) 49 | if [ $last_chunk_idx -ge $last_idx ]; then 50 | last_chunk_idx="$last_idx" 51 | fi 52 | 53 | delete_chunk $i $last_chunk_idx & 54 | pids[${i}]=$! 55 | done 56 | 57 | # wait for deletion to complete on all chunks. 58 | for pid in ${pids[@]}; do 59 | wait $pid 60 | done 61 | -------------------------------------------------------------------------------- /fleet-secrets-bro-patch/README.md: -------------------------------------------------------------------------------- 1 | # Fleet | GitRepo Secret Backup Restore Patch 2 | 3 | This is a patching script to ensure all secrets used by Fleet `GitRepos` are backed up by the Rancher Backups tool. 4 | 5 | From Rancher v2.8.?? (TBD) and v2.9.0 all `Secrets` created via the Fleet UI in Rancher will be included in Rancher Backups. 6 | 7 | Any GitRepo `Secrets` created before this, or outside of the Fleet UI in Rancher, will not be included in Rancher Backups. 8 | 9 | By running this patching script on your Rancher cluster, it will identify all secrets used by GitRepos and label them as managed by Fleet. 
This labeling ensures they are backed up by Rancher Backups. 10 | 11 | ## Running the script 12 | To run this script you simply need a valid KUBECONFIG to connect to your Rancher cluster. Then execute the shell script: 13 | > ./patch_gitrepo_secrets.sh 14 | 15 | When run you should see output similar to: 16 | 17 | ```bash 18 | # ./patch_gitrepo_secrets.sh 19 | Patching unique secret combinations: 20 | Patching secret: fleet-default:auth-helm-creds 21 | secret/auth-helm-creds patched 22 | Patching secret: fleet-local:auth-gitlab-creds 23 | secret/auth-gitlab-creds patched (no change) 24 | ``` 25 | 26 | Note: If the secret already has the necessary label it will look like the `secret/auth-gitlab-creds` line above. 27 | 28 | ### Dry-run 29 | Optionally you can run the script with dry-run flag `-D`, it will produce output like: 30 | ```bash 31 | # ./patch_gitrepo_secrets.sh -D 32 | Patching unique secret combinations: 33 | Would patch secret: fleet-default/auth-6w5gn 34 | Would patch secret: fleet-default/auth-lfkdr 35 | Would patch secret: fleet-local/auth-gitlab-creds 36 | ``` -------------------------------------------------------------------------------- /fleet-secrets-bro-patch/patch_gitrepo_secrets.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | DRYRUN=0 4 | 5 | while getopts "D" opt; do 6 | case $opt in 7 | D) DRYRUN=1;; 8 | \?) echo "Invalid option: -$OPTARG"; exit 1;; 9 | esac 10 | done 11 | 12 | output=$(kubectl get gitrepo -A -o custom-columns=NAMESPACE:.metadata.namespace,CLIENT:.spec.clientSecretName,HELM:.spec.helmSecretName,HELMPATHS:.spec.helmSecretNameForPaths --no-headers) 13 | 14 | secret_combinations=() 15 | while read -r row; do 16 | # Extract the namespace and potential secret names from each row 17 | namespace=$(echo "$row" | awk '{print $1}') 18 | read -r -a secrets <<< "$(echo "$row" | awk '{print $2, $3, $4}')" 19 | # Create a list of secret combinations for this namespace 20 | for secret in "${secrets[@]}"; do 21 | if [ "$secret" != "" ]; then 22 | secret_combinations+=("$namespace:$secret") 23 | fi 24 | done 25 | done <<< "$(echo "$output" | awk '{print $0}')" 26 | 27 | # Sort and uniq the list of secret combinations 28 | sorted_secret_combinations=($(printf "%s\n" "${secret_combinations[@]}" | sort -u)) 29 | 30 | echo "Patching unique secret combinations:" 31 | for combination in "${sorted_secret_combinations[@]}"; do 32 | # Set the delimiter 33 | IFS=':' 34 | # Read the input string into two variables 35 | read -r namespace name <<< "$combination" 36 | if [ $DRYRUN -eq 1 ]; then 37 | echo "[DRY-RUN] Would patch secret: $namespace/$name" 38 | else 39 | echo "Patching secret: $combination" 40 | kubectl patch secret -n "$namespace" "$name" -p '{"metadata": {"labels": {"fleet.cattle.io/managed": "true"}}}' 41 | fi 42 | done -------------------------------------------------------------------------------- /how-to-retrieve-kubeconfig-from-custom-cluster/README.md: -------------------------------------------------------------------------------- 1 | # How to retrieve a kubeconfig from an RKE1 cluster 2 | 3 | During a Rancher outage or other disaster event you may lose access to a downstream cluster via Rancher and be unable to manage your applications. This process creates a kubeconfig to bypass Rancher, it connects directly to the local kube-apiserver on a control plane node. 
4 | 5 | **Note**: The [Authorised Cluster Endpoint (ACE)](https://ranchermanager.docs.rancher.com/how-to-guides/new-user-guides/manage-clusters/access-clusters/use-kubectl-and-kubeconfig#authenticating-directly-with-a-downstream-cluster) is a default option enabled on clusters provisioned by Rancher, this contains a second context which connects directly to the downstream kube-apiserver and also bypasses Rancher. 6 | 7 | ### Pre-requisites 8 | 9 | - Rancher v2.2.x or newer 10 | - RKE v0.2.x or newer 11 | - SSH access to one of the controlplane nodes 12 | - Access to the Docker CLI or root/sudo 13 | 14 | ## Retrieve a kubeconfig - using jq 15 | 16 | This option requires `kubectl` and `jq` to be installed on the server. 17 | 18 | **Note**: kubectl can be copied from the kubelet container 19 | 20 | ```bash 21 | docker cp kubelet:/usr/local/bin/kubectl /usr/local/bin/ 22 | ``` 23 | 24 | - Get kubeconfig (Rancher 2.7.14+/Rancher 2.8.5+, RKE 1.4.19+/RKE 1.5.10+) 25 | 26 | ```bash 27 | kubectl --kubeconfig $(docker inspect kubelet --format '{{ range .Mounts }}{{ if eq .Destination "/etc/kubernetes" }}{{ .Source }}{{ end }}{{ end }}')/ssl/kubecfg-kube-node.yaml get secrets -n kube-system full-cluster-state -o json | jq -r .data.\"full-cluster-state\" | base64 -d | jq -r .currentState.certificatesBundle.\"kube-admin\".config | sed -e "/^[[:space:]]*server:/ s_:.*_: \"https://127.0.0.1:6443\"_" > kubeconfig_admin.yaml 28 | ``` 29 | 30 | - Get kubeconfig (Earlier versions of Rancher and RKE) 31 | 32 | ```bash 33 | kubectl --kubeconfig $(docker inspect kubelet --format '{{ range .Mounts }}{{ if eq .Destination "/etc/kubernetes" }}{{ .Source }}{{ end }}{{ end }}')/ssl/kubecfg-kube-node.yaml get configmap -n kube-system full-cluster-state -o json | jq -r .data.\"full-cluster-state\" | jq -r .currentState.certificatesBundle.\"kube-admin\".config | sed -e "/^[[:space:]]*server:/ s_:.*_: \"https://127.0.0.1:6443\"_" > kubeconfig_admin.yaml 34 | ``` 35 | 36 | - Run `kubectl get nodes` 37 | ```bash 38 | kubectl --kubeconfig kubeconfig_admin.yaml get nodes 39 | ``` 40 | 41 | ## Retrieve a kubeconfig - without jq 42 | 43 | This option does not require `kubectl` or `jq` on the server because this uses the `rancher/rancher-agent` image to retrieve the kubeconfig. 
44 | 45 | - Get kubeconfig (Rancher 2.7.14+/Rancher 2.8.5+, RKE 1.4.19+/RKE 1.5.10+) 46 | ```bash 47 | docker run --rm --net=host -v $(docker inspect kubelet --format '{{ range .Mounts }}{{ if eq .Destination "/etc/kubernetes" }}{{ .Source }}{{ end }}{{ end }}')/ssl:/etc/kubernetes/ssl:ro --entrypoint bash $(docker inspect $(docker images -q --filter=label=org.opencontainers.image.source=https://github.com/rancher/hyperkube) --format='{{index .RepoTags 0}}' | tail -1) -c 'kubectl --kubeconfig /etc/kubernetes/ssl/kubecfg-kube-node.yaml get secret -n kube-system full-cluster-state -o json | jq -r .data.\"full-cluster-state\" | base64 -d | jq -r .currentState.certificatesBundle.\"kube-admin\".config | sed -e "/^[[:space:]]*server:/ s_:.*_: \"https://127.0.0.1:6443\"_"' > kubeconfig_admin.yaml 48 | ``` 49 | 50 | - Get kubeconfig (Earlier versions of Rancher and RKE) 51 | 52 | ```bash 53 | docker run --rm --net=host -v $(docker inspect kubelet --format '{{ range .Mounts }}{{ if eq .Destination "/etc/kubernetes" }}{{ .Source }}{{ end }}{{ end }}')/ssl:/etc/kubernetes/ssl:ro --entrypoint bash $(docker inspect $(docker images -q --filter=label=org.opencontainers.image.source=https://github.com/rancher/hyperkube.git) --format='{{index .RepoTags 0}}' | tail -1) -c 'kubectl --kubeconfig /etc/kubernetes/ssl/kubecfg-kube-node.yaml get configmap -n kube-system full-cluster-state -o json | jq -r .data.\"full-cluster-state\" | jq -r .currentState.certificatesBundle.\"kube-admin\".config | sed -e "/^[[:space:]]*server:/ s_:.*_: \"https://127.0.0.1:6443\"_"' > kubeconfig_admin.yaml 54 | ``` 55 | 56 | - Run `kubectl get nodes` 57 | ```bash 58 | docker run --rm --net=host -v $PWD/kubeconfig_admin.yaml:/root/.kube/config:z --entrypoint bash $(docker inspect $(docker images -q --filter=label=org.opencontainers.image.source=https://github.com/rancher/hyperkube) --format='{{index .RepoTags 0}}' | tail -1) -c 'kubectl get nodes' 59 | ``` 60 | 61 | ## Script 62 | Run `https://raw.githubusercontent.com/rancherlabs/support-tools/master/how-to-retrieve-kubeconfig-from-custom-cluster/rke-node-kubeconfig.sh` and follow the instructions given. 
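A minimal end-to-end sketch of the script option (assumes root access on a node with the `controlplane` role, per the pre-requisites above; the optional first argument is a private registry prefix used only if no local `rancher-agent` image is found):

```bash
curl -LO https://raw.githubusercontent.com/rancherlabs/support-tools/master/how-to-retrieve-kubeconfig-from-custom-cluster/rke-node-kubeconfig.sh
sudo bash rke-node-kubeconfig.sh           # or: sudo bash rke-node-kubeconfig.sh registry.example.com
kubectl --kubeconfig kubeconfig_admin.yaml get nodes
```

If `kubectl` is not installed on the node, the script prints an equivalent `docker run` command you can use instead.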
63 | -------------------------------------------------------------------------------- /how-to-retrieve-kubeconfig-from-custom-cluster/rke-node-kubeconfig.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | PRIVATE_REGISTRY="$1/" 4 | 5 | # Check if controlplane node (kube-apiserver) 6 | CONTROLPLANE=$(docker ps -q --filter=name=kube-apiserver) 7 | 8 | # Get agent image from Docker images 9 | RANCHER_IMAGE=$(docker inspect $(docker images -q --filter=label=io.cattle.agent=true) --format='{{index .RepoTags 0}}' | tail -1) 10 | 11 | if [ -z $RANCHER_IMAGE ]; then 12 | RANCHER_IMAGE="${PRIVATE_REGISTRY}rancher/rancher-agent:v2.6.11" 13 | fi 14 | 15 | if [ -d /opt/rke/etc/kubernetes/ssl ]; then 16 | K8S_SSLDIR=/opt/rke/etc/kubernetes/ssl 17 | else 18 | K8S_SSLDIR=/etc/kubernetes/ssl 19 | fi 20 | 21 | # Determine object type for full-cluster-state (depends on Rancher/RKE version), can be either a configmap (older versions) or a secret (newer versions) 22 | FULL_CLUSTER_STATE_TYPE=$(docker run --rm --net=host -v $K8S_SSLDIR:/etc/kubernetes/ssl:ro --entrypoint bash $RANCHER_IMAGE -c 'kubectl --kubeconfig /etc/kubernetes/ssl/kubecfg-kube-node.yaml get configmap -n kube-system full-cluster-state -o jsonpath='{.kind}' 2>/dev/null || kubectl --kubeconfig /etc/kubernetes/ssl/kubecfg-kube-node.yaml get secret -n kube-system full-cluster-state -o jsonpath='{.kind}' 2>/dev/null') 23 | 24 | # Generate kubeconfig depending on object type for full-cluster-state 25 | if [ "$FULL_CLUSTER_STATE_TYPE" = "Secret" ]; then 26 | docker run --rm --net=host -v $K8S_SSLDIR:/etc/kubernetes/ssl:ro --entrypoint bash $RANCHER_IMAGE -c 'kubectl --kubeconfig /etc/kubernetes/ssl/kubecfg-kube-node.yaml get secret -n kube-system full-cluster-state -o json | jq -r .data.\"full-cluster-state\" | base64 -d | jq -r .currentState.certificatesBundle.\"kube-admin\".config | sed -e "/^[[:space:]]*server:/ s_:.*_: \"https://127.0.0.1:6443\"_"' > kubeconfig_admin.yaml 27 | elif [ "$FULL_CLUSTER_STATE_TYPE" = "ConfigMap" ]; then 28 | docker run --rm --net=host -v $K8S_SSLDIR:/etc/kubernetes/ssl:ro --entrypoint bash $RANCHER_IMAGE -c 'kubectl --kubeconfig /etc/kubernetes/ssl/kubecfg-kube-node.yaml get configmap -n kube-system full-cluster-state -o json | jq -r .data.\"full-cluster-state\" | jq -r .currentState.certificatesBundle.\"kube-admin\".config | sed -e "/^[[:space:]]*server:/ s_:.*_: \"https://127.0.0.1:6443\"_"' > kubeconfig_admin.yaml 29 | else 30 | echo "Invalid type for object \"full-cluster-state\" (should be a Secret or a ConfigMap). Exiting..." 
31 | exit 1 32 | fi 33 | 34 | if [ -s kubeconfig_admin.yaml ]; then 35 | if [ -z $CONTROLPLANE ]; then 36 | echo "This is supposed to be run on a node with the 'controlplane' role as it will try to connect to https://127.0.0.1:6443" 37 | echo "You can manually change the 'server:' parameter inside 'kubeconfig_admin.yaml' to point to a node with the 'controlplane' role" 38 | fi 39 | echo "Kubeconfig is stored at: kubeconfig_admin.yaml 40 | 41 | You can use on of the following commands to use it: 42 | 43 | docker run --rm --net=host -v $PWD/kubeconfig_admin.yaml:/root/.kube/config --entrypoint bash $RANCHER_IMAGE -c 'kubectl get nodes' 44 | 45 | kubectl --kubeconfig kubeconfig_admin.yaml get nodes 46 | 47 | Note: if kubectl is not available on the node, the binary can be copied from the kubelet container: 48 | docker cp kubelet:/usr/local/bin/kubectl /usr/local/bin/" 49 | else 50 | echo "Failed to retrieve kubeconfig" 51 | fi -------------------------------------------------------------------------------- /instant-fio-master/README.md: -------------------------------------------------------------------------------- 1 | # instant-fio-master.sh 2 | Simple script to install fio from source. It does the following: 3 | 4 | - Installs dependencies automatically on RedHat and Debian based operating systems. 5 | - clones fio master branch 6 | - compiles fio from source then performs a make install 7 | - updates ~/.bash_profile to add /usr/local/bin/ to your PATH if it isn't already there 8 | 9 | Usage: 10 | ``` 11 | curl -LO https://raw.githubusercontent.com/rancherlabs/support-tools/master/instant-fio-master/instant-fio-master.sh 12 | bash instant-fio-master.sh 13 | 14 | mkdir test-data 15 | fio --rw=write --ioengine=sync --fdatasync=1 --directory=test-data --size=100m --bs=2300 --name=mytest 16 | ``` 17 | -------------------------------------------------------------------------------- /instant-fio-master/instant-fio-master.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | red=$(tput setaf 1) 3 | green=$(tput setaf 2) 4 | reset=$(tput sgr0) 5 | START_TIME=$(date +%Y-%m-%d--%H%M%S) 6 | SCRIPT_NAME="instant-fio-master.sh" 7 | function helpmenu() { 8 | echo "Usage: ${SCRIPT_NAME} 9 | " 10 | exit 1 11 | } 12 | while getopts "h" opt; do 13 | case ${opt} in 14 | h) # process option h 15 | helpmenu 16 | ;; 17 | \?) 18 | helpmenu 19 | exit 1 20 | ;; 21 | esac 22 | done 23 | if [[ $EUID -ne 0 ]]; then 24 | echo "This script must be run as root" 25 | exit 1 26 | fi 27 | #set os and install dependencies 28 | if [[ -f /etc/lsb-release ]]; then 29 | OS=ubuntu 30 | echo You are using Ubuntu 31 | apt install -y gcc zlib1g-dev make git 32 | fi 33 | if [[ -f /etc/redhat-release ]]; then 34 | OS=redhat 35 | echo You are using Red Hat 36 | yum -y install zlib-devel gcc make git 37 | fi 38 | 39 | if ! hash fio 2>/dev/null; then 40 | git clone git://git.kernel.dk/fio.git 41 | cd fio 42 | ./configure 43 | make 44 | make install 45 | if [[ '/usr/local/bin' != *"$PATH"* ]]; then 46 | export PATH=/usr/local/bin:$PATH 47 | echo "Run the following command or logout and log back in again so that your .bash_profile can add it for you." 48 | echo 'export PATH=/usr/local/bin:$PATH' 49 | echo 'export PATH=/usr/local/bin:$PATH' >>~/.bash_profile 50 | fi 51 | else 52 | echo "fio is already installed." 
53 | fi 54 | -------------------------------------------------------------------------------- /kubecert/README.md: -------------------------------------------------------------------------------- 1 | Credit for the logic that retrieves the KUBECONFIG goes to [Superseb](https://github.com/superseb/) 2 | 3 | # kubecert 4 | This script will set you up with kubectl and retrieve your local kube config for a cluster provisioned by RKE or Rancher. Option -y will auto install kubectl and jq for linux. 5 | Usage: 6 | ```bash 7 | curl -LO https://github.com/rancherlabs/support-tools/raw/master/kubecert/kubecert.sh 8 | bash ./kubecert.sh -y 9 | ``` 10 | -------------------------------------------------------------------------------- /kubecert/base64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rancherlabs/support-tools/2fa26cd1c13cc329f5553f88adfe693ee978848e/kubecert/base64 -------------------------------------------------------------------------------- /kubecert/jq-linux64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rancherlabs/support-tools/2fa26cd1c13cc329f5553f88adfe693ee978848e/kubecert/jq-linux64 -------------------------------------------------------------------------------- /longhorn/PlaceHolder.md: -------------------------------------------------------------------------------- 1 | ## Longhorn Scripts -------------------------------------------------------------------------------- /migrate-vsphere-clusters/README.md: -------------------------------------------------------------------------------- 1 | # migrate-vsphere-clusters 2 | 3 | This script is to be used as part of the patch process for Rancher's 4 | [CVE-2022-45157]. 5 | 6 | 7 | [CVE-2022-45157]: https://github.com/rancher/rancher/security/advisories/GHSA-xj7w-r753-vj8v 8 | 9 | -------------------------------------------------------------------------------- /rancher-cleanup/README.md: -------------------------------------------------------------------------------- 1 | # Rancher resource cleanup script 2 | 3 | This has been moved to [rancher/rancher-cleanup](https://github.com/rancher/rancher-cleanup). 4 | -------------------------------------------------------------------------------- /rancher-crd/enumerate-resources/README.md: -------------------------------------------------------------------------------- 1 | # rancher-resource-enumerator 2 | 3 | Rancher Custom Resource enumeration script 4 | 5 | ## Dependencies 6 | 7 | * `kubectl` 8 | * Linux, MacOS or WSL2 9 | 10 | ## How to use 11 | 12 | * Download the script and save as: `rancher-resource-enumerator.sh` 13 | * Make sure the script is executable: `chmod u+x ./rancher-resource-enumerator.sh` 14 | * Run the script: `./rancher-resource-enumerator.sh -a` 15 | 16 | The script will output all Rancher custom resource data in the `/tmp/enum-cattle-resources-` directory by default. The `totals` file will give the total count for all resources. 17 | 18 | ## Flags 19 | 20 | ``` 21 | Rancher Resource Enumerator 22 | Usage: ./rancher-resource-enumerator.sh [ -d -n | -c | -a ] 23 | -h Display this help message. 24 | -a Enumerate all custom resources. 25 | -n Only enumerate resources in the specified namespace(s). 26 | -c Only enumerate cluster (non-namespaced) resources. 27 | -d Path to output directory (default: /tmp/enum-cattle-resources-). 
28 | ``` 29 | -------------------------------------------------------------------------------- /rancher-crd/enumerate-resources/rancher-resource-enumerator.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | datenow="$(date "+%F-%H-%M-%S")" 4 | outputdir="/tmp/enum-cattle-resources-$datenow" 5 | export outputdir 6 | 7 | usage() { 8 | printf "Rancher Resource Enumerator \n" 9 | printf "Usage: ./rancher-resource-enumerator.sh [ -d -n | -c | -a ]\n" 10 | printf " -h Display this help message.\n" 11 | printf " -a Enumerate all custom resources.\n" 12 | printf " -n Only enumerate resources in the specified namespace(s).\n" 13 | printf " -c Only enumerate cluster (non-namespaced) resources.\n" 14 | printf " -d Path to output directory (default: /tmp/enum-cattle-resources-).\n" 15 | exit 0 16 | } 17 | 18 | # Arguments 19 | optstring="cahd:n:" 20 | while getopts ${optstring} opt; do 21 | case ${opt} in 22 | h) usage 23 | ;; 24 | d) path=${OPTARG} 25 | outputdir="$path-$datenow" 26 | export outputdir 27 | ;; 28 | a) all=1 29 | export all 30 | ;; 31 | n) namespaces=${OPTARG} 32 | export namespaces 33 | ;; 34 | c) cluster=1 35 | export cluster 36 | ;; 37 | *) printf "Invalid Option: %s.\n" "$1" 38 | usage 39 | ;; 40 | esac 41 | done 42 | 43 | 44 | # Setup 45 | setup() { 46 | # Create output directory 47 | echo "Output directory set to $outputdir" 48 | mkdir -p "$outputdir" 49 | } 50 | 51 | # Get cluster resources 52 | non_namespaced() { 53 | kubectl api-resources --verbs=list --namespaced=false -o name | grep cattle.io | xargs -I _ sh -c "echo '(cluster) enumerating _ resources...'; kubectl get _ -o custom-columns=KIND:.kind,NAME:.metadata.name --no-headers=true --ignore-not-found=true >> $outputdir/_" 54 | } 55 | 56 | # Get namespaced resources 57 | namespaced() { 58 | ns="$1" 59 | # Select all namespaces if no namespace is specified 60 | if [ -z "$ns" ]; then 61 | ns="$(kubectl get ns --no-headers -o jsonpath='{.items[*].metadata.name}')" 62 | fi 63 | # Get all custom resources for validated namespaces 64 | for n in $ns 65 | do 66 | kubectl get ns "$n" -o name && \ 67 | kubectl api-resources --verbs=list --namespaced=true -o name | grep cattle.io | xargs -I _ sh -c "echo '(namespaced) enumerating _ resources in $n...'; kubectl get _ -n $n -o custom-columns=KIND:.kind,NAME:.metadata.name,NAMESPACE:.metadata.namespace --no-headers=true --ignore-not-found=true >> $outputdir/_" 68 | done 69 | } 70 | 71 | # Get total counts 72 | totals() { 73 | countfiles="$outputdir/*" 74 | echo 'counting totals...' 
75 | for f in $countfiles 76 | do 77 | wc -l "$f" >> "$outputdir"/totals 78 | done 79 | echo "results saved in $outputdir" 80 | exit 0 81 | } 82 | 83 | main() { 84 | if [ -n "$all" ]; then 85 | setup 86 | non_namespaced 87 | namespaced 88 | totals 89 | elif [ -n "$cluster" ]; then 90 | setup 91 | non_namespaced 92 | totals 93 | elif [ -n "$namespaces" ]; then 94 | setup 95 | namespaced "$namespaces" 96 | totals 97 | else 98 | usage 99 | fi 100 | } 101 | 102 | main -------------------------------------------------------------------------------- /rancher-metadata-syncer/Dockerfile: -------------------------------------------------------------------------------- 1 | ## Running builder to download metadata files 2 | FROM alpine AS builder 3 | MAINTAINER Matthew Mattox matt.mattox@suse.com 4 | RUN apk update && apk add --update-cache \ 5 | wget \ 6 | bash \ 7 | && rm -rf /var/cache/apk/* 8 | 9 | ADD *.sh /usr/local/bin/ 10 | RUN chmod +x /usr/local/bin/*.sh 11 | WORKDIR /root/ 12 | RUN /usr/local/bin/download.sh 13 | 14 | ## Building webserver 15 | FROM httpd:alpine 16 | MAINTAINER Matthew Mattox matt.mattox@suse.com 17 | RUN apk update && apk add --update-cache \ 18 | wget \ 19 | curl \ 20 | bash \ 21 | gzip \ 22 | && rm -rf /var/cache/apk/* 23 | 24 | WORKDIR /var/www/localhost 25 | COPY --from=builder /root/*.json /usr/local/apache2/htdocs/ 26 | COPY --from=builder /usr/local/bin/*.sh /usr/local/bin/ 27 | CMD /usr/local/bin/run.sh 28 | -------------------------------------------------------------------------------- /rancher-metadata-syncer/README.md: -------------------------------------------------------------------------------- 1 | # rancher-metadata-syncer 2 | Rancher Metadata Syncer is a simple pod for publishing the Rancher metadata.json in an airgap setup to allow Rancher to get updated metadata files without granting Rancher internet access or upgrading Rancher. 3 | 4 | ## Installation 5 | 6 | Note: The following tool should only be deployed on the Rancher Local cluster and not on a downstream cluster. 7 | 8 | ### Option A - Configmap 9 | The Configmap option is used when you would like to add the metadata files via a Configmap. 10 | Note: The following steps should be run from a server/workstation with internet access. 11 | 12 | - Download the metadata file(s) 13 | ```bash 14 | wget --no-check-certificate -O v2-4.json https://releases.rancher.com/kontainer-driver-metadata/release-v2.4/data.json 15 | wget --no-check-certificate -O v2-5.json https://releases.rancher.com/kontainer-driver-metadata/release-v2.5/data.json 16 | tar -czvf v2-4.json.tar.gz v2-4.json 17 | tar -czvf v2-5.json.tar.gz v2-5.json 18 | ``` 19 | 20 | - Create the Configmap with the metadata files. 21 | 22 | ```bash 23 | kubectl -n cattle-system create configmap rancher-metadata --from-file=v2-4.json=./v2-4.json.tar.gz --from-file=v2-5.json=./v2-5.json.tar.gz 24 | ``` 25 | 26 | - Deploy the workload 27 | ```bash 28 | kubectl apply -f deployment-configmap.yaml 29 | ``` 30 | 31 | - If you would update the metadata file, please do the following. 
32 | 33 | ```bash 34 | wget --no-check-certificate -O v2-4.json https://releases.rancher.com/kontainer-driver-metadata/release-v2.4/data.json 35 | wget --no-check-certificate -O v2-5.json https://releases.rancher.com/kontainer-driver-metadata/release-v2.5/data.json 36 | tar -czvf v2-4.json.tar.gz v2-4.json 37 | tar -czvf v2-5.json.tar.gz v2-5.json 38 | kubectl -n cattle-system delete configmap rancher-metadata 39 | kubectl -n cattle-system create configmap rancher-metadata --from-file=v2-4.json.tar.gz=./v2-4.json.tar.gz --from-file=v2-5.json.tar.gz=./v2-5.json.tar.gz 40 | kubectl -n cattle-system patch deployment rancher-metadata -p "{\"spec\":{\"template\":{\"metadata\":{\"labels\":{\"date\":\"$(date +%s)\"}}}}}" 41 | ``` 42 | 43 | ### Option B - Proxy 44 | The proxy option is used if you would like the deployment to automatedly download the metadata files every 6 hours without opening all of Rancher to the internet via the Proxy. 45 | 46 | - Edit values HTTP_PROXY and HTTPS_PROXY in deployment-proxy.yaml match your environment requirements. 47 | ```bash 48 | - name: HTTPS_PROXY 49 | value: "https://:@:/" 50 | - name: HTTP_PROXY 51 | value: "http://:@:/" 52 | ``` 53 | 54 | - Deploy the workload 55 | ```bash 56 | kubectl apply -f deployment-proxy.yaml 57 | ``` 58 | 59 | ## Updating Rancher 60 | 61 | - Browse to the Rancher UI -> Global -> Settings -> rke-metadata-config 62 | 63 | - Update the value to the following for Rancher v2.4.x 64 | ``` 65 | { 66 | "refresh-interval-minutes": "60", 67 | "url": "http://rancher-metadata/v2-4.json" 68 | } 69 | ``` 70 | 71 | - Update the value to the following for Rancher v2.5.x 72 | ``` 73 | { 74 | "refresh-interval-minutes": "60", 75 | "url": "http://rancher-metadata/v2-5.json" 76 | } 77 | ``` 78 | -------------------------------------------------------------------------------- /rancher-metadata-syncer/apache.conf: -------------------------------------------------------------------------------- 1 | 2 | ServerAdmin admin@localhost 3 | ServerName localhost 4 | DocumentRoot /var/www/src 5 | 6 | Options Indexes FollowSymLinks MultiViews 7 | AllowOverride All 8 | Order allow,deny 9 | Allow from all 10 | Require all granted 11 | 12 | ErrorLog ${APACHE_LOG_DIR}/error.log 13 | CustomLog ${APACHE_LOG_DIR}/access.log combined 14 | 15 | -------------------------------------------------------------------------------- /rancher-metadata-syncer/deployment-configmap.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: apps/v1 3 | kind: Deployment 4 | metadata: 5 | labels: 6 | app: rancher-metadata 7 | name: rancher-metadata 8 | namespace: cattle-system 9 | spec: 10 | replicas: 1 11 | selector: 12 | matchLabels: 13 | app: rancher-metadata 14 | template: 15 | metadata: 16 | labels: 17 | app: rancher-metadata 18 | spec: 19 | containers: 20 | - image: rancher/metadata-syncer:latest 21 | imagePullPolicy: IfNotPresent 22 | name: rancher-metadata 23 | livenessProbe: 24 | httpGet: 25 | path: /healthz 26 | port: 80 27 | initialDelaySeconds: 3 28 | periodSeconds: 3 29 | readinessProbe: 30 | httpGet: 31 | path: /healthz 32 | port: 80 33 | initialDelaySeconds: 5 34 | periodSeconds: 5 35 | volumeMounts: 36 | - mountPath: /data 37 | name: metadata 38 | volumes: 39 | - configMap: 40 | defaultMode: 256 41 | name: rancher-metadata 42 | optional: false 43 | name: metadata 44 | --- 45 | apiVersion: v1 46 | kind: Service 47 | metadata: 48 | name: rancher-metadata 49 | namespace: cattle-system 50 | spec: 51 | selector: 52 | app: 
rancher-metadata 53 | ports: 54 | - protocol: TCP 55 | port: 80 56 | targetPort: 80 57 | -------------------------------------------------------------------------------- /rancher-metadata-syncer/deployment-proxy.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: apps/v1 3 | kind: Deployment 4 | metadata: 5 | labels: 6 | app: rancher-metadata 7 | name: rancher-metadata 8 | namespace: cattle-system 9 | spec: 10 | replicas: 1 11 | selector: 12 | matchLabels: 13 | app: rancher-metadata 14 | template: 15 | metadata: 16 | labels: 17 | app: rancher-metadata 18 | spec: 19 | containers: 20 | - env: 21 | - name: HTTPS_PROXY 22 | value: https://:@:/ 23 | - name: HTTP_PROXY 24 | value: http://:@:/ 25 | image: rancher/metadata-syncer:latest 26 | imagePullPolicy: IfNotPresent 27 | livenessProbe: 28 | failureThreshold: 3 29 | httpGet: 30 | path: /healthz 31 | port: 80 32 | scheme: HTTP 33 | initialDelaySeconds: 3 34 | periodSeconds: 3 35 | successThreshold: 1 36 | timeoutSeconds: 1 37 | name: rancher-metadata 38 | readinessProbe: 39 | failureThreshold: 3 40 | httpGet: 41 | path: /healthz 42 | port: 80 43 | scheme: HTTP 44 | initialDelaySeconds: 5 45 | periodSeconds: 5 46 | successThreshold: 1 47 | timeoutSeconds: 1 48 | 49 | --- 50 | apiVersion: v1 51 | kind: Service 52 | metadata: 53 | name: rancher-metadata 54 | namespace: cattle-system 55 | spec: 56 | selector: 57 | app: rancher-metadata 58 | ports: 59 | - protocol: TCP 60 | port: 80 61 | targetPort: 80 62 | -------------------------------------------------------------------------------- /rancher-metadata-syncer/download.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo "Downloading kontainer-driver-metadata for v2.4" 3 | wget --no-check-certificate -O v2-4.json https://releases.rancher.com/kontainer-driver-metadata/release-v2.4/data.json 4 | 5 | echo "Downloading kontainer-driver-metadata for v2.5" 6 | wget --no-check-certificate -O v2-5.json https://releases.rancher.com/kontainer-driver-metadata/release-v2.5/data.json 7 | -------------------------------------------------------------------------------- /rancher-metadata-syncer/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo "Starting webserver..." 3 | apachectl start 4 | echo "ok" > /usr/local/apache2/htdocs/healthz 5 | if [[ ! -z $HTTP_PROXY ]] || [[ ! -z $HTTPS_PROXY ]] 6 | then 7 | echo "Detected proxy settings." 8 | echo "Starting downloader..." 9 | while true 10 | do 11 | /usr/local/bin/download.sh 12 | echo "Sleeping..." 13 | sleep 6h 14 | done 15 | fi 16 | 17 | if [[ -d /data ]] 18 | then 19 | echo "Configmap detected, loading json files from Configmap..." 20 | tar -zvxf v2-5.json.tar.gz -C /usr/local/apache2/htdocs/ 21 | tar -zvxf v2-5.json.tar.gz -C /usr/local/apache2/htdocs/ 22 | fi 23 | 24 | echo "Starting in static mode" 25 | while true 26 | do 27 | sleep 10000 28 | done 29 | -------------------------------------------------------------------------------- /reverse-rke-state-migrations/README.md: -------------------------------------------------------------------------------- 1 | # reverse-rke-state-migrations.sh 2 | This script can be used to reverse RKE cluster state migrations that are performed automatically by Rancher on all downstream RKE clusters as of releases `v2.7.14`, and `v2.8.5`. 
Running this script should only be necessary if you have upgraded to a Rancher version at or above the aforementioned versions and need to restore Rancher back to a version that is older than the aforementioned versions. For example, you're on `v2.8.0` and you take a backup of Rancher and then upgrade to `v2.8.5`, but then you restore Rancher from your backup. In this case, you'd have to use this script to reverse the RKE cluster state migrations that would have occurred during the upgrade to `v2.8.5`. 3 | 4 | ## Usage 5 | ⚠️ **WARNING:** Before running this script, please ensure that **you've backed up your downstream RKE clusters**. The script **will delete `full-cluster-state` secrets from downstream RKE clusters**. 6 | 7 | 1. Take backups of your downstream RKE clusters. 8 | 2. Ensure you have [kubectl](https://kubernetes.io/docs/tasks/tools/#kubectl), [jq](https://jqlang.github.io/jq/), and [yq](https://mikefarah.gitbook.io/yq/#install) installed. 9 | 3. Generate a Rancher API token and use it to set the `RANCHER_TOKEN` environment variable. 10 | 4. Run the script pointing to your Rancher server URL. 11 | 12 | ```shell 13 | export RANCHER_TOKEN= 14 | ./reverse-rke-state-migrations.sh --rancher-host 15 | ``` 16 | 17 | This script will iterate over all downstream RKE clusters and, for each one, it will ensure that a `full-cluster-state` ConfigMap exists inside the cluster as is expected by older versions of RKE. After doing this successfully for each of the targeted clusters, the script will remove a ConfigMap from the local cluster that marks the original migration as complete since it will effectively have been reversed. 18 | -------------------------------------------------------------------------------- /reverse-rke-state-migrations/reverse-rke-state-migrations.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | set -o pipefail 5 | 6 | # Use this to specify a custom kubectl base command or options. 7 | KUBECTL="kubectl" 8 | 9 | # Use this to specify a custom curl base command or options. 10 | # By default, we pass options that make curl silent, except when errors occur, 11 | # and we also force CURL to error if HTTP requests do not receive successful 12 | # (2xx) response codes. 13 | CURL="curl -sSf" 14 | 15 | function display_help() { 16 | echo 'This script can be used to reverse RKE cluster state migrations.' 
17 | echo 'Please ensure the $RANCHER_TOKEN environment variable is set to a valid Rancher API admin token' 18 | echo 'Please also ensure the following tools are installed:' 19 | echo ' kubectl: https://kubernetes.io/docs/tasks/tools/#kubectl' 20 | echo ' jq: https://jqlang.github.io/jq' 21 | echo ' yq: https://mikefarah.gitbook.io/yq/#install' 22 | echo 23 | echo 24 | echo "Usage: $(basename $0) --rancher-host [Rancher hostname]" 25 | echo 26 | echo ' $RANCHER_TOKEN [Required] Environment variable containing Rancher admin token' 27 | echo ' -n, --rancher-host [Required] Rancher hostname' 28 | echo ' -k, --insecure-skip-tls-verify [Optional] Skip certificate verification' 29 | echo " -d, --debug [Optional] Calls 'set -x'" 30 | echo " -h, --help Print this message" 31 | } 32 | 33 | POSITIONAL_ARGS=() 34 | 35 | while [[ $# -gt 0 ]]; do 36 | case $1 in 37 | -n|--rancher-host) 38 | RANCHER_HOST="$2" 39 | shift # past argument 40 | shift # past value 41 | ;; 42 | -k|--insecure-skip-tls-verify) 43 | KUBECTL="$KUBECTL --insecure-skip-tls-verify" 44 | CURL="$CURL -k" 45 | shift # past argument 46 | ;; 47 | -d|--debug) 48 | set -x 49 | shift # past argument 50 | ;; 51 | -h|--help) 52 | display_help 53 | exit 1 54 | ;; 55 | -*|--*) 56 | echo "Unknown option $1" 57 | display_help 58 | exit 1 59 | ;; 60 | *) 61 | POSITIONAL_ARGS+=("$1") # save positional arg 62 | shift # past argument 63 | ;; 64 | esac 65 | done 66 | 67 | set -- "${POSITIONAL_ARGS[@]}" # restore positional parameters 68 | 69 | # Make sure a Rancher API token was set 70 | if [[ -z "$RANCHER_TOKEN" ]]; then 71 | echo 'ERROR: $RANCHER_TOKEN is unset' 72 | display_help 73 | exit 1 74 | fi 75 | 76 | # Make sure a rancher host was set 77 | if [[ -z "$RANCHER_HOST" ]]; then 78 | echo 'ERROR: --rancher-host is unset' 79 | display_help 80 | exit 1 81 | fi 82 | 83 | # Make sure the jq command is available 84 | if ! command -v "jq" &> /dev/null; then 85 | echo "Missing jq command. See download/installation instructions at https://jqlang.github.io/jq/." 86 | exit 1 87 | fi 88 | 89 | # Make sure the yq command is available 90 | if ! command -v "yq" &> /dev/null; then 91 | echo "Missing yq command. See download/installation instructions at https://mikefarah.gitbook.io/yq/#install." 92 | exit 1 93 | fi 94 | 95 | # Make sure the kubectl command is available 96 | if ! command -v "kubectl" &> /dev/null; then 97 | echo "Missing kubectl command. See download/installation instructions at https://kubernetes.io/docs/tasks/tools/#kubectl." 98 | exit 1 99 | fi 100 | 101 | # Downloads kubeconfig for the cluster with ID $MANAGEMENT_CLUSTER_ID. 102 | downloadKubeConfig() { 103 | $CURL -X 'POST' -H 'accept: application/yaml' -u "$RANCHER_TOKEN" \ 104 | "https://${RANCHER_HOST}/v3/clusters/${MANAGEMENT_CLUSTER_ID}?action=generateKubeconfig" \ 105 | | yq -r '.config' > .kube/config-"$MANAGEMENT_CLUSTER_ID" 106 | } 107 | 108 | # Downloads kubeconfig for the local cluster. 109 | getLocalKubeConfig() { 110 | $CURL -X 'POST' -H 'accept: application/yaml' -u "$RANCHER_TOKEN" \ 111 | "https://${RANCHER_HOST}/v3/clusters/local?action=generateKubeconfig" \ 112 | | yq -r '.config' > .kube/config 113 | } 114 | 115 | # Moves downstream cluster state from a secret to a configmap. 116 | reverseMigrateClusterState() { 117 | # Load cluster state from the secret 118 | SECRET=$($KUBECTL get secret full-cluster-state -n kube-system -o yaml) 119 | if [ $? 
-ne 0 ]; then 120 | echo "[cluster=$MANAGEMENT_CLUSTER_ID] failed to fetch secret full-cluster-state, skipping this cluster" 121 | return 122 | fi 123 | 124 | # Make sure the cluster state is not empty or invalid 125 | CLUSTER_STATE=$(echo "$SECRET" | yq -r '.data.full-cluster-state' | base64 --decode) 126 | if [[ "$?" -ne 0 || "${PIPESTATUS[0]}" -ne 0 || "${PIPESTATUS[1]}" -ne 0 || "${PIPESTATUS[2]}" -ne 0 ]]; then 127 | echo "[cluster=$MANAGEMENT_CLUSTER_ID] failed to decode cluster state, skipping this cluster" 128 | return 129 | fi 130 | 131 | if [ -z "$CLUSTER_STATE" ]; then 132 | echo "[cluster=$MANAGEMENT_CLUSTER_ID] cluster state is empty, skipping this cluster" 133 | return 134 | fi 135 | 136 | # Copy cluster state to a configmap 137 | $KUBECTL create configmap full-cluster-state -n kube-system --from-literal=full-cluster-state="$CLUSTER_STATE" 138 | 139 | # Remove the secret 140 | $KUBECTL delete secret full-cluster-state -n kube-system 141 | } 142 | 143 | # Performs reverse migrations on all downstream RKE clusters. 144 | reverseMigrateRKEClusters() { 145 | # Download kubeconfig for the local cluster 146 | getLocalKubeConfig 147 | 148 | # Fetch all RKE cluster IDs 149 | MANAGEMENT_CLUSTER_IDS=($( 150 | $CURL -H 'accept: application/json' -u "$RANCHER_TOKEN" \ 151 | "https://${RANCHER_HOST}/v1/management.cattle.io.cluster?exclude=metadata.managedFields" \ 152 | | jq -r '.data[] | select(.spec.rancherKubernetesEngineConfig) | .id') 153 | ) 154 | 155 | # Migrate each RKE cluster's state 156 | for MANAGEMENT_CLUSTER_ID in "${MANAGEMENT_CLUSTER_IDS[@]}" 157 | do 158 | # Download and point to downstream cluster kubeconfig 159 | downloadKubeConfig 160 | export KUBECONFIG=".kube/config-$MANAGEMENT_CLUSTER_ID" 161 | 162 | echo "Moving state back to configmap for cluster $MANAGEMENT_CLUSTER_ID" 163 | set +e 164 | reverseMigrateClusterState 165 | set -e 166 | done 167 | 168 | # Remove the migration configmap since we've reversed the migrations 169 | if $KUBECTL get configmap migraterkeclusterstate -n cattle-system > /dev/null 2>&1; then 170 | echo "Deleting configmap migraterkeclusterstate" 171 | $KUBECTL delete configmap migraterkeclusterstate -n cattle-system 172 | fi 173 | } 174 | 175 | main() { 176 | # Create temp directory to which we'll download cluster kubeconfig files. 177 | cd "$(mktemp -d)" 178 | echo "Using temp directory $(pwd)" 179 | 180 | echo "WARNING: 'full-cluster-state' secrets will be deleted for downstream RKE clusters after being moved." 181 | echo -n "Please make sure you've backed them up before proceeding. Proceed? (yes/no) " 182 | read ANSWER 183 | 184 | if [ "$ANSWER" = "yes" ]; then 185 | mkdir -p .kube 186 | reverseMigrateRKEClusters 187 | rm -rf .kube 188 | elif [ "$ANSWER" = "no" ]; then 189 | echo "Aborting" 190 | exit 1 191 | else 192 | echo "Invalid response. Please type 'yes' or 'no'." 193 | exit 1 194 | fi 195 | } 196 | 197 | main 198 | -------------------------------------------------------------------------------- /rotate-tokens/README.md: -------------------------------------------------------------------------------- 1 | # rotate-tokens.sh 2 | 3 | This script is used to rotate the main service account and token for a Rancher 4 | downstream cluster. It may be used in the event of a known token exposure or as 5 | a routine preemptive measure. 6 | 7 | ## Usage 8 | 9 | Generate an API token in Rancher and use it to set the TOKEN environment 10 | variable. Set KUBECONFIG to point to your Rancher local cluster. 
Set 11 | RANCHER_SERVER to point to your Rancher service. The script can be run without 12 | any arguments. Example: 13 | 14 | ``` 15 | export TOKEN=token-ccabc:xyz123 16 | export KUBECONFIG=/path/to/kubeconfig 17 | export RANCHER_SERVER=https://rancher.example.com 18 | ./rotate-tokens.sh 19 | ``` 20 | 21 | For extra debugging information, run with DEBUG=y: 22 | 23 | ``` 24 | DEBUG=y ./rotate-tokens.sh 25 | ``` 26 | 27 | The script iterates over each downstream cluster sequentially. If you have many 28 | downstream clusters, this may take several minutes. Do not interrupt the script. 29 | 30 | The script generates kubeconfigs for each downstream cluster and stores them in 31 | `./kubeconfigs` in the current working directory. They can be removed with 32 | `rm -r kubeconfigs`. 33 | -------------------------------------------------------------------------------- /rotate-tokens/rotate-tokens.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | if [ -n "$DEBUG" ] 4 | then 5 | set -x 6 | fi 7 | 8 | usage() { 9 | echo 'TOKEN= KUBECONFIG= RANCHER_SERVER= ./rotate-tokens.sh' 10 | exit 0 11 | } 12 | 13 | if [ "$1" == "help" ] 14 | then 15 | usage 16 | fi 17 | 18 | if [ "$TOKEN" == "" ] 19 | then 20 | echo 'Create an API token in the Rancher UI and set the environment variable TOKEN before running this script.' 21 | exit 1 22 | fi 23 | 24 | if [ "$RANCHER_SERVER" == "" ] 25 | then 26 | echo 'Set $RANCHER_SERVER to point to the Rancher URL.' 27 | exit 1 28 | fi 29 | 30 | if curl --insecure -s -u $TOKEN "${RANCHER_SERVER}/v3" | grep Unauthorized >/dev/null 31 | then 32 | echo "Not authorized for Rancher server $RANCHER_SERVER." 33 | exit 1 34 | fi 35 | 36 | if ! which kubectl >/dev/null 37 | then 38 | echo 'kubectl and jq must be installed.' 39 | exit 1 40 | fi 41 | 42 | if ! which jq >/dev/null 43 | then 44 | echo 'kubectl and jq must be installed.' 45 | exit 1 46 | fi 47 | 48 | if ! kubectl get namespace cattle-global-data >/dev/null 2>&1 49 | then 50 | echo 'Set $KUBECONFIG to point to the Rancher local cluster.' 
51 | exit 1 52 | fi 53 | 54 | cleanup() { 55 | kubectl --namespace cattle-system patch deployment cattle-cluster-agent --patch '{"spec": {"template": {"spec": {"serviceAccount": "cattle", "serviceAccountName": "cattle"}}}}' 56 | kubectl --namespace cattle-system rollout status deployment cattle-cluster-agent 57 | kubectl --namespace cattle-system delete serviceaccount cattle-tmp >/dev/null 2>&1 || true 58 | kubectl --namespace cattle-system delete secret cattle-tmp-token >/dev/null 2>&1 || true 59 | kubectl delete clusterrolebinding cattle-admin-binding-tmp >/dev/null 2>&1 || true 60 | rm -f .error 61 | } 62 | 63 | create_token_secret() { 64 | name=$1 65 | uid=$2 66 | cat < kubeconfigs/${c}.config 118 | KUBECONFIG=kubeconfigs/${c}.config 119 | 120 | # create temporary admin account 121 | tmpuid=$(kubectl --namespace cattle-system create serviceaccount cattle-tmp --output jsonpath='{.metadata.uid}' 2>.error || true) 122 | if [ -s .error ] 123 | then 124 | if grep 'already exists' .error >/dev/null 125 | then 126 | tmpuid=$(kubectl --namespace cattle-system get serviceaccount cattle-tmp --output jsonpath='{.metadata.uid}') 127 | else 128 | cat .error 129 | rm .error 130 | exit 1 131 | fi 132 | rm .error 133 | fi 134 | create_token_secret cattle-tmp $tmpuid 135 | kubectl create clusterrolebinding --clusterrole cattle-admin --serviceaccount cattle-system:cattle-tmp cattle-admin-binding-tmp 2>.error || true 136 | if [ -s .error ] 137 | then 138 | if ! grep 'already exists' .error >/dev/null 139 | then 140 | cat .error 141 | rm .error 142 | exit 1 143 | fi 144 | rm .error 145 | fi 146 | token=$(kubectl --namespace cattle-system get secret cattle-tmp-token --output jsonpath='{.data.token}') 147 | kubectl --namespace cattle-system patch deployment cattle-cluster-agent --patch '{"spec": {"template": {"spec": {"serviceAccount": "cattle-tmp", "serviceAccountName": "cattle-tmp"}}}}' 148 | kubectl --namespace cattle-system rollout status deployment cattle-cluster-agent 149 | 150 | # set cluster to use temporary account 151 | KUBECONFIG=$MAIN_KUBECONFIG 152 | old_secret=$(kubectl get clusters.management $c --output jsonpath='{.status.serviceAccountTokenSecret}') 153 | cluster_uid=$(kubectl get clusters.management $c --output jsonpath='{.metadata.uid}') 154 | secret=$(create_cluster_secret $c $cluster_uid $token) 155 | kubectl patch clusters.management $c --patch '{"status": {"serviceAccountTokenSecret": "'$secret'"}}' --type=merge 156 | kubectl --namespace cattle-global-data delete secret $old_secret 157 | 158 | # regenerate service account and secret 159 | KUBECONFIG=kubeconfigs/${c}.config 160 | if kubectl --namespace cattle-system get serviceaccount kontainer-engine >/dev/null 2>&1 161 | then 162 | serviceaccount=kontainer-engine 163 | elif kubectl --namespace cattle-system get serviceaccount cattle >/dev/null 2>&1 164 | then 165 | serviceaccount=cattle 166 | else 167 | echo "could not find admin service account to rotate on cluster $c" 168 | exit 1 169 | fi 170 | # 2.6 creates its own token 171 | if kubectl --namespace cattle-system get secret $serviceaccount-token >/dev/null 2>&1 172 | then 173 | kubectl --namespace cattle-system delete serviceaccount $serviceaccount 174 | uid=$(kubectl --namespace cattle-system create serviceaccount $serviceaccount --output jsonpath='{.metadata.uid}') 175 | create_token_secret $serviceaccount $uid 176 | tokensecret=$serviceaccount-token 177 | # 2.5 uses the k8s-generated token 178 | else 179 | kubectl --namespace cattle-system delete serviceaccount $serviceaccount 
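# Recreating the service account below lets the (pre-1.24) Kubernetes token controller auto-generate a fresh token secret, which is read from .secrets[0] a few lines later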
180 | kubectl --namespace cattle-system create serviceaccount $serviceaccount 181 | tokensecret=$(kubectl --namespace cattle-system get serviceaccount $serviceaccount --output jsonpath='{.secrets[0].name}') 182 | fi 183 | # restore back to old account 184 | token=$(kubectl --namespace cattle-system get secret $tokensecret --output jsonpath='{.data.token}') 185 | KUBECONFIG=$MAIN_KUBECONFIG 186 | secret=$(create_cluster_secret $c $cluster_uid $token) 187 | kubectl patch clusters.management $c --patch '{"status": {"serviceAccountTokenSecret": "'$secret'"}}' --type=merge 188 | 189 | # cleanup temporary artifacts 190 | KUBECONFIG=kubeconfigs/${c}.config 191 | cleanup 192 | done 193 | -------------------------------------------------------------------------------- /swiss-army-knife/README.md: -------------------------------------------------------------------------------- 1 | # Swiss-Army-Knife 2 | Rancher Support uses a standard tooling image called `swiss-army-knife` to help you manage your Rancher/Kubernetes environment. You can learn more about this image by visiting its official repo at [rancherlabs/swiss-army-knife](https://github.com/rancherlabs/swiss-army-knife/). 3 | 4 | TL;DR: This image has a lot of useful tools that can be used for scripting and troubleshooting. 5 | - [`kubectl`](https://kubernetes.io/docs/reference/kubectl/overview/) 6 | - [`helm`](https://helm.sh/docs/intro/) 7 | - [`curl`](https://curl.haxx.se/docs/manpage.html) 8 | - [`jq`](https://stedolan.github.io/jq/) 9 | - [`traceroute`](https://www.traceroute.org/about.html) 10 | - [`dig`](https://www.dig.com/products/dns/dig/) 11 | - [`nslookup`](https://www.google.com/search?q=nslookup) 12 | - [`ping`](https://www.google.com/search?q=ping) 13 | - [`netstat`](https://www.google.com/search?q=netstat) 14 | - And many more! 15 | 16 | ## Example deployments 17 | 18 | ### Overlay Test 19 | This deployment is used for Rancher's overlay network test, which is documented [here](https://ranchermanager.docs.rancher.com/troubleshooting/other-troubleshooting-tips/networking#check-if-overlay-network-is-functioning-correctly). It can be deployed to the cluster by running the following command: 20 | ```bash 21 | kubectl apply -f https://raw.githubusercontent.com/rancherlabs/support-tools/master/swiss-army-knife/overlaytest.yaml 22 | ``` 23 | 24 | This will deploy a daemonset that runs on all nodes in the cluster. These pods run `tail -f /dev/null`, which does nothing but keep the pod running. 25 | 26 | You can run the overlay test script by running the following command: 27 | ```bash 28 | curl -sfL https://raw.githubusercontent.com/rancherlabs/support-tools/master/swiss-army-knife/overlaytest.sh | bash 29 | ``` 30 | 31 | ### Admin Tools 32 | This deployment will deploy `swiss-army-knife` to all nodes in the cluster but with additional permissions and privileges. This is useful for troubleshooting and managing your Rancher environment. The pod will be running `tail -f /dev/null`, which will do nothing but keep the pod running. 33 | 34 | This can be deployed to the cluster by running the following command: 35 | ```bash 36 | kubectl apply -f https://raw.githubusercontent.com/rancherlabs/support-tools/master/swiss-army-knife/admin-tools.yaml 37 | ``` 38 | 39 | Inside the pod, you will be able to run `kubectl` commands with cluster-admin privileges. The pod can also gain full access to the node, including the ability to obtain a root shell on the node.
By running the following commands: 40 | - `kubectl -n kube-system get pods -l app=swiss-army-knife -o wide` 41 | - This will show you all pods running `swiss-army-knife` in the `kube-system` namespace. 42 | - Find the pod on the node you want to interact with. 43 | - `kubectl -n kube-system exec -it -- bash` 44 | - `chroot /rootfs` 45 | 46 | You are now running a root shell on the node with full privileges. 47 | 48 | **Important:** This deployment is designed for troubleshooting and management purposes and should not be left running on a cluster. 49 | -------------------------------------------------------------------------------- /swiss-army-knife/admin-tools.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: Namespace 4 | metadata: 5 | name: swiss-army-knife 6 | labels: 7 | app: swiss-army-knife 8 | --- 9 | apiVersion: rbac.authorization.k8s.io/v1 10 | kind: ClusterRole 11 | metadata: 12 | labels: 13 | app: swiss-army-knife 14 | name: swiss-army-knife 15 | rules: 16 | - apiGroups: 17 | - "*" 18 | resources: 19 | - "*" 20 | verbs: 21 | - "*" 22 | - nonResourceURLs: 23 | - "*" 24 | verbs: 25 | - "*" 26 | --- 27 | apiVersion: v1 28 | kind: ServiceAccount 29 | metadata: 30 | name: swiss-army-knife 31 | namespace: swiss-army-knife 32 | labels: 33 | app: swiss-army-knife 34 | --- 35 | apiVersion: rbac.authorization.k8s.io/v1 36 | kind: ClusterRoleBinding 37 | metadata: 38 | labels: 39 | app: swiss-army-knife 40 | name: swiss-army-knife 41 | roleRef: 42 | apiGroup: rbac.authorization.k8s.io 43 | kind: ClusterRole 44 | name: swiss-army-knife 45 | subjects: 46 | - kind: ServiceAccount 47 | name: swiss-army-knife 48 | namespace: swiss-army-knife 49 | --- 50 | apiVersion: apps/v1 51 | kind: DaemonSet 52 | metadata: 53 | name: swiss-army-knife 54 | namespace: swiss-army-knife 55 | labels: 56 | app: swiss-army-knife 57 | spec: 58 | selector: 59 | matchLabels: 60 | app: swiss-army-knife 61 | template: 62 | metadata: 63 | labels: 64 | app: swiss-army-knife 65 | spec: 66 | tolerations: 67 | - operator: Exists 68 | containers: 69 | - name: swiss-army-knife 70 | image: supporttools/swiss-army-knife 71 | imagePullPolicy: IfNotPresent 72 | securityContext: 73 | privileged: true 74 | resources: 75 | limits: 76 | cpu: 1000m 77 | memory: 1000Mi 78 | requests: 79 | cpu: 100m 80 | memory: 100Mi 81 | env: 82 | - name: POD_NAMESPACE 83 | valueFrom: 84 | fieldRef: 85 | fieldPath: metadata.namespace 86 | - name: POD_IP 87 | valueFrom: 88 | fieldRef: 89 | fieldPath: status.podIP 90 | - name: NODE_NAME 91 | valueFrom: 92 | fieldRef: 93 | fieldPath: spec.nodeName 94 | volumeMounts: 95 | - name: rootfs 96 | mountPath: /rootfs 97 | serviceAccountName: swiss-army-knife 98 | volumes: 99 | - name: rootfs 100 | hostPath: 101 | path: / 102 | --- 103 | apiVersion: v1 104 | kind: Service 105 | metadata: 106 | name: swiss-army-knife 107 | namespace: swiss-army-knife 108 | labels: 109 | app: swiss-army-knife 110 | spec: 111 | selector: 112 | name: swiss-army-knife 113 | ports: 114 | - protocol: TCP 115 | port: 80 116 | targetPort: 80 117 | type: ClusterIP -------------------------------------------------------------------------------- /swiss-army-knife/overlaytest.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | DNS_TEST=false 4 | NAMESPACE=default 5 | 6 | # Parse arguments 7 | while [[ $# -gt 0 ]]; do 8 | case $1 in 9 | --dns-test) 10 | DNS_TEST=true 11 | shift 12 | ;; 13 | *) 14 | echo "Unknown 
option: $1" 15 | exit 1 16 | ;; 17 | esac 18 | done 19 | 20 | echo "=> Start network overlay and DNS test" 21 | if $DNS_TEST 22 | then 23 | DNS_PASS=0; DNS_FAIL=0 24 | else 25 | echo "DNS tests are skipped. Use --dns-check to enable." 26 | fi 27 | echo 28 | NET_PASS=0; NET_FAIL=0 29 | 30 | while read spod shost sip 31 | do 32 | echo "Testing pod $spod on node $shost with IP $sip" 33 | 34 | # Overlay network test 35 | echo " => Testing overlay network connectivity" 36 | while read tip thost 37 | do 38 | if [[ ! $shost == $thost ]]; then 39 | kubectl -n $NAMESPACE exec $spod -c overlaytest -- /bin/sh -c "ping -c2 $tip > /dev/null 2>&1" 40 | RC=$? 41 | if [ $RC -ne 0 ]; then 42 | ((NET_FAIL+=1)); echo " FAIL: $spod on $shost cannot reach pod IP $tip on $thost" 43 | else 44 | ((NET_PASS+=1)); echo " PASS: $spod on $shost can reach pod IP $tip on $thost" 45 | fi 46 | fi 47 | done < <(kubectl get pods -n $NAMESPACE -l name=overlaytest -o jsonpath='{range .items[*]}{@.status.podIP}{" "}{@.spec.nodeName}{"\n"}{end}' | sort -k2) 48 | 49 | if $DNS_TEST; then 50 | # Internal DNS test 51 | echo " => Testing DNS" 52 | kubectl -n $NAMESPACE exec $spod -c overlaytest -- /bin/sh -c "nslookup kubernetes.default > /dev/null 2>&1" 53 | RC=$? 54 | if [ $RC -ne 0 ]; then 55 | ((DNS_FAIL+=1)); echo " FAIL: $spod cannot resolve internal DNS for 'kubernetes.default'" 56 | else 57 | ((DNS_PASS+=1)); echo " PASS: $spod can resolve internal DNS for 'kubernetes.default'" 58 | fi 59 | 60 | # External DNS test 61 | kubectl -n $NAMESPACE exec $spod -c overlaytest -- /bin/sh -c "nslookup rancher.com > /dev/null 2>&1" 62 | RC=$? 63 | if [ $RC -ne 0 ]; then 64 | ((DNS_FAIL+=1)); echo " FAIL: $spod cannot resolve external DNS for 'rancher.com'" 65 | else 66 | ((DNS_PASS+=1)); echo " PASS: $spod can resolve external DNS for 'rancher.com'" 67 | fi 68 | fi 69 | echo 70 | 71 | done < <(kubectl get pods -n $NAMESPACE -l name=overlaytest -o jsonpath='{range .items[*]}{@.metadata.name}{" "}{@.spec.nodeName}{" "}{@.status.podIP}{"\n"}{end}' | sort -k2) 72 | 73 | NET_TOTAL=$(($NET_PASS + $NET_FAIL)) 74 | echo "=> Network [$NET_PASS / $NET_TOTAL]" 75 | if $DNS_TEST; then 76 | DNS_TOTAL=$(($DNS_PASS + $DNS_FAIL)) 77 | echo "=> DNS [$DNS_PASS / $DNS_TOTAL]" 78 | fi 79 | echo; echo "=> End network overlay and DNS test" -------------------------------------------------------------------------------- /swiss-army-knife/overlaytest.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: DaemonSet 3 | metadata: 4 | name: overlaytest 5 | spec: 6 | selector: 7 | matchLabels: 8 | name: overlaytest 9 | template: 10 | metadata: 11 | labels: 12 | name: overlaytest 13 | spec: 14 | tolerations: 15 | - operator: Exists 16 | containers: 17 | - image: rancherlabs/swiss-army-knife 18 | imagePullPolicy: IfNotPresent 19 | name: overlaytest 20 | command: ["sh", "-c", "tail -f /dev/null"] 21 | terminationMessagePath: /dev/termination-log -------------------------------------------------------------------------------- /troubleshooting-scripts/README.md: -------------------------------------------------------------------------------- 1 | # Troubleshooting Scripts 2 | 3 | ## kube-scheduler 4 | 5 | ### Finding the current leader 6 | 7 | Command(s): `curl -s https://raw.githubusercontent.com/rancherlabs/support-tools/master/troubleshooting-scripts/kube-scheduler/find-leader.sh | bash` 8 | 9 | **Example Output** 10 | 11 | ```bash 12 | kube-scheduler is the leader on node a1ubk8slabl03 13 | ``` 14 | 
15 | ## determine-leader 16 | 17 | Command(s): `curl -s https://raw.githubusercontent.com/rancherlabs/support-tools/master/troubleshooting-scripts/determine-leader/rancher2_determine_leader.sh | bash` 18 | 19 | **Example Output** 20 | 21 | ```bash 22 | NAME POD-IP HOST-IP 23 | cattle-cluster-agent-776d795ff8-x77nq 10.42.0.93 10.10.100.83 24 | cattle-node-agent-4bsx6 10.10.100.83 10.10.100.83 25 | rancher-54d47dc9cf-d4qt9 10.42.0.92 10.10.100.83 26 | rancher-54d47dc9cf-prn4d 10.42.0.90 10.10.100.83 27 | rancher-54d47dc9cf-rsn4g 10.42.0.91 10.10.100.83 28 | 29 | rancher-54d47dc9cf-prn4d is the leader in this Rancher instance 30 | ``` 31 | -------------------------------------------------------------------------------- /troubleshooting-scripts/determine-leader/rancher2_determine_leader.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | RANCHER_LEADER="$(kubectl -n kube-system get lease cattle-controllers -o json | jq -r '.spec.holderIdentity')" 3 | # Display Rancher Pods Information 4 | kubectl get pod -n cattle-system $RANCHER_LEADER -o custom-columns=NAME:.metadata.name,POD-IP:.status.podIP,HOST-IP:.status.hostIP 5 | printf "\n$RANCHER_LEADER is the leader in this Rancher instance\n" 6 | -------------------------------------------------------------------------------- /troubleshooting-scripts/etcd/README.md: -------------------------------------------------------------------------------- 1 | # etcd-troubleshooting 2 | 3 | ## Check etcd members 4 | Command(s): `docker exec etcd etcdctl member list` 5 | 6 | **Example Output of a healthy cluster** 7 | ```bash 8 | 2f080bc6ec98f39b, started, etcd-a1ubrkeat03, https://172.27.5.33:2380, https://172.27.5.33:2379,https://172.27.5.33:4001, false 9 | 9d7204f89b221ba3, started, etcd-a1ubrkeat01, https://172.27.5.31:2380, https://172.27.5.31:2379,https://172.27.5.31:4001, false 10 | bd37bc0dc2e990b6, started, etcd-a1ubrkeat02, https://172.27.5.32:2380, https://172.27.5.32:2379,https://172.27.5.32:4001, false 11 | ``` 12 | 13 | ## Check etcd endpoints 14 | Command(s): `curl https://raw.githubusercontent.com/rancherlabs/support-tools/master/troubleshooting-scripts/etcd/check-endpoints.sh | bash ` 15 | 16 | **Example Output of a healthy cluster** 17 | ```bash 18 | Validating connection to https://172.27.5.33:2379/health 19 | {"health":"true"} 20 | Validating connection to https://172.27.5.31:2379/health 21 | {"health":"true"} 22 | Validating connection to https://172.27.5.32:2379/health 23 | {"health":"true"} 24 | ``` 25 | 26 | ## Check etcd logs 27 | 28 | `health check for peer xxx could not connect: dial tcp IP:2380: getsockopt: connection refused` 29 | 30 | A connection to the address shown on port 2380 cannot be established. Check if the etcd container is running on the host with the address shown. 31 | 32 | 33 | `xxx is starting a new election at term x` 34 | 35 | The etcd cluster has lost it’s quorum and is trying to establish a new leader. This can happen when the majority of the nodes running etcd go down/unreachable. 36 | 37 | 38 | `connection error: desc = "transport: Error while dialing dial tcp 0.0.0.0:2379: i/o timeout"; Reconnecting to {0.0.0.0:2379 0 }` 39 | 40 | The host firewall is preventing network communication. 41 | 42 | 43 | `rafthttp: request cluster ID mismatch` 44 | 45 | The node with the etcd instance logging `rafthttp: request cluster ID mismatch` is trying to join a cluster that has already been formed with another peer. 
The node should be removed from the cluster, and re-added. 46 | 47 | 48 | `rafthttp: failed to find member` 49 | 50 | The cluster state (`/var/lib/etcd`) contains wrong information to join the cluster. The node should be removed from the cluster, the state directory should be cleaned and the node should be re-added. 51 | 52 | ## Enabling debug logging 53 | `curl -XPUT -d '{"Level":"DEBUG"}' --cacert $(docker exec etcd printenv ETCDCTL_CACERT) --cert $(docker exec etcd printenv ETCDCTL_CERT) --key $(docker exec etcd printenv ETCDCTL_KEY) https://localhost:2379/config/local/log` 54 | 55 | ## Disabling debug logging 56 | `curl -XPUT -d '{"Level":"INFO"}' --cacert $(docker exec etcd printenv ETCDCTL_CACERT) --cert $(docker exec etcd printenv ETCDCTL_CERT) --key $(docker exec etcd printenv ETCDCTL_KEY) https://localhost:2379/config/local/log` 57 | 58 | ## Getting etcd metrics 59 | `curl -X GET --cacert $(docker exec etcd printenv ETCDCTL_CACERT) --cert $(docker exec etcd printenv ETCDCTL_CERT) --key $(docker exec etcd printenv ETCDCTL_KEY) https://localhost:2379/metrics` 60 | 61 | 62 | **wal_fsync_duration_seconds (99% under 10 ms)** 63 | 64 | A wal_fsync is called when etcd persists its log entries to disk before applying them. 65 | 66 | 67 | **backend_commit_duration_seconds (99% under 25 ms)** 68 | 69 | A backend_commit is called when etcd commits an incremental snapshot of its most recent changes to disk. 70 | -------------------------------------------------------------------------------- /troubleshooting-scripts/etcd/check-endpoints.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | for endpoint in $(docker exec etcd /bin/sh -c "etcdctl member list | cut -d, -f5"); 3 | do 4 | echo "Validating connection to ${endpoint}/health"; 5 | docker run --net=host -v $(docker inspect kubelet --format '{{ range .Mounts }}{{ if eq .Destination "/etc/kubernetes" }}{{ .Source }}{{ end }}{{ end }}')/ssl:/etc/kubernetes/ssl:ro appropriate/curl -s -w "\n" --cacert $(docker exec etcd printenv ETCDCTL_CACERT) --cert $(docker exec etcd printenv ETCDCTL_CERT) --key $(docker exec etcd printenv ETCDCTL_KEY) "${endpoint}/health"; 6 | done 7 | -------------------------------------------------------------------------------- /troubleshooting-scripts/kube-apiserver/check_apiserver-to-etcd.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | for i in $(docker inspect kube-apiserver | grep -m 1 "\--etcd-servers" | grep -Po '(?<=https://)[^:]*') 4 | do 5 | echo -n "Checking $i " 6 | curl --cacert /etc/kubernetes/ssl/kube-ca.pem --cert /etc/kubernetes/ssl/kube-node.pem --key /etc/kubernetes/ssl/kube-node-key.pem https://"$i":2379/health 7 | echo "" 8 | done 9 | -------------------------------------------------------------------------------- /troubleshooting-scripts/kube-apiserver/check_endpoints.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo "Getting IPs from endpoint..." 
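# Each IP in the "kubernetes" endpoint should match the InternalIP of a controlplane node; any other IP points at a stale kube-apiserver endpoint entry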
4 | EndPointIPs=`kubectl get endpoints kubernetes -o jsonpath='{.subsets[].addresses[*].ip}'` 5 | 6 | for EndPointIP in $EndPointIPs 7 | do 8 | if kubectl get nodes --selector=node-role.kubernetes.io/controlplane=true -o jsonpath={.items[*].status.addresses[?\(@.type==\"InternalIP\"\)].address} | grep $EndPointIP > /dev/null 9 | then 10 | echo "Good - $EndPointIP" 11 | else 12 | echo "Problem - $EndPointIP" 13 | fi 14 | done 15 | -------------------------------------------------------------------------------- /troubleshooting-scripts/kube-apiserver/responsiveness.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | for cip in $(kubectl get nodes -l "node-role.kubernetes.io/controlplane=true" -o jsonpath='{range.items[*].status.addresses[?(@.type=="InternalIP")]}{.address}{"\n"}{end}'); 3 | do 4 | kubectl --server https://${cip}:6443 get nodes -v6 2>&1| grep round_trippers; 5 | done 6 | -------------------------------------------------------------------------------- /troubleshooting-scripts/kube-scheduler/find-leader.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | NODE="$(kubectl -n kube-system get endpoints kube-scheduler -o jsonpath='{.metadata.annotations.control-plane\.alpha\.kubernetes\.io/leader}' | jq -r .holderIdentity | sed 's/_[^_]*$//')" 3 | echo "kube-scheduler is the leader on node $NODE" 4 | -------------------------------------------------------------------------------- /windows-access-control-lists/README.md: -------------------------------------------------------------------------------- 1 | # Securing file ACLs on RKE2 Windows nodes 2 | 3 | In certain cases, Windows nodes joined to RKE2 clusters may not have appropriate Access Control Lists (ACLs) configured for important files and directories, allowing improper access by unprivileged user accounts such as `NT AUTHORITY\Authenticated Users`. This occurs in the following configurations 4 | 5 | + Standalone RKE2 nodes (i.e. RKE2 nodes **_not_** provisioned using Rancher) which run on Windows that were _initially_ provisioned using a version older than `1.27.15`, `1.28.11`, `1.29.6`, or `1.30.2` 6 | 7 | + Rancher provisioned RKE2 nodes that run on Windows that were created using a Rancher version older than `2.9.3` or `2.8.9`. 8 | 9 | This issue has been resolved for standalone RKE2 clusters starting with versions `1.27.15`, `1.28.1`, `1.29.6`, `1.30.2` and above. Rancher `2.9.3`, `2.8.9`, and above, have also been updated to properly configure ACLs on Windows nodes during initial provisioning as well as to retroactively update ACLs on existing nodes. 10 | 11 | If you are maintaining a standalone RKE2 Windows cluster which was provisioned using a version of RKE2 older than `1.27.15`, `1.28.11`, `1.29.6`, `1.30.2`, or if you maintain a Rancher provisioned RKE2 Windows cluster but are unable to upgrade to at least `2.9.3` or `2.8.9`, then you can use the below powershell script to manually update the relevant ACLs. 12 | 13 | This script only needs to be run once per node. If desired, additional files and directories can be secured by updating the `$restrictedPaths` variable. After running the script, only the `NT AUTHORITY\SYSTEM` and `BUILTIN\Administrators` group will have access to the specified files and directories. Directories will be configured with inheritance enabled to ensure child files and directories utilize the same restrictive ACL. 
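Before and after running the script, you can spot-check the effective ACL on any of the targeted paths with the same `Get-Acl` cmdlet the script uses; a minimal sketch, assuming the default RKE2 data directory used below (adjust the path for your environment):

```powershell
# Show the owner, group, and access rules currently applied to the RKE2 data directory
Get-Acl -Path "c:\var\lib\rancher\rke2" | Format-List Owner, Group, AccessToString
```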
14 | 15 | Add the below script to a PowerShell file and run it using the PowerShell console as an Administrator. 16 | 17 | ```powershell 18 | function Set-RestrictedPermissions { 19 | [CmdletBinding()] 20 | param ( 21 | [Parameter(Mandatory=$true)] 22 | [string] 23 | $Path, 24 | [Parameter(Mandatory=$true)] 25 | [Boolean] 26 | $Directory 27 | ) 28 | $Owner = "BUILTIN\Administrators" 29 | $Group = "NT AUTHORITY\SYSTEM" 30 | $acl = Get-Acl $Path 31 | 32 | foreach ($rule in $acl.GetAccessRules($true, $true, [System.Security.Principal.SecurityIdentifier])) { 33 | $acl.RemoveAccessRule($rule) | Out-Null 34 | } 35 | $acl.SetAccessRuleProtection($true, $false) 36 | $acl.SetOwner((New-Object System.Security.Principal.NTAccount($Owner))) 37 | $acl.SetGroup((New-Object System.Security.Principal.NTAccount($Group))) 38 | 39 | Set-FileSystemAccessRule -Directory $Directory -acl $acl 40 | 41 | $FullPath = Resolve-Path $Path 42 | Write-Host "Setting restricted ACL on $FullPath" 43 | Set-Acl -Path $Path -AclObject $acl 44 | } 45 | 46 | function Set-FileSystemAccessRule() { 47 | [CmdletBinding()] 48 | param ( 49 | [Parameter(Mandatory=$true)] 50 | [Boolean] 51 | $Directory, 52 | [Parameter(Mandatory=$false)] 53 | [System.Security.AccessControl.ObjectSecurity] 54 | $acl 55 | ) 56 | $users = @( 57 | $acl.Owner, 58 | $acl.Group 59 | ) 60 | if ($Directory -eq $true) { 61 | foreach ($user in $users) { 62 | $rule = New-Object System.Security.AccessControl.FileSystemAccessRule( 63 | $user, 64 | [System.Security.AccessControl.FileSystemRights]::FullControl, 65 | [System.Security.AccessControl.InheritanceFlags]'ObjectInherit,ContainerInherit', 66 | [System.Security.AccessControl.PropagationFlags]::None, 67 | [System.Security.AccessControl.AccessControlType]::Allow 68 | ) 69 | $acl.AddAccessRule($rule) 70 | } 71 | } else { 72 | foreach ($user in $users) { 73 | $rule = New-Object System.Security.AccessControl.FileSystemAccessRule( 74 | $user, 75 | [System.Security.AccessControl.FileSystemRights]::FullControl, 76 | [System.Security.AccessControl.AccessControlType]::Allow 77 | ) 78 | $acl.AddAccessRule($rule) 79 | } 80 | } 81 | } 82 | 83 | function Confirm-ACL { 84 | [CmdletBinding()] 85 | param ( 86 | [Parameter(Mandatory=$true)] 87 | [String] 88 | $Path 89 | ) 90 | foreach ($a in (Get-Acl $path).Access) { 91 | $ref = $a.IdentityReference 92 | if (($ref -ne "BUILTIN\Administrators") -and ($ref -ne "NT AUTHORITY\SYSTEM")) { 93 | return $false 94 | } 95 | } 96 | return $true 97 | } 98 | 99 | $RKE2_DATA_DIR="c:\var\lib\rancher\rke2" 100 | $SYSTEM_AGENT_DIR="c:\var\lib\rancher\agent" 101 | $RANCHER_PROVISIONING_DIR="c:\var\lib\rancher\capr" 102 | 103 | $restrictedPaths = @( 104 | [PSCustomObject]@{ 105 | Path = "c:\etc\rancher\wins\config" 106 | Directory = $false 107 | } 108 | [PSCustomObject]@{ 109 | Path = "c:\etc\rancher\node\password" 110 | Directory = $false 111 | } 112 | [PSCustomObject]@{ 113 | Path = "$SYSTEM_AGENT_DIR\rancher2_connection_info.json" 114 | Directory = $false 115 | } 116 | [PSCustomObject]@{ 117 | Path = "c:\etc\rancher\rke2\config.yaml.d\50-rancher.yaml" 118 | Directory = $false 119 | } 120 | [PSCustomObject]@{ 121 | Path = "c:\usr\local\bin\rke2.exe" 122 | Directory = $false 123 | } 124 | [PSCustomObject]@{ 125 | Path = "$RANCHER_PROVISIONING_DIR" 126 | Directory = $true 127 | } 128 | [PSCustomObject]@{ 129 | Path = "$SYSTEM_AGENT_DIR" 130 | Directory = $true 131 | } 132 | [PSCustomObject]@{ 133 | Path = "$RKE2_DATA_DIR" 134 | Directory = $true 135 | } 136 | ) 137 | 138 | foreach ($path in 
$restrictedPaths) { 139 | # Some paths will not exist on standalone RKE2 clusters 140 | if (-Not (Test-Path -Path $path.Path)) { 141 | continue 142 | } 143 | 144 | if (-Not (Confirm-ACL -Path $path.Path)) { 145 | Set-RestrictedPermissions -Path $path.Path -Directory $path.Directory 146 | } else { 147 | Write-Host "ACLs have been properly configured for the $($path.Path) $(if($path.Directory){ "directory" } else { "file" })" 148 | } 149 | } 150 | ``` 151 | -------------------------------------------------------------------------------- /windows-agent-strict-verify/README.md: -------------------------------------------------------------------------------- 1 | # Enabling agent strict TLS verification on existing Windows nodes 2 | 3 | In certain conditions, Windows nodes will not respect the Agent TLS Mode value set on the Rancher server. This setting was implemented in Rancher 2.9.0 and 2.8.6 4 | 5 | Windows nodes will not respect this setting if the following two conditions are true 6 | 7 | 1. The node was provisioned using a Rancher version older than 2.9.2 or 2.8.8, and continues to be used after a Rancher upgrade to 2.9.2, 2.8.8, or greater 8 | 2. The node is running a version of rke2 _older_ than the August 2024 patches. (i.e. any version _lower_ than v1.30.4, v1.29.8, v1.28.13, v1.27.16.) 9 | 10 | ## Workaround 11 | 12 | In order to retroactively enable strict TLS verification on Windows nodes, the following process must be followed. A Powershell script, `update-node.ps1` has been included to automate some parts of this process, however some steps (such as retrieving the required credentials used by the script) must be done manually. 13 | 14 | 15 | This process needs to be repeated for each Windows node joined to the cluster, but does not need to be done for newly provisioned nodes after Rancher has been upgraded to at least 2.9.2 or 2.8.8 - even if the rke2 version is older than the August patches. In scenarios where it is possible / safe to reprovision the impacted Windows nodes, this process may not be needed. 16 | 17 | 1. Stop the `rancher-wins` service using the `Stop-Service` PowerShell Command (`Stop-Service rancher-wins`) 18 | 19 | 2. Update the version of `wins.exe` running on the node. This can either be done manually, or via the `update-node.ps1` PowerShell script by passing the `-DownloadWins` flag 20 | 1. If a manual approach is taken, download the latest [version of rancher-wins from GitHub](https://github.com/rancher/wins/releases) (at least version `0.4.18`) and place the updated binary in the `c:/usr/local/bin` and `c:/Windows` directories, replacing the existing binaries. 21 | 22 | 2. If the automatic approach is taken, then you must include the `-DownloadWins` flag when invoking `update-node.ps1`. The version of `rancher-wins` packaged within your Rancher server will then be downloaded. 23 | + You must ensure that you are running a version of Rancher which embeds at _least_ `rancher-wins` `v0.4.18`. This version is included in Rancher v2.9.2, v2.8.8, and above. 24 | + Refer to the [`Obtaining the CATTLE_TOKEN and CATTLE_SERVER variables`](#obtaining-the-cattle_token-and-cattle_server-variables) section below to retrieve the required `CATTLE_TOKEN` and `CATTLE_SERVER` variables. 25 | 26 | 3. Manually update the `rancher-wins` config file to enable strict tls verification 27 | 1. This file is located in `c:/etc/rancher/wins/config`. 28 | 1. At the root level (i.e. a new line just before the `system-agent` field) add the following value `agentStrictTLSMode: true` 29 | 2. 
An [example configuration file](#example-updated-wins-config-file) can be seen at the bottom of this file 30 | 31 | 4. If needed, regenerate the rancher connection file 32 | 1. To determine if you need to do this, look at the `/var/lib/rancher/agent/rancher2_connection_info.json` file. If you intend to use strict validation, this file must contain a valid `ca-certificate-data` field. 33 | 2. If this field is missing 34 | 1. Refer to the [`Obtaining the CATTLE_TOKEN and CATTLE_SERVER variables`](#obtaining-the-cattle_token-and-cattle_server-variables) section to retrieve the required `CATTLE_TOKEN` and `CATTLE_SERVER` parameters 35 | 2. Create a new file containing the `update-node.ps1` script and run it, ensuring you properly pass the `CATTLE_SERVER` value to the `-RancherServerURL` flag, and the `CATTLE_TOKEN` value to the `-Token` flag. 36 | 1. Depending on whether you wish to manually update `rancher-wins`, run one of the following two commands 37 | 1. `./update-node.ps1 -RancherServerURL $CATTLE_SERVER -Token $CATTLE_TOKEN` 38 | 2. `./update-node.ps1 -RancherServerURL $CATTLE_SERVER -Token $CATTLE_TOKEN -DownloadWins` 39 | 2. Confirm that the `rancher2_connection_info.json` file contains the correct CA data. 40 | 41 | 5. Confirm the proper version of `rancher-wins` has been installed by running `win.exe --version` 42 | 6. Restart the node (`Restart-Computer`). 43 | 1. If the node is running an RKE2 version older than the August patches, you **must** restart the node otherwise pod networking will be impacted. 44 | 45 | ### Obtaining the `CATTLE_TOKEN` and `CATTLE_SERVER` variables 46 | 47 | - You must be a cluster administrator or have an account permitted to view cluster secrets in order to use this script, as the `CATTLE_TOKEN` is stored in a Kubernetes secret. You cannot simply generate an API token using the Rancher UI. 48 | - To obtain the `CATTLE_TOKEN` and `CATTLE_SERVER` values using the Rancher UI 49 | 1. Open Rancher's Cluster Explorer UI for the cluster which contains the relevant Windows nodes. 50 | 2. In the left hand section, under `More Resources`, go to `Core`, and then finally, `Secrets`. 51 | 3. Find the secret named `stv-aggregation`, and copy the `CATTLE_SERVER` and `CATTLE_TOKEN` fields. 52 | 4. Pass `CATTLE_TOKEN` to the `-Token` flag, and `CATTLE_SERVER` to the `-RancherServerURL` flag. 53 | - To obtain the `CATTLE_TOKEN` and `CATTLE_SERVER` values using kubectl 54 | 1. `kubectl get secret -n cattle-system stv-aggregation --template={{.data.CATTLE_TOKEN}} | base64 -d` 55 | 2. 
`kubectl get secret -n cattle-system stv-aggregation --template={{.data.CATTLE_SERVER}} | base64 -d` 56 | 57 | ### Example updated wins config file 58 | 59 | ```yaml 60 | # This file is located at c:/etc/rancher/wins/config 61 | white_list: 62 | processPaths: 63 | - C:/etc/rancher/wins/powershell.exe 64 | - C:/etc/rancher/wins/wins-upgrade.exe 65 | - C:/etc/wmi-exporter/wmi-exporter.exe 66 | - C:/etc/windows-exporter/windows-exporter.exe 67 | proxyPorts: 68 | - 9796 69 | agentStrictTLSMode: true 70 | systemagent: 71 | workDirectory: C:/var/lib/rancher/agent/work 72 | appliedPlanDirectory: C:/var/lib/rancher/agent/applied 73 | remoteEnabled: true 74 | preserveWorkDirectory: false 75 | connectionInfoFile: C:/var/lib/rancher/agent/rancher2_connection_info.json 76 | csi-proxy: 77 | url: https://haffel-rancher.cp-dev.rancher.space/assets/csi-proxy-%[1]s.tar.gz 78 | version: v1.1.3 79 | kubeletPath: C:/bin/kubelet.exe 80 | ``` -------------------------------------------------------------------------------- /windows-agent-strict-verify/update-node.ps1: -------------------------------------------------------------------------------- 1 | <# 2 | .SYNOPSIS 3 | Updates the rancher2_connection_info.json file on Windows nodes and optionally downloads the latest version of rancher-wins from the specified Rancher server 4 | 5 | .PARAMETER RancherServerURL 6 | The HTTPS URL of the Rancher server which manages the cluster this node is joined to 7 | 8 | .PARAMETER Token 9 | The Rancher API token tracked in the stv-aggregation secret 10 | 11 | .PARAMETER ForceRegeneration 12 | When set to true, this script will overwrite the rancher2_connection_info.json file, even if the certificate-authority-data field is present 13 | 14 | .PARAMETER DownloadWins 15 | When set to true, this script will reach out to the RancherServerURL API and download the version of rancher-wins embedded in that server 16 | #> 17 | 18 | param ( 19 | [Parameter()] 20 | [String] 21 | $RancherServerURL, 22 | 23 | [Parameter()] 24 | [String] 25 | $Token, 26 | 27 | [Parameter()] 28 | [Switch] 29 | $ForceRegeneration, 30 | 31 | [Parameter()] 32 | [Switch] 33 | $DownloadWins 34 | ) 35 | 36 | if ($DownloadWins -eq $true) { 37 | # Download the latest version of wins from the rancher server 38 | $responseCode = $(curl.exe --connect-timeout 60 --max-time 300 --write-out "%{http_code}\n" --ssl-no-revoke -sfL "$RancherServerURL/assets/wins.exe" -o "/usr/local/bin/wins.exe") 39 | switch ( $responseCode ) { 40 | { $_ -in "ok200", 200 } { 41 | Write-Host "Successfully downloaded the wins binary." 42 | break 43 | } 44 | default { 45 | Write-Host "$responseCode received while downloading the wins binary. Double check that the correct RancherServerURL has been provided" 46 | exit 1 47 | } 48 | } 49 | Copy-Item -Path "/usr/local/bin/wins.exe" -Destination "c:\Windows\wins.exe" -Force 50 | } 51 | 52 | # Check the current connection file to determine if CA data is already present.
53 | $info = (Get-Content C:\var\lib\rancher\agent\rancher2_connection_info.json -ErrorAction Ignore) 54 | if (($null -ne $info) -and (($info | ConvertFrom-Json).kubeConfig).Contains("certificate-authority-data")) { 55 | if (-Not $ForceRegeneration) { 56 | Write-Host "certificate-authority-data is already present in rancher2_connection_info.json" 57 | exit 0 58 | } 59 | } 60 | 61 | $CATTLE_ID=(Get-Content /etc/rancher/wins/cattle-id -ErrorAction Ignore) 62 | if (($null -eq $CATTLE_ID) -or ($CATTLE_ID -eq "")) { 63 | Write-Host "Could not obtain required CATTLE_ID value from node" 64 | exit 1 65 | } 66 | 67 | Write-Host "Updating rancher2_connection_info.json file" 68 | 69 | $responseCode = $(curl.exe --connect-timeout 60 --max-time 60 --write-out "%{http_code}\n " --ssl-no-revoke -sfL "$RancherServerURL/v3/connect/agent" -o /var/lib/rancher/agent/rancher2_connection_info.json -H "Authorization: Bearer $Token" -H "X-Cattle-Id: $CATTLE_ID" -H "Content-Type: application/json") 70 | 71 | switch ( $responseCode ) { 72 | { $_ -in "ok200", 200 } { 73 | Write-Host "Successfully downloaded Rancher connection information." 74 | exit 0 75 | } 76 | default { 77 | Write-Host "$responseCode received while downloading Rancher connection information. Double check that the correct RancherServerURL and Token have been provided" 78 | exit 1 79 | } 80 | } 81 | --------------------------------------------------------------------------------