├── longhorn
│   └── PlaceHolder.md
├── .gitignore
├── charts
│   ├── robots.txt
│   ├── rancher-0.0.1.tgz
│   ├── systems-info
│   │   ├── templates
│   │   │   ├── namespace.yaml
│   │   │   ├── serviceaccount.yaml
│   │   │   ├── secret.yaml
│   │   │   ├── clusterrolebinding.yaml
│   │   │   ├── deployment.yaml
│   │   │   └── _helpers.tpl
│   │   ├── Chart.yaml
│   │   ├── .helmignore
│   │   ├── values.yaml
│   │   └── questions.yml
│   ├── systems-information
│   │   ├── templates
│   │   │   ├── namespace.yaml
│   │   │   ├── serviceaccount.yaml
│   │   │   ├── secret.yaml
│   │   │   ├── clusterrolebinding.yaml
│   │   │   └── deployment.yaml
│   │   ├── values.yaml
│   │   ├── Chart.yaml
│   │   └── questions.yml
│   └── index.yaml
├── kubecert
│   ├── base64
│   ├── jq-linux64
│   └── README.md
├── eks-upgrade-using-api
│   ├── demo.gif
│   ├── README.md
│   └── common.sh
├── cleanup-evicted-pods
│   ├── serviceaccount.yaml
│   ├── deploy.yaml
│   └── README.md
├── rancher-cleanup
│   └── README.md
├── migrate-vsphere-clusters
│   └── README.md
├── troubleshooting-scripts
│   ├── kube-scheduler
│   │   └── find-leader.sh
│   ├── kube-apiserver
│   │   ├── responsiveness.sh
│   │   ├── check_apiserver-to-etcd.sh
│   │   └── check_endpoints.sh
│   ├── determine-leader
│   │   └── rancher2_determine_leader.sh
│   ├── etcd
│   │   ├── check-endpoints.sh
│   │   └── README.md
│   └── README.md
├── rancher-metadata-syncer
│   ├── download.sh
│   ├── apache.conf
│   ├── run.sh
│   ├── Dockerfile
│   ├── deployment-configmap.yaml
│   ├── deployment-proxy.yaml
│   └── README.md
├── collection
│   ├── rancher
│   │   ├── v1.6
│   │   │   └── logs-collector
│   │   │       └── README.md
│   │   └── v2.x
│   │       ├── systems-information
│   │       │   ├── README.md
│   │       │   ├── Dockerfile
│   │       │   ├── run.sh
│   │       │   └── systems_summary.sh
│   │       ├── RBAC-role-collector
│   │       │   ├── README.md
│   │       │   └── role-dump.sh
│   │       ├── rancher-pod-collector
│   │       │   ├── README.md
│   │       │   └── rancher-pod-collector.sh
│   │       ├── logs-collector
│   │       │   ├── collection-details.md
│   │       │   └── README.md
│   │       ├── systems-information-v2
│   │       │   ├── deploy.yaml
│   │       │   └── README.md
│   │       ├── profile-collector
│   │       │   └── README.md
│   │       ├── windows-log-collector
│   │       │   └── README.md
│   │       ├── supportability-review
│   │       │   ├── collection-details.md
│   │       │   ├── security-policies.md
│   │       │   └── collect.sh
│   │       └── scc-operator-collector
│   │           ├── collection-details.md
│   │           ├── analyzer.sh
│   │           └── README.md
│   └── longhorn
│       ├── run.sh
│       └── README.md
├── swiss-army-knife
│   ├── overlaytest.yaml
│   ├── overlaytest.sh
│   ├── admin-tools.yaml
│   └── README.md
├── fleet-delete-cluster-registration
│   ├── README.md
│   └── delete_old_resources.sh
├── bad-ingress-scanner
│   ├── Dockerfile
│   ├── bad-ingress.yaml
│   ├── run.sh
│   ├── README.md
│   └── deployment.yaml
├── files
│   └── curl-format.txt
├── change-nodetemplate-owner
│   ├── Dockerfile
│   └── README.md
├── README.md
├── rotate-tokens
│   └── README.md
├── rancher-crd
│   └── enumerate-resources
│       ├── README.md
│       └── rancher-resource-enumerator.sh
├── cleanup-etcd-part-files
│   ├── README.md
│   ├── delete-part-files.yaml
│   └── alt-s3-sync.yaml
├── eks-upgrade-using-kubectl
│   ├── README.md
│   └── common.sh
├── adjust-downstream-webhook
│   ├── README.md
│   └── adjust-downstream-webhook.sh
├── fleet-secrets-bro-patch
│   ├── patch_gitrepo_secrets.sh
│   └── README.md
├── extended-rancher-2-cleanup
│   ├── README.md
│   └── extended-cleanup-rancher2.sh
├── reverse-rke-state-migrations
│   ├── README.md
│   └── reverse-rke-state-migrations.sh
├── NGINX-to-pods-check
│   ├── README.md
│   ├── example-deployment.yml
│   └── check.sh
├── how-to-retrieve-kubeconfig-from-custom-cluster
│   ├── rke-node-kubeconfig.sh
│   └── README.md
├── windows-agent-strict-verify
│   ├── update-node.ps1
│   └── README.md
└── windows-access-control-lists
    └── README.md
/longhorn/PlaceHolder.md:
--------------------------------------------------------------------------------
1 | ## Longhorn Scripts
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | **/scc-support-bundle-*
3 |
--------------------------------------------------------------------------------
/charts/robots.txt:
--------------------------------------------------------------------------------
1 | "User-Agent: *nDisallow: /"
2 |
--------------------------------------------------------------------------------
/kubecert/base64:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rancherlabs/support-tools/HEAD/kubecert/base64
--------------------------------------------------------------------------------
/kubecert/jq-linux64:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rancherlabs/support-tools/HEAD/kubecert/jq-linux64
--------------------------------------------------------------------------------
/charts/rancher-0.0.1.tgz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rancherlabs/support-tools/HEAD/charts/rancher-0.0.1.tgz
--------------------------------------------------------------------------------
/charts/systems-info/templates/namespace.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: Namespace
3 | metadata:
4 | name: systems-info
5 |
--------------------------------------------------------------------------------
/eks-upgrade-using-api/demo.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rancherlabs/support-tools/HEAD/eks-upgrade-using-api/demo.gif
--------------------------------------------------------------------------------
/charts/systems-information/templates/namespace.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: Namespace
3 | metadata:
4 | name: systems-info
5 |
--------------------------------------------------------------------------------
/cleanup-evicted-pods/serviceaccount.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: ServiceAccount
3 | metadata:
4 | name: rke-job-deployer
5 | namespace: kube-system
6 |
--------------------------------------------------------------------------------
/charts/systems-info/templates/serviceaccount.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: ServiceAccount
3 | metadata:
4 | name: systems-info
5 | namespace: systems-info
6 |
--------------------------------------------------------------------------------
/rancher-cleanup/README.md:
--------------------------------------------------------------------------------
1 | # Rancher resource cleanup script
2 |
3 | This has been moved to [rancher/rancher-cleanup](https://github.com/rancher/rancher-cleanup).
4 |
--------------------------------------------------------------------------------
/charts/systems-information/templates/serviceaccount.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: ServiceAccount
3 | metadata:
4 | name: systems-info
5 | namespace: systems-info
6 |
--------------------------------------------------------------------------------
/migrate-vsphere-clusters/README.md:
--------------------------------------------------------------------------------
1 | # migrate-vsphere-clusters
2 |
3 | This script is to be used as part of the patch process for Rancher's
4 | [CVE-2022-45157].
5 |
6 |
7 | [CVE-2022-45157]: https://github.com/rancher/rancher/security/advisories/GHSA-xj7w-r753-vj8v
8 |
9 |
--------------------------------------------------------------------------------
/troubleshooting-scripts/kube-scheduler/find-leader.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | NODE="$(kubectl -n kube-system get endpoints kube-scheduler -o jsonpath='{.metadata.annotations.control-plane\.alpha\.kubernetes\.io/leader}' | jq -r .holderIdentity | sed 's/_[^_]*$//')"
3 | echo "kube-scheduler is the leader on node $NODE"
4 |
--------------------------------------------------------------------------------
/charts/systems-info/templates/secret.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: Secret
3 | metadata:
4 | name: mail-config
5 | namespace: systems-info
6 | type: "Opaque"
7 | data:
8 | smtp_user: {{ default "" .Values.smtp_user | b64enc | quote }}
9 | smtp_pass: {{ default "" .Values.smtp_pass | b64enc | quote }}
10 |
--------------------------------------------------------------------------------
/charts/systems-information/templates/secret.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: Secret
3 | metadata:
4 | name: mail-config
5 | namespace: systems-info
6 | type: "Opaque"
7 | data:
8 | smtp_user: {{ default "" .Values.smtp_user | b64enc | quote }}
9 | smtp_pass: {{ default "" .Values.smtp_pass | b64enc | quote }}
10 |
--------------------------------------------------------------------------------
/troubleshooting-scripts/kube-apiserver/responsiveness.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | for cip in $(kubectl get nodes -l "node-role.kubernetes.io/controlplane=true" -o jsonpath='{range.items[*].status.addresses[?(@.type=="InternalIP")]}{.address}{"\n"}{end}');
3 | do
4 | kubectl --server https://${cip}:6443 get nodes -v6 2>&1| grep round_trippers;
5 | done
6 |
--------------------------------------------------------------------------------
/charts/systems-info/templates/clusterrolebinding.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: rbac.authorization.k8s.io/v1
2 | kind: ClusterRoleBinding
3 | metadata:
4 | name: systems-info
5 | roleRef:
6 | apiGroup: rbac.authorization.k8s.io
7 | kind: ClusterRole
8 | name: cluster-admin
9 | subjects:
10 | - kind: ServiceAccount
11 | name: systems-info
12 | namespace: systems-info
13 |
--------------------------------------------------------------------------------
/charts/systems-information/templates/clusterrolebinding.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: rbac.authorization.k8s.io/v1
2 | kind: ClusterRoleBinding
3 | metadata:
4 | name: systems-info
5 | roleRef:
6 | apiGroup: rbac.authorization.k8s.io
7 | kind: ClusterRole
8 | name: cluster-admin
9 | subjects:
10 | - kind: ServiceAccount
11 | name: systems-info
12 | namespace: systems-info
13 |
--------------------------------------------------------------------------------
/rancher-metadata-syncer/download.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | echo "Downloading kontainer-driver-metadata for v2.4"
3 | wget --no-check-certificate -O v2-4.json https://releases.rancher.com/kontainer-driver-metadata/release-v2.4/data.json
4 |
5 | echo "Downloading kontainer-driver-metadata for v2.5"
6 | wget --no-check-certificate -O v2-5.json https://releases.rancher.com/kontainer-driver-metadata/release-v2.5/data.json
7 |
--------------------------------------------------------------------------------
/troubleshooting-scripts/kube-apiserver/check_apiserver-to-etcd.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | for i in $(docker inspect kube-apiserver | grep -m 1 "\--etcd-servers" | grep -Po '(?<=https://)[^:]*')
4 | do
5 | echo -n "Checking $i "
6 | curl --cacert /etc/kubernetes/ssl/kube-ca.pem --cert /etc/kubernetes/ssl/kube-node.pem --key /etc/kubernetes/ssl/kube-node-key.pem https://"$i":2379/health
7 | echo ""
8 | done
9 |
--------------------------------------------------------------------------------
/collection/rancher/v1.6/logs-collector/README.md:
--------------------------------------------------------------------------------
1 | # rancher-logs-collector
2 |
3 | The script needs to be downloaded and run directly on the host using the `root` user or using `sudo`.
4 |
5 | ## How to use
6 |
7 | * Download the script and save as: `rancher16_logs_collector.sh`
8 | * Make sure the script is executable: `chmod +x rancher16_logs_collector.sh`
9 | * Run the script: `./rancher16_logs_collector.sh`
10 |
--------------------------------------------------------------------------------
/troubleshooting-scripts/determine-leader/rancher2_determine_leader.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | RANCHER_LEADER="$(kubectl -n kube-system get lease cattle-controllers -o json | jq -r '.spec.holderIdentity')"
3 | # Display Rancher Pods Information
4 | kubectl get pod -n cattle-system $RANCHER_LEADER -o custom-columns=NAME:.metadata.name,POD-IP:.status.podIP,HOST-IP:.status.hostIP
5 | printf "\n$RANCHER_LEADER is the leader in this Rancher instance\n"
6 |
--------------------------------------------------------------------------------
/charts/systems-info/Chart.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | name: rancher
3 | description: Rancher Systems Info
4 | version: 0.0.1
5 | appVersion: 0.0.1
6 | home: https://rancher.com
7 | icon: https://github.com/rancher/ui/blob/master/public/assets/images/logos/welcome-cow.svg
8 | keywords:
9 | - rancher
10 | - support
11 | sources:
12 | - https://github.com/rancherlabs/systems-info
13 | maintainers:
14 | - name: Rancher Labs
15 | email: charts@rancher.com
16 |
--------------------------------------------------------------------------------
/kubecert/README.md:
--------------------------------------------------------------------------------
1 | Credit for the logic that retrieves the KUBECONFIG goes to [Superseb](https://github.com/superseb/)
2 |
3 | # kubecert
4 | This script will set you up with kubectl and retrieve the local kubeconfig for a cluster provisioned by RKE or Rancher. The -y option will auto-install kubectl and jq on Linux.
5 | Usage:
6 | ```bash
7 | curl -LO https://github.com/rancherlabs/support-tools/raw/master/kubecert/kubecert.sh
8 | bash ./kubecert.sh -y
9 | ```
10 |
--------------------------------------------------------------------------------
/rancher-metadata-syncer/apache.conf:
--------------------------------------------------------------------------------
1 |
2 | ServerAdmin admin@localhost
3 | ServerName localhost
4 | DocumentRoot /var/www/src
5 |
6 | Options Indexes FollowSymLinks MultiViews
7 | AllowOverride All
8 | Order allow,deny
9 | Allow from all
10 | Require all granted
11 |
12 | ErrorLog ${APACHE_LOG_DIR}/error.log
13 | CustomLog ${APACHE_LOG_DIR}/access.log combined
14 |
15 |
--------------------------------------------------------------------------------
/charts/systems-info/.helmignore:
--------------------------------------------------------------------------------
1 | # Patterns to ignore when building packages.
2 | # This supports shell glob matching, relative path matching, and
3 | # negation (prefixed with !). Only one pattern per line.
4 | .DS_Store
5 | # Common VCS dirs
6 | .git/
7 | .gitignore
8 | .bzr/
9 | .bzrignore
10 | .hg/
11 | .hgignore
12 | .svn/
13 | # Common backup files
14 | *.swp
15 | *.bak
16 | *.tmp
17 | *.orig
18 | *~
19 | # Various IDEs
20 | .project
21 | .idea/
22 | *.tmproj
23 | .vscode/
24 |
--------------------------------------------------------------------------------
/troubleshooting-scripts/kube-apiserver/check_endpoints.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | echo "Getting IPs from endpoint..."
4 | EndPointIPs=`kubectl get endpoints kubernetes -o jsonpath='{.subsets[].addresses[*].ip}'`
5 |
6 | for EndPointIP in $EndPointIPs
7 | do
8 | if kubectl get nodes --selector=node-role.kubernetes.io/controlplane=true -o jsonpath={.items[*].status.addresses[?\(@.type==\"InternalIP\"\)].address} | grep $EndPointIP > /dev/null
9 | then
10 | echo "Good - $EndPointIP"
11 | else
12 | echo "Problem - $EndPointIP"
13 | fi
14 | done
15 |
--------------------------------------------------------------------------------
/swiss-army-knife/overlaytest.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: apps/v1
2 | kind: DaemonSet
3 | metadata:
4 | name: overlaytest
5 | spec:
6 | selector:
7 | matchLabels:
8 | name: overlaytest
9 | template:
10 | metadata:
11 | labels:
12 | name: overlaytest
13 | spec:
14 | tolerations:
15 | - operator: Exists
16 | containers:
17 | - image: rancherlabs/swiss-army-knife
18 | imagePullPolicy: IfNotPresent
19 | name: overlaytest
20 | command: ["sh", "-c", "tail -f /dev/null"]
21 | terminationMessagePath: /dev/termination-log
--------------------------------------------------------------------------------
/troubleshooting-scripts/etcd/check-endpoints.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | for endpoint in $(docker exec etcd /bin/sh -c "etcdctl member list | cut -d, -f5");
3 | do
4 | echo "Validating connection to ${endpoint}/health";
5 | docker run --net=host -v $(docker inspect kubelet --format '{{ range .Mounts }}{{ if eq .Destination "/etc/kubernetes" }}{{ .Source }}{{ end }}{{ end }}')/ssl:/etc/kubernetes/ssl:ro appropriate/curl -s -w "\n" --cacert $(docker exec etcd printenv ETCDCTL_CACERT) --cert $(docker exec etcd printenv ETCDCTL_CERT) --key $(docker exec etcd printenv ETCDCTL_KEY) "${endpoint}/health";
6 | done
7 |
--------------------------------------------------------------------------------
/collection/rancher/v2.x/systems-information/README.md:
--------------------------------------------------------------------------------
1 | # Rancher 2.x Systems Summary
2 |
3 | The script needs to be downloaded and run directly on a host running a Rancher server container, either as a single node install or a Rancher Pod as part of a High Availability install. The script needs to be run by a user with access to the Docker socket or using `sudo`.
4 |
5 | ## How to use
6 |
7 | * Download the script and save as: `rancher2_systems_information.sh`
8 | * Make sure the script is executable: `chmod +x rancher2_systems_information.sh`
9 | * Run the script: `./rancher2_systems_information.sh`
10 |
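11 | Taken together, the steps above amount to the following (assuming the script has already been downloaded to the current directory):
12 | 
13 | ```bash
14 | chmod +x rancher2_systems_information.sh
15 | sudo ./rancher2_systems_information.sh
16 | ```
17 | 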
--------------------------------------------------------------------------------
/fleet-delete-cluster-registration/README.md:
--------------------------------------------------------------------------------
1 | # Fleet | Registration Resource Cleanup
2 |
3 | This is a cleanup script to work around a known Fleet bug whereby patching a downstream cluster, for instance when re-deploying a Fleet agent in such a cluster, causes new resources to be created without obsolete resources being deleted. Ultimately, this clutters the upstream cluster.
4 |
5 | This script retrieves all cluster registration resources, orders them by cluster then by creation timestamp, and deletes all but the youngest cluster registration for each cluster. This causes obsolete cluster registrations and their child resources to be deleted.
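6 | 
7 | The bundled `delete_old_resources.sh` is the supported way to do this. Purely as an illustration of the idea (keep the newest `ClusterRegistration` per cluster, delete the rest), a rough sketch looks like the following, assuming `kubectl` access to the upstream cluster, a workspace namespace of `fleet-default`, and that each registration records its cluster in `.status.clusterName`:
8 | 
9 | ```bash
10 | NS=fleet-default  # assumed Fleet workspace namespace
11 | # List registrations oldest-first as "<cluster> <name>", keep only the newest
12 | # name per cluster, and delete every other registration.
13 | kubectl -n "$NS" get clusterregistrations.fleet.cattle.io \
14 |   --sort-by=.metadata.creationTimestamp \
15 |   -o jsonpath='{range .items[*]}{.status.clusterName}{" "}{.metadata.name}{"\n"}{end}' \
16 |   | awk '{ newest[$1] = $2; name[NR] = $2; cluster[NR] = $1 }
17 |          END { for (i = 1; i <= NR; i++) if (name[i] != newest[cluster[i]]) print name[i] }' \
18 |   | xargs -r kubectl -n "$NS" delete clusterregistrations.fleet.cattle.io
19 | ```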
--------------------------------------------------------------------------------
/bad-ingress-scanner/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM ubuntu:20.04
2 | MAINTAINER Matthew Mattox
3 |
4 | ENV DEBIAN_FRONTEND=noninteractive
5 |
6 | RUN apt-get update && apt-get install -yq --no-install-recommends \
7 | apt-utils \
8 | curl \
9 | && apt-get clean && rm -rf /var/lib/apt/lists/*
10 |
11 | ## Install kubectl
12 | RUN curl -kLO "https://storage.googleapis.com/kubernetes-release/release/$(curl -ks https://storage.googleapis.com/kubernetes-release/release/stable.txt)/bin/linux/amd64/kubectl" && \
13 | chmod u+x kubectl && \
14 | mv kubectl /usr/local/bin/kubectl
15 |
16 | COPY *.sh /root/
17 | RUN chmod +x /root/*.sh
18 | CMD /root/run.sh
19 |
--------------------------------------------------------------------------------
/collection/rancher/v2.x/systems-information/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM ubuntu:18.04
2 | MAINTAINER Rancher Support support@rancher.com
3 | ENV DEBIAN_FRONTEND=noninteractive
4 |
5 | RUN apt-get update && apt-get install -yq --no-install-recommends \
6 | curl \
7 | msmtp \
8 | && apt-get clean && rm -rf /var/lib/apt/lists/*
9 |
10 | ##Installing kubectl
11 | RUN curl -k -LO https://storage.googleapis.com/kubernetes-release/release/`curl -k -s https://storage.googleapis.com/kubernetes-release/release/stable.txt`/bin/linux/amd64/kubectl && mv kubectl /bin/kubectl && chmod +x /bin/kubectl
12 |
13 | ADD *.sh /usr/bin/
14 | RUN chmod +x /usr/bin/*.sh
15 |
16 | WORKDIR /root
17 | CMD /usr/bin/run.sh
18 |
--------------------------------------------------------------------------------
/files/curl-format.txt:
--------------------------------------------------------------------------------
1 | http_code: %{http_code}\n
2 | http_connect: %{http_connect}\n
3 | time_total: %{time_total}\n
4 | time_namelookup: %{time_namelookup}\n
5 | time_connect: %{time_connect}\n
6 | time_appconnect: %{time_appconnect}\n
7 | time_pretransfer: %{time_pretransfer}\n
8 | time_redirect: %{time_redirect}\n
9 | time_starttransfer: %{time_starttransfer}\n
10 | size_download: %{size_download}\n
11 | size_upload: %{size_upload}\n
12 | size_header: %{size_header}\n
13 | size_request: %{size_request}\n
14 | speed_download: %{speed_download}\n
15 | speed_upload: %{speed_upload}\n
16 | content_type: %{content_type}\n
17 | num_connects: %{num_connects}\n
18 | num_redirects: %{num_redirects}\n
19 |
--------------------------------------------------------------------------------
/change-nodetemplate-owner/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM ubuntu:20.04
2 | MAINTAINER patrick0057
3 | ENV TERM xterm
4 | RUN apt-get update && apt-get install -y apt-transport-https curl gnupg2 && \
5 | curl -s https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add - && \
6 | echo "deb https://apt.kubernetes.io/ kubernetes-xenial main" | tee -a /etc/apt/sources.list.d/kubernetes.list && \
7 | apt-get update && \
8 | apt-get install -y kubectl jq && \
9 | rm -rf /var/lib/apt/lists/* && \
10 | mkdir /root/.kube/
11 | COPY change-nodetemplate-owner.sh /usr/bin/
12 | WORKDIR /root
13 | RUN chmod +x /usr/bin/change-nodetemplate-owner.sh
14 | ENTRYPOINT ["/usr/bin/change-nodetemplate-owner.sh"]
15 | CMD []
16 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # support-tools
2 |
3 | This repository contains Rancher support-tools to assist with investigating and troubleshooting issues with Rancher clusters, as well as other maintenance tasks.
4 |
5 | ## Caution:
6 |
7 | This repository contains scripts that can cause harm if used without the guidance of Rancher Support. We advise reaching out to Rancher Support before executing any of these scripts. Failure to do so could result in production downtime.
8 |
9 | The repository consists of the following directories of tools:
10 | - collection: non-mutating, non-destructive scripts for the purpose of collecting information/logs from a cluster or node.
11 | - files: common files used in conjunction with troubleshooting commands.
12 |
--------------------------------------------------------------------------------
/rancher-metadata-syncer/run.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | echo "Starting webserver..."
3 | apachectl start
4 | echo "ok" > /usr/local/apache2/htdocs/healthz
5 | if [[ ! -z $HTTP_PROXY ]] || [[ ! -z $HTTPS_PROXY ]]
6 | then
7 | echo "Detected proxy settings."
8 | echo "Starting downloader..."
9 | while true
10 | do
11 | /usr/local/bin/download.sh
12 | echo "Sleeping..."
13 | sleep 6h
14 | done
15 | fi
16 |
17 | if [[ -d /data ]]
18 | then
19 | echo "Configmap detected, loading json files from Configmap..."
20 | tar -zvxf v2-4.json.tar.gz -C /usr/local/apache2/htdocs/
21 | tar -zvxf v2-5.json.tar.gz -C /usr/local/apache2/htdocs/
22 | fi
23 |
24 | echo "Starting in static mode"
25 | while true
26 | do
27 | sleep 10000
28 | done
29 |
--------------------------------------------------------------------------------
/charts/systems-info/values.yaml:
--------------------------------------------------------------------------------
1 | replicaCount: 1
2 |
3 | image:
4 | repository: docker.io/rancher/systems-info
5 | tag: v0.0.1
6 | pullPolicy: IfNotPresent
7 |
8 | imagePullSecrets: []
9 |
10 | Name: systems-info
11 | Namespace: systems-info
12 |
13 | rbac:
14 | create: true
15 | clusterAdminRole: true
16 |
17 | serviceAccount:
18 | create: true
19 | name: systems-info
20 |
21 | resources:
22 | limits:
23 | cpu: 100m
24 | memory: 100Mi
25 | requests:
26 | cpu: 100m
27 | memory: 100Mi
28 |
29 | secretName: "mail-config"
30 | smtp_user: ""
31 | smtp_pass: ""
32 |
33 | rancher_name: ""
34 | schedule: "0 0 * * 1"
35 | smtp_host: ""
36 | smtp_port: "587"
37 | to_address: ""
38 | from_address: ""
39 | send_to_support: "true"
40 |
--------------------------------------------------------------------------------
/charts/systems-information/values.yaml:
--------------------------------------------------------------------------------
1 | replicaCount: 1
2 |
3 | image:
4 | repository: docker.io/rancher/systems-info
5 | tag: v0.0.1
6 | pullPolicy: IfNotPresent
7 |
8 | imagePullSecrets: []
9 |
10 | Name: systems-info
11 | Namespace: systems-info
12 |
13 | rbac:
14 | create: true
15 | clusterAdminRole: true
16 |
17 | serviceAccount:
18 | create: true
19 | name: systems-info
20 |
21 | resources:
22 | limits:
23 | cpu: 100m
24 | memory: 100Mi
25 | requests:
26 | cpu: 100m
27 | memory: 100Mi
28 |
29 | secretName: "mail-config"
30 | smtp_user: ""
31 | smtp_pass: ""
32 |
33 | rancher_name: ""
34 | schedule: "0 0 * * 1"
35 | smtp_host: ""
36 | smtp_port: "587"
37 | to_address: ""
38 | from_address: ""
39 | send_to_support: "true"
40 |
--------------------------------------------------------------------------------
/cleanup-evicted-pods/deploy.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: batch/v1
2 | kind: CronJob
3 | metadata:
4 | name: cleanup-evicted-pods
5 | namespace: kube-system
6 | labels:
7 | app: cleanup-evicted-pods
8 | spec:
9 | schedule: "*/30 * * * *"
10 | concurrencyPolicy: Forbid
11 | jobTemplate:
12 | spec:
13 | template:
14 | spec:
15 | containers:
16 | - name: cleanup-evicted-pods
17 | image: rancherlabs/swiss-army-knife
18 | imagePullPolicy: IfNotPresent
19 | command: ["sh", "-c", "kubectl get pods --all-namespaces --field-selector 'status.phase==Failed' -o json | kubectl delete -f -"]
20 | restartPolicy: OnFailure
21 | serviceAccount: rke-job-deployer
22 | serviceAccountName: rke-job-deployer
23 |
--------------------------------------------------------------------------------
/charts/index.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | entries:
3 | rancher:
4 | - apiVersion: v1
5 | appVersion: 0.0.1
6 | created: "2021-03-04T11:39:06.0872245-06:00"
7 | description: Rancher Systems Info
8 | digest: b2d217c3c7ab839bb11151cdd1d99ab8920f0241dcda74764e3932969f17fe0c
9 | home: https://rancher.com
10 | icon: https://github.com/rancher/ui/blob/master/public/assets/images/logos/welcome-cow.svg
11 | keywords:
12 | - rancher
13 | - support
14 | maintainers:
15 | - email: charts@rancher.com
16 | name: Rancher Labs
17 | name: rancher
18 | sources:
19 | - https://github.com/rancherlabs/systems-info
20 | urls:
21 | - https://rancherlabs.github.io/systems-info/charts/rancher-0.0.1.tgz
22 | version: 0.0.1
23 | generated: "2021-03-04T11:39:06.0847725-06:00"
24 |
--------------------------------------------------------------------------------
/rancher-metadata-syncer/Dockerfile:
--------------------------------------------------------------------------------
1 | ## Running builder to download metadata files
2 | FROM alpine AS builder
3 | MAINTAINER Matthew Mattox matt.mattox@suse.com
4 | RUN apk update && apk add --update-cache \
5 | wget \
6 | bash \
7 | && rm -rf /var/cache/apk/*
8 |
9 | ADD *.sh /usr/local/bin/
10 | RUN chmod +x /usr/local/bin/*.sh
11 | WORKDIR /root/
12 | RUN /usr/local/bin/download.sh
13 |
14 | ## Building webserver
15 | FROM httpd:alpine
16 | MAINTAINER Matthew Mattox matt.mattox@suse.com
17 | RUN apk update && apk add --update-cache \
18 | wget \
19 | curl \
20 | bash \
21 | gzip \
22 | && rm -rf /var/cache/apk/*
23 |
24 | WORKDIR /var/www/localhost
25 | COPY --from=builder /root/*.json /usr/local/apache2/htdocs/
26 | COPY --from=builder /usr/local/bin/*.sh /usr/local/bin/
27 | CMD /usr/local/bin/run.sh
28 |
--------------------------------------------------------------------------------
/collection/rancher/v2.x/systems-information/run.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | set -e
3 |
4 | if [[ -z $smtp_user ]]
5 | then
6 | cat << EOF > /etc/msmtprc
7 | account default
8 | host ${smtp_host}
9 | port ${smtp_port}
10 | from ${from_address}
11 | logfile /var/log/msmtp.log
12 | EOF
13 |
14 | else
15 | cat << EOF > /etc/msmtprc
16 | account default
17 | host ${smtp_host}
18 | port ${smtp_port}
19 | tls on
20 | tls_starttls on
21 | tls_certcheck off
22 | auth on
23 | user ${smtp_user}
24 | password ${smtp_pass}
25 | from ${from_address}
26 | logfile /var/log/msmtp.log
27 | EOF
28 | fi
29 | chmod 600 /etc/msmtprc
30 |
31 | echo "Running Summary Report..."
32 | /usr/bin/systems_summary.sh | tee report.txt
33 |
34 | echo "To: ${to_address}" > email.txt
35 | if [[ "$send_to_support" == "true" ]]
36 | then
37 | echo "CC: support@support.tools" >> email.txt
38 | fi
39 | echo "From: ${from_address}" >> email.txt
40 | echo "Subject: Rancher Systems Summary Report - ${rancher_name}" >> email.txt
41 | cat report.txt >> email.txt
42 | cat email.txt | msmtp -a default ${to_address}
43 |
--------------------------------------------------------------------------------
/cleanup-evicted-pods/README.md:
--------------------------------------------------------------------------------
1 | # Cleanup evicted pods left behind after disk pressure
2 | When a node starts to evict pods under disk pressure, the evicted pods are left behind. Resources such as volumes, IPs, and containers are cleaned up and deleted, but the pod object remains in "Evicted" status. Per upstream, this is [intentional](https://github.com/kubernetes/kubernetes/issues/54525#issuecomment-340035375).
3 |
4 | ## Workaround
5 |
6 | ### Manual cleanup
7 | NOTE: This script is designed to work on Linux machines.
8 | ```bash
9 | kubectl get pods --all-namespaces -ojson | jq -r '.items[] | select(.status.reason!=null) | select(.status.reason | contains("Evicted")) | .metadata.name + " " + .metadata.namespace' | xargs -n2 -l bash -c 'kubectl delete pods $0 --namespace=$1'
10 | ```
11 |
12 | ### Automatic cleanup
13 | This deploys a CronJob in the cluster that runs every 30 minutes and finds and removes any pods with a status of "Evicted."
14 |
15 | ```bash
16 | kubectl apply -f deploy.yaml
17 | ```
18 |
19 | NOTE: This YAML uses the image `rancherlabs/swiss-army-knife`.
20 |
--------------------------------------------------------------------------------
/troubleshooting-scripts/README.md:
--------------------------------------------------------------------------------
1 | # Troubleshooting Scripts
2 |
3 | ## kube-scheduler
4 |
5 | ### Finding the current leader
6 |
7 | Command(s): `curl -s https://raw.githubusercontent.com/rancherlabs/support-tools/master/troubleshooting-scripts/kube-scheduler/find-leader.sh | bash`
8 |
9 | **Example Output**
10 |
11 | ```bash
12 | kube-scheduler is the leader on node a1ubk8slabl03
13 | ```
14 |
15 | ## determine-leader
16 |
17 | Command(s): `curl -s https://raw.githubusercontent.com/rancherlabs/support-tools/master/troubleshooting-scripts/determine-leader/rancher2_determine_leader.sh | bash`
18 |
19 | **Example Output**
20 |
21 | ```bash
22 | NAME POD-IP HOST-IP
23 | cattle-cluster-agent-776d795ff8-x77nq 10.42.0.93 10.10.100.83
24 | cattle-node-agent-4bsx6 10.10.100.83 10.10.100.83
25 | rancher-54d47dc9cf-d4qt9 10.42.0.92 10.10.100.83
26 | rancher-54d47dc9cf-prn4d 10.42.0.90 10.10.100.83
27 | rancher-54d47dc9cf-rsn4g 10.42.0.91 10.10.100.83
28 |
29 | rancher-54d47dc9cf-prn4d is the leader in this Rancher instance
30 | ```
31 |
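32 | ## kube-apiserver
33 | 
34 | The scripts under `kube-apiserver` in this directory follow the same pattern as the commands above. For example, to check API server responsiveness from each control plane node (a sketch that assumes the same raw.githubusercontent.com URL layout as the other commands):
35 | 
36 | Command(s): `curl -s https://raw.githubusercontent.com/rancherlabs/support-tools/master/troubleshooting-scripts/kube-apiserver/responsiveness.sh | bash`
37 | 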
--------------------------------------------------------------------------------
/rotate-tokens/README.md:
--------------------------------------------------------------------------------
1 | # rotate-tokens.sh
2 |
3 | This script is used to rotate the main service account and token for a Rancher
4 | downstream cluster. It may be used in the event of a known token exposure or as
5 | a routine preemptive measure.
6 |
7 | ## Usage
8 |
9 | Generate an API token in Rancher and use it to set the TOKEN environment
10 | variable. Set KUBECONFIG to point to your Rancher local cluster. Set
11 | RANCHER_SERVER to point to your Rancher service. The script can be run without
12 | any arguments. Example:
13 |
14 | ```
15 | export TOKEN=token-ccabc:xyz123
16 | export KUBECONFIG=/path/to/kubeconfig
17 | export RANCHER_SERVER=https://rancher.example.com
18 | ./rotate-tokens.sh
19 | ```
20 |
21 | For extra debugging information, run with DEBUG=y:
22 |
23 | ```
24 | DEBUG=y ./rotate-tokens.sh
25 | ```
26 |
27 | The script iterates over each downstream cluster sequentially. If you have many
28 | downstream clusters, this may take several minutes. Do not interrupt the script.
29 |
30 | The script generates kubeconfigs for each downstream cluster and stores them in
31 | `./kubeconfigs` in the current working directory. They can be removed with
32 | `rm -r kubeconfigs`.
33 |
--------------------------------------------------------------------------------
/rancher-crd/enumerate-resources/README.md:
--------------------------------------------------------------------------------
1 | # rancher-resource-enumerator
2 |
3 | Rancher Custom Resource enumeration script
4 |
5 | ## Dependencies
6 |
7 | * `kubectl`
8 | * Linux, MacOS or WSL2
9 |
10 | ## How to use
11 |
12 | * Download the script and save as: `rancher-resource-enumerator.sh`
13 | * Make sure the script is executable: `chmod u+x ./rancher-resource-enumerator.sh`
14 | * Run the script: `./rancher-resource-enumerator.sh -a`
15 |
16 | The script will output all Rancher custom resource data in the `/tmp/enum-cattle-resources-` directory by default. The `totals` file will give the total count for all resources.
17 |
18 | ## Flags
19 |
20 | ```
21 | Rancher Resource Enumerator
22 | Usage: ./rancher-resource-enumerator.sh [ -d -n | -c | -a ]
23 | -h Display this help message.
24 | -a Enumerate all custom resources.
25 | -n Only enumerate resources in the specified namespace(s).
26 | -c Only enumerate cluster (non-namespaced) resources.
27 | -d Path to output directory (default: /tmp/enum-cattle-resources-).
28 | ```
29 |
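30 | As an illustration, the enumeration the script performs for each resource type is conceptually similar to this sketch (a single example type shown; the script discovers every cattle.io type and writes the counts to the `totals` file):
31 | 
32 | ```bash
33 | # Count instances of one Rancher custom resource across all namespaces
34 | kubectl get projects.management.cattle.io -A --no-headers 2>/dev/null | wc -l
35 | ```
36 | 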
--------------------------------------------------------------------------------
/charts/systems-information/Chart.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | name: systems-info
3 | description: Rancher Systems Info
4 |
5 | # A chart can be either an 'application' or a 'library' chart.
6 | #
7 | # Application charts are a collection of templates that can be packaged into versioned archives
8 | # to be deployed.
9 | #
10 | # Library charts provide useful utilities or functions for the chart developer. They're included as
11 | # a dependency of application charts to inject those utilities and functions into the rendering
12 | # pipeline. Library charts do not define any templates and therefore cannot be deployed.
13 | type: application
14 |
15 | # This is the chart version. This version number should be incremented each time you make changes
16 | # to the chart and its templates, including the app version.
17 | version: 0.0.1
18 |
19 | # This is the version number of the application being deployed. This version number should be
20 | # incremented each time you make changes to the application.
21 | appVersion: 0.0.1
22 |
23 | keywords:
24 | - rancher
25 | - support
26 | home: https://support.rancher.com
27 | sources:
28 | - https://github.com/rancherlabs/systems-info
29 | maintainers:
30 | - name: mattmattox
31 | email: matt.mattox@suse.com
32 | icon: https://rancher.com/img/brand-guidelines/assets/logos/png/color/rancher-logo-stacked-color.png
33 |
--------------------------------------------------------------------------------
/cleanup-etcd-part-files/README.md:
--------------------------------------------------------------------------------
1 | # Workaround ETCD Snapshots Part Files Issue
2 | To work around issue [gh-30662](https://github.com/rancher/rancher/issues/30662), please select one of the following deployment options.
3 |
4 | ## Option A - clean up leftover part files
5 | This deploys a DaemonSet that runs on each etcd node in a loop, checking every 5 minutes for leftover part files. Any part files older than 15 minutes are deleted; the age check prevents removing a part file that is still in use. (See the sketch at the end of this README.)
6 |
7 | ### Changes to restore process
8 | None, the restore process is unchanged.
9 |
10 | ### Installation
11 | ```
12 | kubectl apply -f delete-part-files.yaml
13 | ```
14 |
15 | ## Option B - alternative s3 snapshots
16 | This script replaces the recurring snapshot functionality in RKE with a Kubernetes job that runs every 12 hours.
17 |
18 | ### Changes to restore process
19 | - You will need to manually take a new snapshot
20 | - Download the snapshot from S3 on all etcd nodes
21 | - Rename the old snapshot to the new snapshot filename
22 | - Restore the S3 snapshot in Rancher UI by selecting the new snapshot name
23 |
24 | ### Installation
25 | - Disable recurring snapshots in Rancher/RKE
26 | - At a minimum, `alt-s3-sync.yaml` must be modified to reflect the S3 details (remember to base64-encode the values)
27 | ```
28 | kubectl apply -f alt-s3-sync.yaml
29 | ```
30 |
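31 | For reference, the cleanup that Option A's DaemonSet runs on each etcd node boils down to a loop like the following sketch (simplified from `delete-part-files.yaml`):
32 | 
33 | ```bash
34 | # Periodically delete snapshot part files older than 15 minutes,
35 | # skipping completed .zip snapshots.
36 | while true; do
37 |   find /opt/rke/etcd-snapshots/ -mindepth 1 -type f -mmin +15 \
38 |     ! -name '*.zip' -exec rm -f {} +
39 |   sleep 360
40 | done
41 | ```
42 | 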
--------------------------------------------------------------------------------
/bad-ingress-scanner/bad-ingress.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | apiVersion: networking.k8s.io/v1
3 | kind: Ingress
4 | metadata:
5 | name: test-01
6 | spec:
7 | rules:
8 | - host: test-01.example.com
9 | http:
10 | paths:
11 | - backend:
12 | service:
13 | name: test-01-example-com
14 | port:
15 | number: 80
16 | path: /
17 | pathType: Prefix
18 | ---
19 | apiVersion: networking.k8s.io/v1
20 | kind: Ingress
21 | metadata:
22 | name: test-02
23 | spec:
24 | rules:
25 | - host: test-02.example.com
26 | http:
27 | paths:
28 | - backend:
29 | service:
30 | name: test-02-example-com
31 | port:
32 | number: 80
33 | path: /
34 | pathType: Prefix
35 | tls:
36 | - hosts:
37 | - test-02.example.com
38 | secretName: test-02-example-com
39 | ---
40 | apiVersion: networking.k8s.io/v1
41 | kind: Ingress
42 | metadata:
43 | name: test-02-dup
44 | spec:
45 | rules:
46 | - host: test-02.example.com
47 | http:
48 | paths:
49 | - backend:
50 | service:
51 | name: test-02-example-com
52 | port:
53 | number: 80
54 | path: /
55 | pathType: Prefix
56 | tls:
57 | - hosts:
58 | - test-02.example.com
59 | secretName: test-02-example-com
60 |
--------------------------------------------------------------------------------
/rancher-metadata-syncer/deployment-configmap.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | apiVersion: apps/v1
3 | kind: Deployment
4 | metadata:
5 | labels:
6 | app: rancher-metadata
7 | name: rancher-metadata
8 | namespace: cattle-system
9 | spec:
10 | replicas: 1
11 | selector:
12 | matchLabels:
13 | app: rancher-metadata
14 | template:
15 | metadata:
16 | labels:
17 | app: rancher-metadata
18 | spec:
19 | containers:
20 | - image: rancher/metadata-syncer:latest
21 | imagePullPolicy: IfNotPresent
22 | name: rancher-metadata
23 | livenessProbe:
24 | httpGet:
25 | path: /healthz
26 | port: 80
27 | initialDelaySeconds: 3
28 | periodSeconds: 3
29 | readinessProbe:
30 | httpGet:
31 | path: /healthz
32 | port: 80
33 | initialDelaySeconds: 5
34 | periodSeconds: 5
35 | volumeMounts:
36 | - mountPath: /data
37 | name: metadata
38 | volumes:
39 | - configMap:
40 | defaultMode: 256
41 | name: rancher-metadata
42 | optional: false
43 | name: metadata
44 | ---
45 | apiVersion: v1
46 | kind: Service
47 | metadata:
48 | name: rancher-metadata
49 | namespace: cattle-system
50 | spec:
51 | selector:
52 | app: rancher-metadata
53 | ports:
54 | - protocol: TCP
55 | port: 80
56 | targetPort: 80
57 |
--------------------------------------------------------------------------------
/eks-upgrade-using-kubectl/README.md:
--------------------------------------------------------------------------------
1 | # SURE-5880 Support Script
2 |
3 | ## Purpose
4 |
5 | This script is designed to upgrade EKS clusters using kubectl. It has been specifically designed for Rancher v2.6.10 and for upgrading EKS clusters from 1.22 to 1.23 (while a UI issue prevents doing so through the UI).
6 |
7 | ## Requirements
8 |
9 | This script requires the following:
10 |
11 | - jq
12 | - kubectl
13 |
14 | ## Usage
15 |
16 | 1. Open a terminal
17 | 2. Export environment variables for the path to the kubeconfig for your Rancher cluster
18 |
19 | ```bash
20 | export RANCHER_KUBE=""
21 | ```
22 |
23 | ### Upgrading EKS Clusters
24 |
25 | 1. Get a list of your EKS clusters using this command
26 |
27 | ```bash
28 | # For v2
29 | ./eks-support.sh list -k $RANCHER_KUBE
30 | # For v1
31 | ./eks-support.sh list -k $RANCHER_KUBE --kev1
32 | ```
33 |
34 | 2. For each EKS cluster you want to upgrade run the following command:
35 |
36 | ```bash
37 | # For v2
37 | ./eks-support.sh upgrade -k $RANCHER_KUBE --from 1.22 --to 1.23 --name
39 | # For v1
40 | ./eks-support.sh upgrade -k $RANCHER_KUBE --from 1.22 --to 1.23 --name --kev1
41 | ```
42 |
43 | > Replace the values of --from, --to and --name with your values.
44 |
45 | ### Unsetting Node Groups as managed fields for imported EKS Clusters (only for KEv2)
46 |
47 | ```bash
48 | # For v2
49 | ./eks-support.sh unset_nodegroups -k $RANCHER_KUBE --name
50 | ```
51 |
--------------------------------------------------------------------------------
/adjust-downstream-webhook/README.md:
--------------------------------------------------------------------------------
1 | # Adjust downstream webhook
2 | This script adjusts the version of the rancher-webhook release in downstream clusters.
3 | It decides what to do with the webhook deployment in each downstream cluster based on Rancher server version.
4 |
5 | ## Background
6 | The `rancher-webhook` chart is deployed in downstream clusters beginning with Rancher v2.7.2.
7 | On a rollback from a version >=2.7.2 to a version <2.7.2, the webhook will stay in the downstream clusters.
8 | Since each version of the webhook is one-to-one compatible with a specific version of Rancher, this can result in unexpected behavior.
9 |
10 | ## Usage
11 |
12 | ```bash
13 | ## Create a token through the UI. The token should have no scope and be made for a user who is a global admin.
14 | read -s RANCHER_TOKEN && export RANCHER_TOKEN
15 | ## The server URL for Rancher - you can get this value in the "server-url" setting. You can find it by going to Global Settings => Settings => server-url. The example format should be: https://rancher-test.home
16 | read -s RANCHER_URL && export RANCHER_URL
17 | bash adjust-downstream-webhook.sh
18 | ```
19 | For Rancher setups using self-signed certificates, you can specify `--insecure-skip-tls-verify` to force the script to
20 | ignore TLS certificate verification. Note that this option is insecure, and should be avoided for production setups.
21 |
22 | ## Notes
23 | This script should be run after rolling back Rancher to the desired version
24 | (for example, when going from v2.7.2 to v2.7.0, only run this script after v2.7.0 is running).
25 |
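26 | To confirm which webhook version a downstream cluster ended up with, a quick check (a sketch, assuming kubectl is pointed at that downstream cluster) is:
27 | 
28 | ```bash
29 | # Show the rancher-webhook image (and therefore version) currently deployed
30 | kubectl -n cattle-system get deployment rancher-webhook \
31 |   -o jsonpath='{.spec.template.spec.containers[0].image}{"\n"}'
32 | ```
33 | 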
--------------------------------------------------------------------------------
/collection/rancher/v2.x/RBAC-role-collector/README.md:
--------------------------------------------------------------------------------
1 | # Rancher 2.x RBAC role collector
2 |
3 | This project was created to collect RBAC roles in a Kubernetes cluster to assist with troubleshooting.
4 |
5 | ## Usage
6 |
7 | 1. Download the script to a location from where you can run `kubectl` against the intended cluster, and save it as: `role-dump.sh`
8 | `curl -OLs https://raw.githubusercontent.com/rancherlabs/support-tools/master/collection/rancher/v2.x/RBAC-role-collector/role-dump.sh`
9 | 2. Set kubectl context to the cluster where you see the issue you are investigating. You will likely want to run this against the Rancher local cluster as well as the downstream cluster where you see the issues
10 | 3. Run the script `bash ./role-dump.sh`
11 |
12 | ### What is collected
13 |
14 | The output will contain:
15 |
16 | - JSON files for each role type (in the following list) containing all the roles in the cluster
17 | - Listing (`rolebindings.list`) of all the rolebindings ordered by type
18 | - A tar.gz file that can be provided to support, an uncompressed directory will remain with all the data gathered for your inspection
19 |
20 | Having this information and a list of the user IDs of any users affected by the issue can help in troubleshooting.
21 |
22 | #### CRDs collected:
23 |
24 | ```
25 | clusterroletemplatebindings
26 | globalrolebindings
27 | globalroles
28 | projectroletemplatebindings
29 | roletemplates.management.cattle.io
30 | roletemplatebindings
31 | clusterrolebindings
32 | clusterroles
33 | roletemplates.rancher.cattle.io
34 | rolebindings
35 | roles
36 | ```
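37 | 
38 | As an illustration, dumping a single one of these types by hand looks like the following sketch (the script loops over the whole list above and bundles the results into the tar.gz for you):
39 | 
40 | ```bash
41 | # Dump all clusterroletemplatebindings to a JSON file for inspection
42 | kubectl get clusterroletemplatebindings -A -o json > clusterroletemplatebindings.json
43 | ```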
--------------------------------------------------------------------------------
/collection/rancher/v2.x/rancher-pod-collector/README.md:
--------------------------------------------------------------------------------
1 | # Rancher v2.x rancher-pod-collector
2 |
3 | This project was created to collect output from the Rancher installation in a Rancher Management (local) cluster when troubleshooting support cases.
4 |
5 | This script needs to be downloaded and run on one of the following locations:
6 |
7 | - A server or workstation with kubectl access to the Rancher Management (local) cluster
8 | - Directly on one of the cluster nodes using the `root` user or using `sudo`
9 | - As a k8s deployment on the local cluster
10 |
11 | ## Usage
12 |
13 | - Download the script and save as: `rancher-pod-collector.sh`
14 | - Make sure the script is executable: `chmod +x rancher-pod-collector.sh`
15 | - Run the script: `./rancher-pod-collector.sh`
16 |
17 | Output will be written to `/tmp` as a tar.gz archive named `-.tar.gz`, the default output directory can be changed with the `-d` flag.
18 |
19 | ## Flags
20 |
21 | ```
22 | Rancher Pod Collector
23 | Usage: rancher-pod-collector.sh [ -d -k KUBECONFIG -t -w -f ]
24 |
25 | All flags are optional.
26 |
27 | -d Output directory for temporary storage and .tar.gz archive (ex: -d /var/tmp)
28 | -k Override the kubeconfig (ex: ~/.kube/custom)
29 | -t Enable trace logs
30 | -w Live tailing Rancher logs
31 | -f Force log collection if the minimum space isn't available."
32 | ```
33 |
34 | ## Important disclaimer
35 |
36 | The flag `-t` enables trace logging. This can capture sensitive information about your Rancher install, including but not limited to usernames, passwords, encryption keys, etc.
37 |
--------------------------------------------------------------------------------
/bad-ingress-scanner/run.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | echo "####################################################################"
4 | echo "Scanning ingress controllers..."
5 | for ingressPod in `kubectl -n ingress-nginx get pods -l app=ingress-nginx -o name | awk -F'/' '{print $2}'`
6 | do
7 | echo "Pod: $ingressPod"
8 | kubectl -n ingress-nginx logs "$ingressPod" | grep 'Error obtaining Endpoints for Service' | awk -F '"' '{print $2}' >> ./bad-endpoints.list
9 | kubectl -n ingress-nginx logs "$ingressPod" | grep 'Error getting SSL certificate' | awk -F '"' '{print $2}' >> ./bad-certs.list
10 | done
11 | echo "####################################################################"
12 | echo "Sorting and removing duplicates from lists..."
13 | cat ./bad-endpoints.list | sort | uniq > ./bad-endpoints.list2
14 | mv ./bad-endpoints.list2 ./bad-endpoints.list
15 | cat ./bad-certs.list | sort | uniq > ./bad-certs.list2
16 | mv ./bad-certs.list2 ./bad-certs.list
17 |
18 | if [[ ! -z `cat ./bad-endpoints.list` ]]
19 | then
20 | echo "####################################################################"
21 | echo "Found bad endpoints."
22 | cat ./bad-endpoints.list
23 | else
24 | echo "####################################################################"
25 | echo "No bad endpoints found."
26 | fi
27 |
28 | if [[ ! -z `cat ./bad-certs.list` ]]
29 | then
30 | echo "####################################################################"
31 | echo "Found bad certs."
32 | cat ./bad-certs.list
33 | else
34 | echo "####################################################################"
35 | echo "No bad endpoints found."
36 | fi
37 |
--------------------------------------------------------------------------------
/fleet-secrets-bro-patch/patch_gitrepo_secrets.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | DRYRUN=0
4 |
5 | while getopts "D" opt; do
6 | case $opt in
7 | D) DRYRUN=1;;
8 | \?) echo "Invalid option: -$OPTARG"; exit 1;;
9 | esac
10 | done
11 |
12 | output=$(kubectl get gitrepo -A -o custom-columns=NAMESPACE:.metadata.namespace,CLIENT:.spec.clientSecretName,HELM:.spec.helmSecretName,HELMPATHS:.spec.helmSecretNameForPaths --no-headers)
13 |
14 | secret_combinations=()
15 | while read -r row; do
16 | # Extract the namespace and potential secret names from each row
17 | namespace=$(echo "$row" | awk '{print $1}')
18 | read -r -a secrets <<< "$(echo "$row" | awk '{print $2, $3, $4}')"
19 | # Create a list of secret combinations for this namespace
20 | for secret in "${secrets[@]}"; do
21 | if [ "$secret" != "" ]; then
22 | secret_combinations+=("$namespace:$secret")
23 | fi
24 | done
25 | done <<< "$(echo "$output" | awk '{print $0}')"
26 |
27 | # Sort and uniq the list of secret combinations
28 | sorted_secret_combinations=($(printf "%s\n" "${secret_combinations[@]}" | sort -u))
29 |
30 | echo "Patching unique secret combinations:"
31 | for combination in "${sorted_secret_combinations[@]}"; do
32 | # Set the delimiter
33 | IFS=':'
34 | # Read the input string into two variables
35 | read -r namespace name <<< "$combination"
36 | if [ $DRYRUN -eq 1 ]; then
37 | echo "[DRY-RUN] Would patch secret: $namespace/$name"
38 | else
39 | echo "Patching secret: $combination"
40 | kubectl patch secret -n "$namespace" "$name" -p '{"metadata": {"labels": {"fleet.cattle.io/managed": "true"}}}'
41 | fi
42 | done
--------------------------------------------------------------------------------
/charts/systems-info/templates/deployment.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: batch/v1beta1
2 | kind: CronJob
3 | metadata:
4 | name: systems-info
5 | spec:
6 | schedule: {{ .Values.schedule | quote }}
7 | jobTemplate:
8 | spec:
9 | template:
10 | spec:
11 | containers:
12 | - env:
13 | - name: from_address
14 | value: {{ .Values.from_address | quote }}
15 | - name: rancher_name
16 | value: {{ .Values.rancher_name | quote }}
17 | - name: send_to_support
18 | value: {{ .Values.send_to_support | quote }}
19 | - name: smtp_host
20 | value: {{ .Values.smtp_host | quote }}
21 | - name: smtp_port
22 | value: {{ .Values.smtp_port | quote }}
23 | - name: to_address
24 | value: {{ .Values.to_address | quote }}
25 | - name: smtp_user
26 | valueFrom:
27 | secretKeyRef:
28 | key: smtp_user
29 | name: mail-config
30 | optional: false
31 | - name: smtp_pass
32 | valueFrom:
33 | secretKeyRef:
34 | key: smtp_pass
35 | name: mail-config
36 | optional: false
37 | image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}"
38 | imagePullPolicy: {{ .Values.image.pullPolicy }}
39 | name: system-information
40 | dnsPolicy: ClusterFirst
41 | restartPolicy: Never
42 | schedulerName: default-scheduler
43 | serviceAccountName: systems-info
44 |
--------------------------------------------------------------------------------
/rancher-metadata-syncer/deployment-proxy.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | apiVersion: apps/v1
3 | kind: Deployment
4 | metadata:
5 | labels:
6 | app: rancher-metadata
7 | name: rancher-metadata
8 | namespace: cattle-system
9 | spec:
10 | replicas: 1
11 | selector:
12 | matchLabels:
13 | app: rancher-metadata
14 | template:
15 | metadata:
16 | labels:
17 | app: rancher-metadata
18 | spec:
19 | containers:
20 | - env:
21 | - name: HTTPS_PROXY
22 | value: https://:@:/
23 | - name: HTTP_PROXY
24 | value: http://:@:/
25 | image: rancher/metadata-syncer:latest
26 | imagePullPolicy: IfNotPresent
27 | livenessProbe:
28 | failureThreshold: 3
29 | httpGet:
30 | path: /healthz
31 | port: 80
32 | scheme: HTTP
33 | initialDelaySeconds: 3
34 | periodSeconds: 3
35 | successThreshold: 1
36 | timeoutSeconds: 1
37 | name: rancher-metadata
38 | readinessProbe:
39 | failureThreshold: 3
40 | httpGet:
41 | path: /healthz
42 | port: 80
43 | scheme: HTTP
44 | initialDelaySeconds: 5
45 | periodSeconds: 5
46 | successThreshold: 1
47 | timeoutSeconds: 1
48 |
49 | ---
50 | apiVersion: v1
51 | kind: Service
52 | metadata:
53 | name: rancher-metadata
54 | namespace: cattle-system
55 | spec:
56 | selector:
57 | app: rancher-metadata
58 | ports:
59 | - protocol: TCP
60 | port: 80
61 | targetPort: 80
62 |
--------------------------------------------------------------------------------
/charts/systems-information/templates/deployment.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: batch/v1beta1
2 | kind: CronJob
3 | metadata:
4 | name: systems-info
5 | spec:
6 | schedule: {{ .Values.schedule | quote }}
7 | jobTemplate:
8 | spec:
9 | template:
10 | spec:
11 | containers:
12 | - env:
13 | - name: from_address
14 | value: {{ .Values.from_address | quote }}
15 | - name: rancher_name
16 | value: {{ .Values.rancher_name | quote }}
17 | - name: send_to_support
18 | value: {{ .Values.send_to_support | quote }}
19 | - name: smtp_host
20 | value: {{ .Values.smtp_host | quote }}
21 | - name: smtp_port
22 | value: {{ .Values.smtp_port | quote }}
23 | - name: to_address
24 | value: {{ .Values.to_address | quote }}
25 | - name: smtp_user
26 | valueFrom:
27 | secretKeyRef:
28 | key: smtp_user
29 | name: mail-config
30 | optional: false
31 | - name: smtp_pass
32 | valueFrom:
33 | secretKeyRef:
34 | key: smtp_pass
35 | name: mail-config
36 | optional: false
37 | image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}"
38 | imagePullPolicy: {{ .Values.image.pullPolicy }}
39 | name: system-information
40 | dnsPolicy: ClusterFirst
41 | restartPolicy: Never
42 | schedulerName: default-scheduler
43 | serviceAccountName: systems-info
44 |
--------------------------------------------------------------------------------
/fleet-secrets-bro-patch/README.md:
--------------------------------------------------------------------------------
1 | # Fleet | GitRepo Secret Backup Restore Patch
2 |
3 | This is a patching script to ensure all secrets used by Fleet `GitRepos` are backed up by the Rancher Backups tool.
4 |
5 | From Rancher v2.8.?? (TBD) and v2.9.0, all `Secrets` created via the Fleet UI in Rancher will be included in Rancher Backups.
6 |
7 | Any GitRepo `Secrets` created before this, or outside of the Fleet UI in Rancher, will not be included in Rancher Backups.
8 |
9 | Running this patching script against your Rancher cluster identifies all secrets used by GitRepos and labels them as managed by Fleet, which ensures they are backed up by Rancher Backups.
10 |
11 | ## Running the script
12 | To run this script you simply need a valid KUBECONFIG to connect to your Rancher cluster. Then execute the shell script:
13 | > ./patch_gitrepo_secrets.sh
14 |
15 | When run you should see output similar to:
16 |
17 | ```bash
18 | # ./patch_gitrepo_secrets.sh
19 | Patching unique secret combinations:
20 | Patching secret: fleet-default:auth-helm-creds
21 | secret/auth-helm-creds patched
22 | Patching secret: fleet-local:auth-gitlab-creds
23 | secret/auth-gitlab-creds patched (no change)
24 | ```
25 |
26 | Note: If the secret already has the necessary label, the output will look like the `secret/auth-gitlab-creds` line above.
27 |
28 | ### Dry-run
29 | Optionally, you can run the script with the dry-run flag `-D`, which produces output like:
30 | ```bash
31 | # ./patch_gitrepo_secrets.sh -D
32 | Patching unique secret combinations:
33 | Would patch secret: fleet-default/auth-6w5gn
34 | Would patch secret: fleet-default/auth-lfkdr
35 | Would patch secret: fleet-local/auth-gitlab-creds
36 | ```
--------------------------------------------------------------------------------
/cleanup-etcd-part-files/delete-part-files.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: apps/v1
2 | kind: DaemonSet
3 | metadata:
4 | name: cleanup-etcd
5 | namespace: kube-system
6 | labels:
7 | app: cleanup-etcd
8 | spec:
9 | selector:
10 | matchLabels:
11 | app: cleanup-etcd
12 | template:
13 | metadata:
14 | labels:
15 | app: cleanup-etcd
16 | spec:
17 | affinity:
18 | nodeAffinity:
19 | requiredDuringSchedulingIgnoredDuringExecution:
20 | nodeSelectorTerms:
21 | - matchExpressions:
22 | - key: beta.kubernetes.io/os
23 | operator: NotIn
24 | values:
25 | - windows
26 | - key: node-role.kubernetes.io/etcd
27 | operator: In
28 | values:
29 | - "true"
30 | containers:
31 | - args:
32 | - while true;
33 | do
34 | echo "Before cleanup";
35 | ls -lh /opt/rke/etcd-snapshots/;
36 | find /opt/rke/etcd-snapshots/ -mindepth 1 -type f -name '*.*' -mmin +15 | grep -v '\.zip' | xargs rm -f;
37 | echo "Post cleanup";
38 | ls -lh /opt/rke/etcd-snapshots/;
39 | echo "Sleeping...";
40 | sleep 360;
41 | done;
42 | command:
43 | - /bin/sh
44 | - -c
45 | image: busybox
46 | name: cleanup-etcd
47 | volumeMounts:
48 | - mountPath: /opt/rke
49 | name: rke
50 | tolerations:
51 | - effect: NoExecute
52 | key: node-role.kubernetes.io/etcd
53 | operator: Equal
54 | value: "true"
55 | volumes:
56 | - hostPath:
57 | path: /opt/rke
58 | type: ""
59 | name: rke
60 |
--------------------------------------------------------------------------------
/bad-ingress-scanner/README.md:
--------------------------------------------------------------------------------
1 | [Docker Hub: rancher/bad-ingress-scanner](https://hub.docker.com/r/rancher/bad-ingress-scanner)
2 | [Drone CI: rancherlabs/support-tools](https://drone-publish.rancher.io/rancherlabs/support-tools)
3 |
4 | # Bad ingress scanner
5 | This tool is designed to scan for misbehaving ingresses, for example an ingress deployed referencing a non-existent SSL certificate, or an ingress with an empty or missing backend service.
6 |
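For a sense of what "bad certs" means in practice, the following manual spot check (a sketch, not part of the tool itself) lists the TLS secret names referenced by every ingress; a referenced secret that does not exist in its namespace is the kind of misconfiguration this scanner reports.

```bash
# Illustrative manual check: list namespace, ingress name, and referenced TLS secret names.
# Any secret listed here that is missing from its namespace is a candidate "bad cert".
kubectl get ingress -A -o jsonpath='{range .items[*]}{.metadata.namespace}{"\t"}{.metadata.name}{"\t"}{.spec.tls[*].secretName}{"\n"}{end}'
```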
7 | ## Running report - remotely
8 | ```bash
9 | wget -O ingress-scanner.sh https://raw.githubusercontent.com/rancherlabs/support-tools/master/bad-ingress-scanner/run.sh
10 | chmod +x ./ingress-scanner.sh
11 | ./ingress-scanner.sh
12 | ```
13 |
14 | ## Running report - in-cluster
15 | ```bash
16 | kubectl -n ingress-nginx delete job ingress-scanner
17 | kubectl apply -f deployment.yaml
18 | kubectl -n ingress-nginx logs -l app=ingress-scanner
19 | ```
20 |
21 | ## Example output
22 | ```bash
23 | Pod: nginx-ingress-controller-r8kkz
24 | ####################################################################
25 | Found bad endpoints.
26 | default/ingress-75f627ce3d0ccd29dd268e0ab2b37008
27 | default/test-01-example-com
28 | default/test-02-example-com
29 | ####################################################################
30 | Found bad certs.
31 | default/test-01-example-com
32 | default/test-02-example-com
33 | ```
34 |
35 | ## Removing
36 | ```bash
37 | kubectl delete -f deployment.yaml
38 | ```
39 |
40 | ## Deploying test ingress rules
41 | Note: These rules are designed to be broken/invalid and are deployed to the default namespace.
42 | ```bash
43 | kubectl apply -f bad-ingress.yaml
44 | ```
45 |
46 | ## Removing test ingress rules
47 | ```bash
48 | kubectl delete -f bad-ingress.yaml
49 | ```
50 |
--------------------------------------------------------------------------------
/extended-rancher-2-cleanup/README.md:
--------------------------------------------------------------------------------
1 | ## Extended Rancher 2 Cleanup
2 |
3 | This script is designed to clean a node provisioned with the RKE1 distribution using Rancher or the RKE CLI.
4 |
5 | The node will be cleaned of all state to ensure it is consistent to reuse in a cluster or other use case.
6 |
7 | For [RKE2](https://docs.rke2.io/install/uninstall) and [K3s](https://rancher.com/docs/k3s/latest/en/installation/uninstall/) nodes, use the uninstall.sh script created during installation.
8 |
9 | > **Warning** this script will delete all containers, volumes, network interfaces, and directories that relate to Rancher and Kubernetes. It will also flush all iptables rules and optionally delete container images.
10 |
11 | > It is important to perform pre-checks, and backup the node as needed before proceeding with any steps below.
12 |
13 | ### Running the script
14 |
15 | #### Download the script
16 | ```bash
17 | curl -LO https://github.com/rancherlabs/support-tools/raw/master/extended-rancher-2-cleanup/extended-cleanup-rancher2.sh
18 | ```
19 | #### Run the script as root, or prefix with sudo
20 | ```bash
21 | bash extended-cleanup-rancher2.sh
22 | ```
23 |
24 | ### Usage
25 |
26 | ```bash
27 | # bash extended-cleanup-rancher2.sh -h
28 | Rancher 2.x extended cleanup
29 | Usage: bash extended-cleanup-rancher2.sh [ -f -i -s ]
30 |
31 | All flags are optional
32 |
33 | -f | --skip-iptables Skip flush of iptables rules
34 | -i | --delete-images Cleanup all container images
35 | -s | --delete-snapshots Cleanup all etcd snapshots
36 | -h This help menu
37 |
38 | !! Warning, this script flushes iptables rules, removes containers, and all data specific to Kubernetes and Rancher
39 | !! Docker will be restarted when flushing iptables rules
40 | !! Backup data as needed before running this script
41 | !! Use at your own risk
42 | ```
43 |
--------------------------------------------------------------------------------
/reverse-rke-state-migrations/README.md:
--------------------------------------------------------------------------------
1 | # reverse-rke-state-migrations.sh
2 | This script can be used to reverse RKE cluster state migrations that are performed automatically by Rancher on all downstream RKE clusters as of releases `v2.7.14` and `v2.8.5`. Running this script should only be necessary if you have upgraded to a Rancher version at or above the aforementioned versions and need to restore Rancher back to a version that is older than them. For example, you're on `v2.8.0`, you take a backup of Rancher and then upgrade to `v2.8.5`, but then you restore Rancher from your backup. In this case, you'd have to use this script to reverse the RKE cluster state migrations that occurred during the upgrade to `v2.8.5`.
3 |
4 | ## Usage
5 | ⚠️ **WARNING:** Before running this script, please ensure that **you've backed up your downstream RKE clusters**. The script **will delete `full-cluster-state` secrets from downstream RKE clusters**.
6 |
7 | 1. Take backups of your downstream RKE clusters.
8 | 2. Ensure you have [kubectl](https://kubernetes.io/docs/tasks/tools/#kubectl), [jq](https://jqlang.github.io/jq/), and [yq](https://mikefarah.gitbook.io/yq/#install) installed.
9 | 3. Generate a Rancher API token and use it to set the `RANCHER_TOKEN` environment variable.
10 | 4. Run the script pointing to your Rancher server URL.
11 |
12 | ```shell
13 | export RANCHER_TOKEN=<token>
14 | ./reverse-rke-state-migrations.sh --rancher-host <rancher-hostname>
15 | ```
16 |
17 | This script will iterate over all downstream RKE clusters and, for each one, it will ensure that a `full-cluster-state` ConfigMap exists inside the cluster as is expected by older versions of RKE. After doing this successfully for each of the targeted clusters, the script will remove a ConfigMap from the local cluster that marks the original migration as complete since it will effectively have been reversed.
18 |
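If you want to spot-check the result on a downstream cluster afterwards, a minimal sketch (assuming the ConfigMap is expected in the `kube-system` namespace, where classic RKE keeps its state) is:

```shell
# Verify the restored ConfigMap on a downstream RKE cluster (context name is a placeholder)
kubectl --context <downstream-cluster-context> -n kube-system get configmap full-cluster-state
```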
--------------------------------------------------------------------------------
/bad-ingress-scanner/deployment.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: ServiceAccount
3 | metadata:
4 | name: ingress-scanner
5 | namespace: ingress-nginx
6 | ---
7 | kind: ClusterRole
8 | apiVersion: rbac.authorization.k8s.io/v1
9 | metadata:
10 | name: ingress-scanner
11 | namespace: ingress-nginx
12 | rules:
13 | - apiGroups:
14 | - ""
15 | resources:
16 | - pods
17 | - pods/log
18 | verbs:
19 | - get
20 | - list
21 | - apiGroups:
22 | - networking.k8s.io
23 | resources:
24 | - ingresses
25 | verbs:
26 | - list
27 | - watch
28 | - get
29 | ---
30 | apiVersion: rbac.authorization.k8s.io/v1
31 | kind: ClusterRoleBinding
32 | metadata:
33 | name: ingress-scanner
34 | roleRef:
35 | apiGroup: rbac.authorization.k8s.io
36 | kind: ClusterRole
37 | name: ingress-scanner
38 | subjects:
39 | - kind: ServiceAccount
40 | name: ingress-scanner
41 | namespace: ingress-nginx
42 | ---
43 | apiVersion: batch/v1
44 | kind: Job
45 | metadata:
46 | name: ingress-scanner
47 | namespace: ingress-nginx
48 | spec:
49 | backoffLimit: 10
50 | completions: 1
51 | parallelism: 1
52 | template:
53 | metadata:
54 | labels:
55 | app: ingress-scanner
56 | job-name: ingress-scanner
57 | spec:
58 | affinity:
59 | nodeAffinity:
60 | requiredDuringSchedulingIgnoredDuringExecution:
61 | nodeSelectorTerms:
62 | - matchExpressions:
63 | - key: beta.kubernetes.io/os
64 | operator: NotIn
65 | values:
66 | - windows
67 | - key: node-role.kubernetes.io/worker
68 | operator: Exists
69 | containers:
70 | - image: rancher/bad-ingress-scanner:latest
71 | imagePullPolicy: IfNotPresent
72 | name: ingress-scanner
73 | restartPolicy: Never
74 | serviceAccount: ingress-scanner
75 | serviceAccountName: ingress-scanner
76 | tolerations:
77 | - effect: NoExecute
78 | operator: Exists
79 | - effect: NoSchedule
80 | operator: Exists
81 |
--------------------------------------------------------------------------------
/NGINX-to-pods-check/README.md:
--------------------------------------------------------------------------------
1 | # NGINX-to-pods-check
2 | This script is designed to walk through all the ingresses in a cluster and test that it can curl the backend pods from the NGINX pods. This is mainly done to verify that the overlay network is working and to check the overall configuration.
3 |
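Each check the script performs is roughly equivalent to the following manual test. This is only a sketch; the controller pod name and backend pod IP below are placeholders taken from the example output further down.

```bash
# Sketch of a single check: curl one backend pod IP/port from inside an ingress-nginx controller pod
kubectl -n ingress-nginx exec nginx-ingress-controller-b2s2d -- \
  curl -s -o /dev/null -m 5 http://10.42.0.251:80 && echo OK || echo NOK
```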
4 | ## Run script
5 | ```
6 | curl https://raw.githubusercontent.com/rancherlabs/support-tools/master/NGINX-to-pods-check/check.sh | bash
7 | ```
8 |
9 | ## Example output
10 |
11 | ### Broken pod
12 |
13 | ```
14 | bash ./check.sh -F Table
15 | ####################################################
16 | Pod: webserver-bad-85cf9ccdf8-8v4mh
17 | PodIP: 10.42.0.252
18 | Port: 80
19 | Endpoint: ingress-1d8af467b8b7c9682fda18c8d5053db7
20 | Ingress: test-bad
21 | Ingress Pod: nginx-ingress-controller-b2s2d
22 | Node: a1ubphylbp01
23 | Status: Fail!
24 | ####################################################
25 | ```
26 |
27 | ```
28 | bash ./check.sh -F Inline
29 | Checking Pod webserver-bad-8v4mh PodIP 10.42.0.252 on Port 80 in endpoint ingress-bad for ingress test-bad from nginx-ingress-controller-b2s2d on node a1ubphylbp01 NOK
30 | ```
31 |
32 | ### Working pod
33 |
34 | ```
35 | bash ./check.sh -F Table
36 | ####################################################
37 | Pod: webserver-bad-85cf9ccdf8-8v4mh
38 | PodIP: 10.42.0.252
39 | Port: 80
40 | Endpoint: ingress-1d8af467b8b7c9682fda18c8d5053db7
41 | Ingress: test-bad
42 | Ingress Pod: nginx-ingress-controller-b2s2d
43 | Node: a1ubphylbp01
44 | Status: Pass!
45 | ####################################################
46 | ```
47 |
48 | ```
49 | bash ./check.sh -F Inline
50 | Checking Pod webserver-good-65644cffd4-gbpkj PodIP 10.42.0.251 on Port 80 in endpoint ingress-good for ingress test-good from nginx-ingress-controller-b2s2d on node a1ubphylbp01 OK
51 | ```
52 |
53 | ## Testing
54 |
55 | The following command deploys two workloads and ingresses: one with a working web server responding on port 80, and one with the web server disabled, so connections to it will fail.
56 |
57 | ```
58 | kubectl apply -f https://raw.githubusercontent.com/rancherlabs/support-tools/master/NGINX-to-pods-check/example-deployment.yml
59 | ```
60 |
--------------------------------------------------------------------------------
/charts/systems-info/templates/_helpers.tpl:
--------------------------------------------------------------------------------
1 | {{/* vim: set filetype=mustache: */}}
2 | {{/*
3 | Expand the name of the chart.
4 | */}}
5 | {{- define "systems-information.name" -}}
6 | {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}}
7 | {{- end -}}
8 |
9 | {{/*
10 | Create a default fully qualified app name.
11 | We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
12 | If release name contains chart name it will be used as a full name.
13 | */}}
14 | {{- define "systems-information.fullname" -}}
15 | {{- if .Values.fullnameOverride -}}
16 | {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}}
17 | {{- else -}}
18 | {{- $name := default .Chart.Name .Values.nameOverride -}}
19 | {{- if contains $name .Release.Name -}}
20 | {{- .Release.Name | trunc 63 | trimSuffix "-" -}}
21 | {{- else -}}
22 | {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}}
23 | {{- end -}}
24 | {{- end -}}
25 | {{- end -}}
26 |
27 | {{/*
28 | Create chart name and version as used by the chart label.
29 | */}}
30 | {{- define "systems-information.chart" -}}
31 | {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}}
32 | {{- end -}}
33 |
34 | {{/*
35 | Common labels
36 | */}}
37 | {{- define "systems-information.labels" -}}
38 | helm.sh/chart: {{ include "systems-information.chart" . }}
39 | {{ include "systems-information.selectorLabels" . }}
40 | {{- if .Chart.AppVersion }}
41 | app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
42 | {{- end }}
43 | app.kubernetes.io/managed-by: {{ .Release.Service }}
44 | {{- end -}}
45 |
46 | {{/*
47 | Selector labels
48 | */}}
49 | {{- define "systems-information.selectorLabels" -}}
50 | app.kubernetes.io/name: {{ include "systems-information.name" . }}
51 | app.kubernetes.io/instance: {{ .Release.Name }}
52 | {{- end -}}
53 |
54 | {{/*
55 | Create the name of the service account to use
56 | */}}
57 | {{- define "systems-information.serviceAccountName" -}}
58 | {{- if .Values.serviceAccount.create -}}
59 | {{ default (include "systems-information.fullname" .) .Values.serviceAccount.name }}
60 | {{- else -}}
61 | {{ default "default" .Values.serviceAccount.name }}
62 | {{- end -}}
63 | {{- end -}}
64 |
--------------------------------------------------------------------------------
/fleet-delete-cluster-registration/delete_old_resources.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
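# Usage (descriptive note): ./delete_old_resources.sh [namespace] [chunk_size]
#   namespace   namespace holding the ClusterRegistration resources (defaults to fleet-default)
#   chunk_size  number of registrations handled per background deletion worker (defaults to 100)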
3 | namespace=${1-fleet-default}
4 | chunk_size=${2-100}
5 |
6 | if [ "$chunk_size" -le 1 ]; then
7 | chunk_size=1
8 | fi
9 |
10 | # We output the cluster name first, then the creation timestamp, then the
11 | # resource name for ordering to work by cluster, then by ascending creation
12 | # timestamp, which is in "YYYY-MM-DDTHH:mm:SSZ" format.
13 | jsonPath='{range .items[*]}{@.status.clusterName}{"_"}{@.metadata.creationTimestamp}{"_"}{@.metadata.name}{"\n"}{end}'
14 | cluster_regs=$(kubectl get clusterregistration -o=jsonpath="$jsonPath" -n "$namespace" | sort)
15 |
16 | read -ra regs -d '' <<< "${cluster_regs}"
17 |
18 | # delete_chunk deletes cluster registrations, extracting their names from $regs
19 | # This function operates on set of indexes between first_idx (first argument)
20 | # and last_chunk_idx (second argument), both included.
21 | delete_chunk() {
22 | first_idx=$1
23 | last_idx=$2
24 |
25 | for (( i = first_idx; i <= last_idx; i++ )); do
26 | IFS=_ read -r cluster_name creation_timestamp name <<< "${regs[i]}"
27 | IFS=_ read -r next_cluster_name next_creation_timestamp next_name <<< "${regs[i+1]}"
28 |
29 | if [[ "$next_cluster_name" = "$cluster_name" ]]; then
30 | # The most recent cluster registration is still ahead of us: deletion is safe.
31 | echo -n "Cluster: $cluster_name"
32 | echo -e "\t$(kubectl delete --ignore-not-found=true clusterregistration "$name" -n "$namespace")"
33 | fi
34 | done
35 | }
36 |
37 | declare -a pids
38 |
39 | # The only resource we do not want to delete for each cluster is the last
40 | # element, most recently created.
41 | last_idx=$(( ${#regs[@]} - 1 ))
42 | if [ $chunk_size -ge $last_idx ]; then
43 | chunk_size=$last_idx
44 | fi
45 |
46 | # Start an async deletion process for each chunk.
47 | for (( i = 0; i < last_idx; i+= chunk_size )); do
48 | last_chunk_idx=$(( i + chunk_size - 1 ))
49 | if [ $last_chunk_idx -ge $last_idx ]; then
50 | last_chunk_idx="$last_idx"
51 | fi
52 |
53 | delete_chunk $i $last_chunk_idx &
54 | pids[${i}]=$!
55 | done
56 |
57 | # wait for deletion to complete on all chunks.
58 | for pid in ${pids[@]}; do
59 | wait $pid
60 | done
61 |
--------------------------------------------------------------------------------
/eks-upgrade-using-api/README.md:
--------------------------------------------------------------------------------
1 | # SURE-5880 Support Script
2 |
3 | ## Purpose
4 |
5 | This script is designed to be used to upgrade EKS clusters using the Rancher API. It has been specifically designed for Rancher v2.6.10 and for upgrading EKS clusters from 1.22 to 1.23 (whilst a UI issue prevents this).
6 |
7 | ## Requirements
8 |
9 | This script requires the following:
10 |
11 | - jq
12 | - cURL
13 | - Rancher API Endpoint
14 | - Rancher API Token
15 |
16 | ## Demo
17 |
18 | 
19 |
20 | ## Usage
21 |
22 | 1. Create an API key in Rancher. The key can be scoped per cluster or with no scope. It's easier to have no scope, as you can use the same API key for all cluster upgrades.
23 | 2. Note down the **Bearer Token** API key
24 | 3. Note down the API Endpoint
25 | 4. Open a terminal
26 | 5. Export environment variables for the key and endpoint
27 |
28 | ```bash
29 | export RANCHER_TOKEN="<bearer-token>"
30 | export RANCHER_API="<rancher-api-endpoint>"
31 | ```
32 |
33 | 6. Get a list of your EKS clusters using this command
34 |
35 | ```bash
36 | # For v2
37 | ./eks-support.sh list -t $RANCHER_TOKEN --endpoint $RANCHER_API
38 | # For v1
39 | ./eks-support.sh list -t $RANCHER_TOKEN --endpoint $RANCHER_API --kev1
40 | ```
41 |
42 | > The output will list all the found EKS clusters with their name, id, current version and state.
43 |
44 | ### Upgrading EKS Clusters
45 |
46 | 1. For each EKS cluster you want to upgrade run the following command:
47 |
48 | ```bash
49 | # For v2
50 | ./eks-support.sh upgrade -t $RANCHER_TOKEN --endpoint $RANCHER_API --from 1.22 --to 1.23 --name <cluster-name>
51 | # For v1
52 | ./eks-support.sh upgrade -t $RANCHER_TOKEN --endpoint $RANCHER_API --from 1.22 --to 1.23 --name <cluster-name> --aws-secret-key "<aws-secret-key>" --kev1
53 | ```
54 |
55 | > Replace the values of --from, --to and --name with your values.
56 |
57 | 2. The cluster will start to upgrade. You can check the status of a specific cluster using this command:
58 |
59 | ```bash
60 | ./eks-support.sh status -t $RANCHER_TOKEN --endpoint $RANCHER_API --name richtest1
61 | ```
62 |
63 | ### Unsetting Node Groups as managed fields for imported EKS Clusters (only for KEv2)
64 |
65 | ```bash
66 | # For v2
67 | ./eks-support.sh unset_nodegroups -t $RANCHER_TOKEN --endpoint $RANCHER_API --name <cluster-name>
68 | ```
69 |
70 |
--------------------------------------------------------------------------------
/collection/rancher/v2.x/RBAC-role-collector/role-dump.sh:
--------------------------------------------------------------------------------
1 | #! /bin/bash
2 |
3 | # Troubleshooting Bash settings in case of unexpected failure
4 | # set -o errexit # Set to exit on error. Do not enable this unless running against upstream Rancher cluster
5 | # set -o xtrace # Set to output every line Bash runs as it runs the script
6 |
7 | # Unset variables used in the script to be safe
8 | unset crd cluster wd dir file role i
9 |
10 | _declare_variables () {
11 | # Role types to collect
12 | crd=(\
13 | clusterroletemplatebindings \
14 | globalrolebindings \
15 | globalroles \
16 | projectroletemplatebindings \
17 | roletemplates.management.cattle.io \
18 | roletemplatebindings \
19 | clusterrolebindings \
20 | clusterroles \
21 | roletemplates.rancher.cattle.io \
22 | rolebindings \
23 | roles
24 | )
25 |
26 | # Store filename friendly cluster name
27 | cluster=$(_slugify "$(kubectl config current-context)") #
28 |
29 | # Working directory
30 | wd="$cluster"_role-bindings_$(date +"%Y-%m-%d_%H_%M_%S")
31 | }
32 |
33 |
34 | # Slugify strings (replace any special characters with `-`)
35 | _slugify () {
36 | echo "$1" |
37 | iconv -t ascii//TRANSLIT |
38 | sed -r 's/[^a-zA-Z0-9]+/-/g' |
39 | sed -r 's/^-+|-+$//g' |
40 | tr 'A-Z' 'a-z'
41 | }
42 |
43 | # Generate a list (`rolebindings.list`) of all the role bindings and template bindings in the cluster
44 | _list_rolebindings () {
45 | for i in ${crd[*]} ; do
46 | echo "Listing $i"
47 | printf "\n\n# $i\n" >> "$wd"/rolebindings.list
48 | kubectl get $i -A >> "$wd"/rolebindings.list
49 | done
50 | }
51 |
52 | # Generate a JSON per role type containing all the rolebindings
53 | _get_rolebindings () {
54 | for i in ${crd[*]} ; do
55 | echo "Getting $i JSON"
56 | file=$(_slugify "$i")
57 | kubectl get "$i" -A -o json > "$wd"/"$file".json
58 | done
59 | }
60 |
61 | # Archive and compress the report
62 | _tarball_wd () {
63 | echo "Compressing $wd"
64 | tar -czvf "$wd".tar.gz "$wd"
65 | }
66 |
67 |
68 | # Runs all the things
69 | main () {
70 | _declare_variables
71 | # Create working directory
72 | if [[ ! -e "$wd" ]]; then
73 | mkdir "$wd"
74 | fi
75 | _list_rolebindings >& "$wd"/list.log
76 | _get_rolebindings >& "$wd"/rolebindings.log
77 | _tarball_wd
78 | }
79 |
80 | # ACTUALLY run all the things
81 | main
82 |
--------------------------------------------------------------------------------
/NGINX-to-pods-check/example-deployment.yml:
--------------------------------------------------------------------------------
1 | ---
2 | apiVersion: apps/v1
3 | kind: Deployment
4 | metadata:
5 | labels:
6 | app: webserver-good
7 | name: webserver-good
8 | spec:
9 | selector:
10 | matchLabels:
11 | app: webserver-good
12 | template:
13 | metadata:
14 | labels:
15 | app: webserver-good
16 | spec:
17 | containers:
18 | - image: httpd
19 | name: webserver-good
20 |
21 | ---
22 | apiVersion: apps/v1
23 | kind: Deployment
24 | metadata:
25 | labels:
26 | app: webserver-bad
27 | name: webserver-bad
28 | spec:
29 | selector:
30 | matchLabels:
31 | app: webserver-bad
32 | template:
33 | metadata:
34 | labels:
35 | app: webserver-bad
36 | spec:
37 | containers:
38 | - args:
39 | - while true; do sleep 100000; done;
40 | command:
41 | - /bin/sh
42 | - -c
43 | image: httpd
44 | imagePullPolicy: Always
45 | name: webserver-bad
46 |
47 | ---
48 | apiVersion: v1
49 | kind: Service
50 | metadata:
51 | labels:
52 | app: webserver-good
53 | name: webserver-good
54 | spec:
55 | ports:
56 | - name: "80"
57 | port: 80
58 | targetPort: 80
59 | selector:
60 | app: webserver-good
61 |
62 | ---
63 | apiVersion: v1
64 | kind: Service
65 | metadata:
66 | labels:
67 | app: webserver-bad
68 | name: webserver-bad
69 | spec:
70 | ports:
71 | - name: "80"
72 | port: 80
73 | targetPort: 80
74 | selector:
75 | app: webserver-bad
76 |
77 | ---
78 | apiVersion: networking.k8s.io/v1
79 | kind: Ingress
80 | metadata:
81 | name: webserver-good
82 | spec:
83 | rules:
84 | - host: webserver-good.local
85 | http:
86 | paths:
87 | - backend:
88 | service:
89 | name: webserver-good
90 | port:
91 | number: 80
92 | path: /
93 | pathType: ImplementationSpecific
94 | ---
95 | apiVersion: networking.k8s.io/v1
96 | kind: Ingress
97 | metadata:
98 | name: webserver-bad
99 | spec:
100 | rules:
101 | - host: webserver-bad.local
102 | http:
103 | paths:
104 | - backend:
105 | service:
106 | name: webserver-bad
107 | port:
108 | number: 80
109 | path: /
110 | pathType: ImplementationSpecific
111 |
--------------------------------------------------------------------------------
/collection/rancher/v2.x/logs-collector/collection-details.md:
--------------------------------------------------------------------------------
1 | # Collection Details
2 |
3 | ## Overview
4 | This document provides transparency about the output collected when running the logs collector script. The collection is designed to gather necessary troubleshooting information while respecting privacy and security concerns.
5 |
6 | Where possible, output from the collection is sanitized; however, we recommend you check a log collection and remove or edit any sensitive data.
7 |
8 | ### Node-level collection
9 |
10 | Output that is collected only from the node where the logs collector script is run
11 |
12 | #### Operating System
13 | - General OS configuration, for example: the hostname, resources, process list, service list, packages, limits and tunables
14 | - Networking, iptables, netstat, interfaces, CNI configuration
15 | - Journalctl output for related services if available, a list of services is listed in [the `JOURNALD_LOGS` variable](https://github.com/rancherlabs/support-tools/blob/master/collection/rancher/v2.x/logs-collector/rancher2_logs_collector.sh#L12)
16 | - OS logs from /var/logs, a list of log files is listed in [the `VAR_LOG_FILES` variable](https://github.com/rancherlabs/support-tools/blob/master/collection/rancher/v2.x/logs-collector/rancher2_logs_collector.sh#L15)
17 |
18 | #### Kubernetes
19 | - Distribution logs, for example rke2 and k3s agent/server journalctl logs
20 | - Distribution configuration, rke2 and k3s configuration files, static pod manifests
21 | - Container runtime logs and configuration, containerd or docker
22 |
23 | ### Cluster-level collection
24 |
25 | Output that is collected from the cluster
26 |
27 | Note: pod logs from other nodes and additional kubectl output can only be collected when running on a control plane/server node.
28 |
29 | #### Kubernetes
30 | - Kubernetes control plane and worker component configuration and logs, for example: kubelet, etcd, kube-apiserver
31 | - Kubernetes pod logs from related namespaces, a list of namespaces is listed in [the `SYSTEM_NAMESPACE` variable](https://github.com/rancherlabs/support-tools/blob/master/collection/rancher/v2.x/logs-collector/rancher2_logs_collector.sh#L6) located in the script
32 | - Directory listings, for example: rke2 manifests directory, SSL certificates, etcd snapshots
33 |
34 | #### Kubectl output
35 | - Kubectl list of nodes, pods, services, RBAC roles, persistent volumes, events, ingress and deployments
36 | - Cluster provisioning CRD objects
--------------------------------------------------------------------------------
/charts/systems-info/questions.yml:
--------------------------------------------------------------------------------
1 | labels:
2 | io.cattle.role: project # options are cluster/project
3 | categories:
4 | - Support
5 | Name: systems-info
6 | Namespace: systems-info
7 | questions:
8 | - variable: defaultImage
9 | default: true
10 | description: "Use default Docker image"
11 | label: Use Default Image
12 | type: boolean
13 | show_subquestion_if: false
14 | group: "Container Images"
15 | subquestions:
16 | - variable: image.repository
17 | default: "docker.io/rancher/systems-info"
18 | description: "Docker image repository"
19 | type: string
20 | label: Image Repository
21 | - variable: image.tag
22 | default: "v0.0.1"
23 | description: "Docker image tag"
24 | type: string
25 | label: Image Tag
26 | - variable: schedule
27 | required: true
28 | default: '0 0 * * 1'
29 | description: "Backup schedule in crontab format"
30 | type: string
31 | label: "Backup schedule"
32 | - variable: rancher_name
33 | default: "Rancher"
34 | description: "A human-readable name for this install"
35 | type: string
36 | label: Rancher Name
37 | required: true
38 | group: "General Settings"
39 | - variable: to_address
40 | default: ""
41 | description: "Email address for sending report to"
42 | type: string
43 | label: To address
44 | required: true
45 | group: "General Settings"
46 | - variable: send_to_support
47 | default: "true"
48 | description: "Would you like this report sent to Rancher Support?"
49 | type: string
50 | label: Send report to Rancher Support
51 | required: false
52 | group: "General Settings"
53 | - variable: smtp_host
54 | default: ""
55 | description: "SMTP server hostname"
56 | type: string
57 | label: SMTP Server
58 | required: true
59 | group: "Mail Server Settings"
60 | - variable: smtp_port
61 | default: "587"
62 | description: "SMTP server port"
63 | type: string
64 | label: SMTP port
65 | required: true
66 | group: "Mail Server Settings"
67 | - variable: smtp_user
68 | default: ""
69 | description: "SMTP username"
70 | type: string
71 | label: SMTP username
72 | required: true
73 | group: "Mail Server Settings"
74 | - variable: smtp_pass
75 | default: ""
76 | description: "SMTP password"
77 | type: password
78 | label: SMTP password
79 | required: true
80 | group: "Mail Server Settings"
81 | - variable: from_address
82 | default: ""
83 | description: "From address"
84 | type: string
85 | label: From address
86 | required: true
87 | group: "Mail Server Settings"
88 |
--------------------------------------------------------------------------------
/charts/systems-information/questions.yml:
--------------------------------------------------------------------------------
1 | labels:
2 | io.cattle.role: project # options are cluster/project
3 | categories:
4 | - Support
5 | Name: systems-info
6 | Namespace: systems-info
7 | questions:
8 | - variable: defaultImage
9 | default: true
10 | description: "Use default Docker image"
11 | label: Use Default Image
12 | type: boolean
13 | show_subquestion_if: false
14 | group: "Container Images"
15 | subquestions:
16 | - variable: image.repository
17 | default: "docker.io/rancher/systems-info"
18 | description: "Docker image repository"
19 | type: string
20 | label: Image Repository
21 | - variable: image.tag
22 | default: "v0.0.1"
23 | description: "Docker image tag"
24 | type: string
25 | label: Image Tag
26 | - variable: schedule
27 | required: true
28 | default: '0 0 * * 1'
29 | description: "Backup schedule in crontab format"
30 | type: string
31 | label: "Backup schedule"
32 | - variable: rancher_name
33 | default: "Rancher"
34 | description: "A human-readable name for this install"
35 | type: string
36 | label: Rancher Name
37 | required: true
38 | group: "General Settings"
39 | - variable: to_address
40 | default: ""
41 | description: "Email address for sending report to"
42 | type: string
43 | label: To address
44 | required: true
45 | group: "General Settings"
46 | - variable: send_to_support
47 | default: "true"
48 | description: "Would you like this report sent to Rancher Support?"
49 | type: string
50 | label: Send report to Rancher Support
51 | required: false
52 | group: "General Settings"
53 | - variable: smtp_host
54 | default: ""
55 | description: "SMTP server hostname"
56 | type: string
57 | label: SMTP Server
58 | required: true
59 | group: "Mail Server Settings"
60 | - variable: smtp_port
61 | default: "587"
62 | description: "SMTP server port"
63 | type: string
64 | label: SMTP port
65 | required: true
66 | group: "Mail Server Settings"
67 | - variable: smtp_user
68 | default: ""
69 | description: "SMTP username"
70 | type: string
71 | label: SMTP username
72 | required: true
73 | group: "Mail Server Settings"
74 | - variable: smtp_pass
75 | default: ""
76 | description: "SMTP password"
77 | type: password
78 | label: SMTP password
79 | required: true
80 | group: "Mail Server Settings"
81 | - variable: from_address
82 | default: ""
83 | description: "From address"
84 | type: string
85 | label: From address
86 | required: true
87 | group: "Mail Server Settings"
88 |
--------------------------------------------------------------------------------
/cleanup-etcd-part-files/alt-s3-sync.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | apiVersion: v1
3 | kind: Secret
4 | metadata:
5 | labels:
6 | app: alt-etcd-s3-sync
7 | name: etcd-s3
8 | namespace: kube-system
9 | type: Opaque
10 | data:
11 | # all values should be base64 encoded (ie: echo -n '123456' | base64)
12 | S3_ACCESS_KEY:
13 | S3_BUCKET_NAME:
14 | S3_BUCKET_REGION:
15 | S3_ENDPOINT:
16 | S3_FOLDER:
17 | S3_SECRET_KEY:
18 | ---
19 | apiVersion: apps/v1
20 | kind: Deployment
21 | metadata:
22 | labels:
23 | app: rolling-etcd-snapshots
24 | name: rolling-etcd-snapshots
25 | namespace: kube-system
26 | spec:
27 | replicas: 1
28 | selector:
29 | matchLabels:
30 | app: rolling-etcd-snapshots
31 | template:
32 | metadata:
33 | labels:
34 | app: rolling-etcd-snapshots
35 | spec:
36 | affinity:
37 | nodeAffinity:
38 | requiredDuringSchedulingIgnoredDuringExecution:
39 | nodeSelectorTerms:
40 | - matchExpressions:
41 | - key: beta.kubernetes.io/os
42 | operator: NotIn
43 | values:
44 | - windows
45 | - key: node-role.kubernetes.io/etcd
46 | operator: In
47 | values:
48 | - "true"
49 | containers:
50 | - args:
51 | - /opt/rke-tools/rke-etcd-backup
52 | - etcd-backup
53 | - save
54 | - --cacert
55 | - /etc/kubernetes/ssl/kube-ca.pem
56 | - --cert
57 | - /etc/kubernetes/ssl/kube-node.pem
58 | - --key
59 | - /etc/kubernetes/ssl/kube-node-key.pem
60 | - --s3-backup=true
61 | - --creation=12h
62 | - --retention=72h
63 | envFrom:
64 | - secretRef:
65 | name: etcd-s3
66 | optional: false
67 | image: rancher/rke-tools:v0.1.66
68 | imagePullPolicy: IfNotPresent
69 | name: rolling-etcd-snapshots
70 | volumeMounts:
71 | - mountPath: /backup
72 | name: rke-tools
73 | - mountPath: /etc/kubernetes
74 | name: k8s-certs
75 | hostNetwork: true
76 | tolerations:
77 | - effect: NoExecute
78 | key: node-role.kubernetes.io/etcd
79 | operator: Equal
80 | value: "true"
81 | volumes:
82 | - hostPath:
83 | path: /opt/rke/etcd-snapshots
84 | type: ""
85 | name: rke-tools
86 | - hostPath:
87 | path: /etc/kubernetes
88 | type: ""
89 | name: k8s-certs
90 |
--------------------------------------------------------------------------------
/collection/rancher/v2.x/systems-information-v2/deploy.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: Pod
3 | metadata:
4 | name: rancher-systems-summary-pod
5 | namespace: cattle-system
6 | spec:
7 | serviceAccountName: rancher
8 | containers:
9 | - name: rancher-systems-summary
10 | image: rancherlabs/swiss-army-knife
11 | command: ["/bin/bash", "-c"]
12 | args:
13 | - |
14 | echo 'Rancher Systems Summary Report';
15 | echo '==============================';
16 | echo "Run on $(date)";
17 | echo;
18 | kubectl -n cattle-system get pods -l app=rancher;
19 | echo "Rancher version: $(kubectl get settings.management.cattle.io server-version --no-headers -o custom-columns=version:value)";
20 | echo "Rancher id: $(kubectl get settings.management.cattle.io install-uuid --no-headers -o custom-columns=id:value)";
21 | echo;
22 | kubectl get clusters.management.cattle.io -o custom-columns=Cluster\ Id:metadata.name,Name:spec.displayName,K8s\ Version:status.version.gitVersion,Provider:status.provider,Created:metadata.creationTimestamp,Nodes:status.appliedSpec.rancherKubernetesEngineConfig.nodes[*].address;
23 | CLUSTER_IDS=$(kubectl get cluster.management.cattle.io --no-headers -o custom-columns=id:metadata.name);
24 | for ID in $CLUSTER_IDS; do
25 | CLUSTER_NAME=$(kubectl get cluster.management.cattle.io ${ID} --no-headers -o custom-columns=name:spec.displayName);
26 | NODE_COUNT=$(kubectl get nodes.management.cattle.io -n ${ID} --no-headers 2>/dev/null | wc -l );
27 | ((TOTAL_NODE_COUNT += NODE_COUNT));
28 | echo;
29 | echo '--------------------------------------------------------------------------------';
30 | echo "Cluster: ${CLUSTER_NAME} (${ID})";
31 | kubectl get nodes.management.cattle.io -n ${ID} -o custom-columns=Node\ Id:metadata.name,Address:status.internalNodeStatus.addresses[*].address,etcd:spec.etcd,Control\ Plane:spec.controlPlane,Worker:spec.worker,CPU:status.internalNodeStatus.capacity.cpu,RAM:status.internalNodeStatus.capacity.memory,OS:status.internalNodeStatus.nodeInfo.osImage,Container\ Runtime\ Version:status.internalNodeStatus.nodeInfo.containerRuntimeVersion,Created:metadata.creationTimestamp;
32 | echo "Node count: ${NODE_COUNT}";
33 | done;
34 | echo '--------------------------------------------------------------------------------';
35 | echo "Total node count: ${TOTAL_NODE_COUNT}";
36 | restartPolicy: Never
37 |
--------------------------------------------------------------------------------
/eks-upgrade-using-api/common.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | # This contains common functions for shell scripts. It's
4 | # meant to be sourced into another script.
5 |
6 | ## HELPER FUNCS
7 |
8 | # Send a green message to stdout, followed by a new line
9 | say() {
10 | [ -t 1 ] && [ -n "$TERM" ] &&
11 | echo "$(tput setaf 2)[$MY_NAME]$(tput sgr0) $*" ||
12 | echo "[$MY_NAME] $*"
13 | }
14 |
15 | # Send a green message to stdout, without a trailing new line
16 | say_noln() {
17 | [ -t 1 ] && [ -n "$TERM" ] &&
18 | echo -n "$(tput setaf 2)[$MY_NAME]$(tput sgr0) $*" ||
19 | echo "[$MY_NAME] $*"
20 | }
21 |
22 | # Send a red message to stdout, followed by a new line
23 | say_err() {
24 | [ -t 2 ] && [ -n "$TERM" ] &&
25 | echo -e "$(tput setaf 1)[$MY_NAME] $*$(tput sgr0)" 1>&2 ||
26 | echo -e "[$MY_NAME] $*" 1>&2
27 | }
28 |
29 | # Send a yellow message to stdout, followed by a new line
30 | say_warn() {
31 | [ -t 1 ] && [ -n "$TERM" ] &&
32 | echo "$(tput setaf 3)[$MY_NAME] $*$(tput sgr0)" ||
33 | echo "[$MY_NAME] $*"
34 | }
35 |
36 | # Send a yellow message to stdout, without a trailing new line
37 | say_warn_noln() {
38 | [ -t 1 ] && [ -n "$TERM" ] &&
39 | echo -n "$(tput setaf 3)[$MY_NAME] $*$(tput sgr0)" ||
40 | echo "[$MY_NAME] $*"
41 | }
42 |
43 | # Exit with an error message and (optional) code
44 | # Usage: die [-c <code>] <message>
45 | die() {
46 | code=1
47 | [[ "$1" = "-c" ]] && {
48 | code="$2"
49 | shift 2
50 | }
51 | say_err "$@"
52 | exit "$code"
53 | }
54 |
55 | # Exit with an error message if the last exit code is not 0
56 | ok_or_die() {
57 | code=$?
58 | [[ $code -eq 0 ]] || die -c $code "$@"
59 | }
60 |
61 | ## MAIN
62 | main() {
63 | if [ $# = 0 ]; then
64 | die "No command provided. Please use \`$0 help\` for help."
65 | fi
66 |
67 | # Parse main command line args.
68 | while [ $# -gt 0 ]; do
69 | case "$1" in
70 | -h | --help)
71 | cmd_help
72 | exit 1
73 | ;;
74 | -*)
75 | die "Unknown arg: $1. Please use \`$0 help\` for help."
76 | ;;
77 | *)
78 | break
79 | ;;
80 | esac
81 | shift
82 | done
83 |
84 | # $1 is now a command name. Check if it is a valid command and, if so,
85 | # run it.
86 | #
87 | declare -f "cmd_$1" >/dev/null
88 | ok_or_die "Unknown command: $1. Please use \`$0 help\` for help."
89 |
90 | cmd=cmd_$1
91 | shift
92 |
93 | # $@ is now a list of command-specific args
94 | #
95 | $cmd "$@"
96 | }
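# How a calling script is expected to use this library (sketch only; the command
# names and MY_NAME value below are illustrative, not defined in this file):
#
#   MY_NAME="eks-support"
#   source ./common.sh
#   cmd_help() { say "usage: $0 <command>"; }
#   cmd_list() { say "listing clusters..."; }
#   main "$@"   # dispatches to cmd_<first-argument>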
--------------------------------------------------------------------------------
/eks-upgrade-using-kubectl/common.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | # This contains common functions for shell scripts. It's
4 | # meant to be sourced into another script.
5 |
6 | ## HELPER FUNCS
7 |
8 | # Send a green message to stdout, followed by a new line
9 | say() {
10 | [ -t 1 ] && [ -n "$TERM" ] &&
11 | echo "$(tput setaf 2)[$MY_NAME]$(tput sgr0) $*" ||
12 | echo "[$MY_NAME] $*"
13 | }
14 |
15 | # Send a green message to stdout, without a trailing new line
16 | say_noln() {
17 | [ -t 1 ] && [ -n "$TERM" ] &&
18 | echo -n "$(tput setaf 2)[$MY_NAME]$(tput sgr0) $*" ||
19 | echo "[$MY_NAME] $*"
20 | }
21 |
22 | # Send a red message to stdout, followed by a new line
23 | say_err() {
24 | [ -t 2 ] && [ -n "$TERM" ] &&
25 | echo -e "$(tput setaf 1)[$MY_NAME] $*$(tput sgr0)" 1>&2 ||
26 | echo -e "[$MY_NAME] $*" 1>&2
27 | }
28 |
29 | # Send a yellow message to stdout, followed by a new line
30 | say_warn() {
31 | [ -t 1 ] && [ -n "$TERM" ] &&
32 | echo "$(tput setaf 3)[$MY_NAME] $*$(tput sgr0)" ||
33 | echo "[$MY_NAME] $*"
34 | }
35 |
36 | # Send a yellow message to stdout, without a trailing new line
37 | say_warn_noln() {
38 | [ -t 1 ] && [ -n "$TERM" ] &&
39 | echo -n "$(tput setaf 3)[$MY_NAME] $*$(tput sgr0)" ||
40 | echo "[$MY_NAME] $*"
41 | }
42 |
43 | # Exit with an error message and (optional) code
44 | # Usage: die [-c <code>] <message>
45 | die() {
46 | code=1
47 | [[ "$1" = "-c" ]] && {
48 | code="$2"
49 | shift 2
50 | }
51 | say_err "$@"
52 | exit "$code"
53 | }
54 |
55 | # Exit with an error message if the last exit code is not 0
56 | ok_or_die() {
57 | code=$?
58 | [[ $code -eq 0 ]] || die -c $code "$@"
59 | }
60 |
61 | ## MAIN
62 | main() {
63 | if [ $# = 0 ]; then
64 | die "No command provided. Please use \`$0 help\` for help."
65 | fi
66 |
67 | # Parse main command line args.
68 | while [ $# -gt 0 ]; do
69 | case "$1" in
70 | -h | --help)
71 | cmd_help
72 | exit 1
73 | ;;
74 | -*)
75 | die "Unknown arg: $1. Please use \`$0 help\` for help."
76 | ;;
77 | *)
78 | break
79 | ;;
80 | esac
81 | shift
82 | done
83 |
84 | # $1 is now a command name. Check if it is a valid command and, if so,
85 | # run it.
86 | #
87 | declare -f "cmd_$1" >/dev/null
88 | ok_or_die "Unknown command: $1. Please use \`$0 help\` for help."
89 |
90 | cmd=cmd_$1
91 | shift
92 |
93 | # $@ is now a list of command-specific args
94 | #
95 | $cmd "$@"
96 | }
--------------------------------------------------------------------------------
/change-nodetemplate-owner/README.md:
--------------------------------------------------------------------------------
1 | ## Update
2 | Note: As of Rancher v2.3.3 this should no longer be necessary.
3 | https://github.com/rancher/rancher/issues/12186
4 |
5 | ## Change node template owner
6 | This script will change your node template owner in Rancher 2.x. You can run this script as a Docker image or directly as a bash script. You'll need the cluster ID and the user ID you want to change the ownership to.
7 | 1. To obtain the cluster ID in the Rancher user interface, navigate to Global > "Your Cluster Name", then grab the cluster ID from your address bar. An example URL and the cluster ID derived from it are listed below.
8 | * Example URL: `https://<rancher-server-url>/c/c-48x9z/monitoring`
9 | * Derived cluster ID from above URL: **c-48x9z**
10 | 2. Now we need the user ID of the user who will become the new node template owner; navigate to Global > Users to find the ID.
11 | 3. To run the script using a docker image, make sure your $KUBECONFIG is set to the full path of your Rancher local cluster kube config, then run the following command.
12 |
13 | ```bash
14 | docker run -ti -v $KUBECONFIG:/root/.kube/config patrick0057/change-nodetemplate-owner -c <cluster-id> -n <new-owner-user-id>
15 | ```
16 | 4. To run the script directly, just download change-nodetemplate-owner.sh, make sure your $KUBECONFIG or ~/.kube/config is pointing to the correct Rancher local cluster, then run the following command:
17 |
18 | ```bash
19 | curl -LO https://github.com/rancherlabs/support-tools/raw/master/change-nodetemplate-owner/change-nodetemplate-owner.sh
20 | ./change-nodetemplate-owner.sh -c <cluster-id> -n <new-owner-user-id>
21 | ```
22 | ## Assign a node template to a cluster's node pool.
23 | This is useful for situations where the original owner of a cluster has been deleted, which also deletes their node templates. To use this task successfully, it is recommended that you create a new node template in the UI before
24 | using it. Make sure the node template matches the original ones as closely as possible. You will be shown options to choose from and
25 | prompted for confirmation.
26 |
27 | Run script with docker image
28 |
29 | ```bash
30 | docker run -ti -v $KUBECONFIG:/root/.kube/config patrick0057/change-nodetemplate-owner -t changenodetemplate -c <cluster-id>
31 | ```
32 | Run script from bash command line:
33 |
34 | ```bash
35 | curl -LO https://github.com/rancherlabs/support-tools/raw/master/change-nodetemplate-owner/change-nodetemplate-owner.sh
36 | ./change-nodetemplate-owner.sh -t changenodetemplate -c <cluster-id>
37 | ```
38 |
--------------------------------------------------------------------------------
/collection/rancher/v2.x/profile-collector/README.md:
--------------------------------------------------------------------------------
1 | # Rancher v2.x profiles-collector
2 |
3 | This profiles collector project was created to collect:
4 | - [Golang profiles](https://github.com/pkg/profile) for [Rancher Manager](https://github.com/rancher/rancher/), Rancher Cluster Agent, Fleet Controller and Fleet Agent
5 | - Rancher debug or trace logs when collecting Rancher profiles
6 | - Rancher audit logs when available
7 | - Events from the cattle-system namespace
8 | - metrics with kubectl top from pods and nodes
9 | - Rancher metrics exposed on /metrics
10 |
11 | ## Usage
12 |
13 | The script needs to be downloaded and run with a kubeconfig file for the Rancher Management (local) cluster, or a downstream cluster where cattle-cluster-agent pods are running.
14 |
15 | ### Download and run the script
16 | - Save the script as: `continuous_profiling.sh`
17 |
18 | Using `wget`:
19 | ```bash
20 | wget https://raw.githubusercontent.com/rancherlabs/support-tools/master/collection/rancher/v2.x/profile-collector/continuous_profiling.sh
21 | ```
22 | Using `curl`:
23 | ```bash
24 | curl -OLs https://raw.githubusercontent.com/rancherlabs/support-tools/master/collection/rancher/v2.x/profile-collector/continuous_profiling.sh
25 | ```
26 |
27 | - Run the script:
28 | ```bash
29 | bash continuous_profiling.sh
30 | ```
31 | The script will run until it is interrupted (e.g. Ctrl-C, which sends SIGINT).
32 | A tarball will be generated in the same folder where the script is running. Please share that file with Rancher support.
33 |
34 | ## Flags
35 |
36 | ```
37 | Rancher 2.x profile-collector
38 | Usage: profile-collector.sh [-a rancher -p goroutine,heap ]
39 |
40 | All flags are optional
41 |
42 | -a Application, rancher, cattle-cluster-agent, fleet-controller, fleet-agent
43 | -p Profiles to be collected (comma separated): goroutine,heap,threadcreate,block,mutex,profile
44 | -s Sleep time between loops in seconds
45 | -t Time of CPU profile collections
46 | -l Log level of the Rancher pods: debug or trace
47 | -h This help
48 | ```
49 |
50 | ## Examples
51 | - The default collection is equivalent to:
52 |   `bash continuous_profiling.sh -a rancher -p goroutine,heap,profile -s 120 -t 30`
53 | 
54 | - Collecting upstream Rancher profiles every 30 minutes, plus trace-level logs:
55 |   `bash continuous_profiling.sh -s 1800 -l trace`
56 | 
57 | - Collecting cattle-cluster-agent heap and CPU profiles:
58 |   `bash continuous_profiling.sh -a cattle-cluster-agent -p heap,profile`
59 | 
60 | - Collecting the fleet-agent `profile` (CPU) profile over a minute:
61 |   `bash continuous_profiling.sh -a fleet-agent -t 60`
62 |
--------------------------------------------------------------------------------
/swiss-army-knife/overlaytest.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | DNS_TEST=false
4 | NAMESPACE=default
5 |
6 | # Parse arguments
7 | while [[ $# -gt 0 ]]; do
8 | case $1 in
9 | --dns-test)
10 | DNS_TEST=true
11 | shift
12 | ;;
13 | *)
14 | echo "Unknown option: $1"
15 | exit 1
16 | ;;
17 | esac
18 | done
19 |
20 | echo "=> Start network overlay and DNS test"
21 | if $DNS_TEST
22 | then
23 | DNS_PASS=0; DNS_FAIL=0
24 | else
25 | echo "DNS tests are skipped. Use --dns-test to enable."
26 | fi
27 | echo
28 | NET_PASS=0; NET_FAIL=0
29 |
30 | while read spod shost sip
31 | do
32 | echo "Testing pod $spod on node $shost with IP $sip"
33 |
34 | # Overlay network test
35 | echo " => Testing overlay network connectivity"
36 | while read tip thost
37 | do
38 | if [[ ! $shost == $thost ]]; then
39 | kubectl -n $NAMESPACE exec $spod -c overlaytest -- /bin/sh -c "ping -c2 $tip > /dev/null 2>&1"
40 | RC=$?
41 | if [ $RC -ne 0 ]; then
42 | ((NET_FAIL+=1)); echo " FAIL: $spod on $shost cannot reach pod IP $tip on $thost"
43 | else
44 | ((NET_PASS+=1)); echo " PASS: $spod on $shost can reach pod IP $tip on $thost"
45 | fi
46 | fi
47 | done < <(kubectl get pods -n $NAMESPACE -l name=overlaytest -o jsonpath='{range .items[*]}{@.status.podIP}{" "}{@.spec.nodeName}{"\n"}{end}' | sort -k2)
48 |
49 | if $DNS_TEST; then
50 | # Internal DNS test
51 | echo " => Testing DNS"
52 | kubectl -n $NAMESPACE exec $spod -c overlaytest -- /bin/sh -c "nslookup kubernetes.default > /dev/null 2>&1"
53 | RC=$?
54 | if [ $RC -ne 0 ]; then
55 | ((DNS_FAIL+=1)); echo " FAIL: $spod cannot resolve internal DNS for 'kubernetes.default'"
56 | else
57 | ((DNS_PASS+=1)); echo " PASS: $spod can resolve internal DNS for 'kubernetes.default'"
58 | fi
59 |
60 | # External DNS test
61 | kubectl -n $NAMESPACE exec $spod -c overlaytest -- /bin/sh -c "nslookup rancher.com > /dev/null 2>&1"
62 | RC=$?
63 | if [ $RC -ne 0 ]; then
64 | ((DNS_FAIL+=1)); echo " FAIL: $spod cannot resolve external DNS for 'rancher.com'"
65 | else
66 | ((DNS_PASS+=1)); echo " PASS: $spod can resolve external DNS for 'rancher.com'"
67 | fi
68 | fi
69 | echo
70 |
71 | done < <(kubectl get pods -n $NAMESPACE -l name=overlaytest -o jsonpath='{range .items[*]}{@.metadata.name}{" "}{@.spec.nodeName}{" "}{@.status.podIP}{"\n"}{end}' | sort -k2)
72 |
73 | NET_TOTAL=$(($NET_PASS + $NET_FAIL))
74 | echo "=> Network [$NET_PASS / $NET_TOTAL]"
75 | if $DNS_TEST; then
76 | DNS_TOTAL=$(($DNS_PASS + $DNS_FAIL))
77 | echo "=> DNS [$DNS_PASS / $DNS_TOTAL]"
78 | fi
79 | echo; echo "=> End network overlay and DNS test"
--------------------------------------------------------------------------------
/collection/rancher/v2.x/systems-information/systems_summary.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | echo "Rancher Systems Summary Report"
4 | echo "=============================="
5 | echo "Run on `date`"
6 | echo
7 |
8 | if [[ ! -z $KUBERNETES_PORT ]];
9 | then
10 | RANCHER_POD=$(kubectl -n cattle-system get pods -l app=rancher --no-headers -o custom-columns=id:metadata.name --field-selector status.phase=Running | head -n1)
11 | KUBECTL_CMD="kubectl -n cattle-system exec ${RANCHER_POD} -c rancher -- kubectl"
12 | else
13 | if $(command -v rke2 >/dev/null 2>&1)
14 | then
15 | KUBECTL_CMD="/var/lib/rancher/rke2/bin/kubectl --kubeconfig=/etc/rancher/rke2/rke2.yaml"
16 | elif $(command -v k3s >/dev/null 2>&1)
17 | then
18 | KUBECTL_CMD="k3s kubectl"
19 | else
20 | # Get docker id for rancher single node install
21 | DOCKER_ID=$(docker ps | grep "rancher/rancher:" | cut -d' ' -f1)
22 | if [ -z "${DOCKER_ID}" ]
23 | then
24 | # Get docker id for rancher ha install
25 | DOCKER_ID=$(docker ps | grep "k8s_rancher_rancher" | cut -d' ' -f1 | head -1)
26 | if [ -z "${DOCKER_ID}" ]
27 | then
28 | echo "Could not find Rancher 2 container, exiting..."
29 | exit -1
30 | fi
31 | fi
32 | KUBECTL_CMD="docker exec ${DOCKER_ID} kubectl"
33 | fi
34 | fi
35 |
36 | echo "Rancher version: $(${KUBECTL_CMD} get settings.management.cattle.io server-version --no-headers -o custom-columns=version:value)"
37 | echo "Rancher id: $(${KUBECTL_CMD} get settings.management.cattle.io install-uuid --no-headers -o custom-columns=id:value)"
38 | echo
39 |
40 | ${KUBECTL_CMD} get clusters.management.cattle.io -o custom-columns=Cluster\ Id:metadata.name,Name:spec.displayName,K8s\ Version:status.version.gitVersion,Provider:status.driver,Created:metadata.creationTimestamp,Nodes:status.appliedSpec.rancherKubernetesEngineConfig.nodes[*].address
41 |
42 | CLUSTER_IDS=$(${KUBECTL_CMD} get cluster.management.cattle.io --no-headers -o custom-columns=id:metadata.name)
43 |
44 | for ID in $CLUSTER_IDS
45 | do
46 | CLUSTER_NAME=$(${KUBECTL_CMD} get cluster.management.cattle.io ${ID} --no-headers -o custom-columns=name:spec.displayName)
47 | NODE_COUNT=$(${KUBECTL_CMD} get nodes.management.cattle.io -n ${ID} --no-headers 2>/dev/null | wc -l )
48 | ((TOTAL_NODE_COUNT += NODE_COUNT))
49 | echo
50 | echo "--------------------------------------------------------------------------------"
51 | echo "Cluster: ${CLUSTER_NAME} (${ID})"
52 | ${KUBECTL_CMD} get nodes.management.cattle.io -n ${ID} -o custom-columns=Node\ Id:metadata.name,Address:status.internalNodeStatus.addresses[*].address,Role:status.rkeNode.role[*],CPU:status.internalNodeStatus.capacity.cpu,RAM:status.internalNodeStatus.capacity.memory,OS:status.dockerInfo.OperatingSystem,Docker\ Version:status.dockerInfo.ServerVersion,Created:metadata.creationTimestamp
53 | echo "Node count: ${NODE_COUNT}"
54 | done
55 | echo "--------------------------------------------------------------------------------"
56 | echo "Total node count: ${TOTAL_NODE_COUNT}"
57 |
--------------------------------------------------------------------------------
/swiss-army-knife/admin-tools.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | apiVersion: v1
3 | kind: Namespace
4 | metadata:
5 | name: swiss-army-knife
6 | labels:
7 | app: swiss-army-knife
8 | ---
9 | apiVersion: rbac.authorization.k8s.io/v1
10 | kind: ClusterRole
11 | metadata:
12 | labels:
13 | app: swiss-army-knife
14 | name: swiss-army-knife
15 | rules:
16 | - apiGroups:
17 | - "*"
18 | resources:
19 | - "*"
20 | verbs:
21 | - "*"
22 | - nonResourceURLs:
23 | - "*"
24 | verbs:
25 | - "*"
26 | ---
27 | apiVersion: v1
28 | kind: ServiceAccount
29 | metadata:
30 | name: swiss-army-knife
31 | namespace: swiss-army-knife
32 | labels:
33 | app: swiss-army-knife
34 | ---
35 | apiVersion: rbac.authorization.k8s.io/v1
36 | kind: ClusterRoleBinding
37 | metadata:
38 | labels:
39 | app: swiss-army-knife
40 | name: swiss-army-knife
41 | roleRef:
42 | apiGroup: rbac.authorization.k8s.io
43 | kind: ClusterRole
44 | name: swiss-army-knife
45 | subjects:
46 | - kind: ServiceAccount
47 | name: swiss-army-knife
48 | namespace: swiss-army-knife
49 | ---
50 | apiVersion: apps/v1
51 | kind: DaemonSet
52 | metadata:
53 | name: swiss-army-knife
54 | namespace: swiss-army-knife
55 | labels:
56 | app: swiss-army-knife
57 | spec:
58 | selector:
59 | matchLabels:
60 | app: swiss-army-knife
61 | template:
62 | metadata:
63 | labels:
64 | app: swiss-army-knife
65 | spec:
66 | tolerations:
67 | - operator: Exists
68 | containers:
69 | - name: swiss-army-knife
70 | image: supporttools/swiss-army-knife
71 | imagePullPolicy: IfNotPresent
72 | securityContext:
73 | privileged: true
74 | resources:
75 | limits:
76 | cpu: 1000m
77 | memory: 1000Mi
78 | requests:
79 | cpu: 100m
80 | memory: 100Mi
81 | env:
82 | - name: POD_NAMESPACE
83 | valueFrom:
84 | fieldRef:
85 | fieldPath: metadata.namespace
86 | - name: POD_IP
87 | valueFrom:
88 | fieldRef:
89 | fieldPath: status.podIP
90 | - name: NODE_NAME
91 | valueFrom:
92 | fieldRef:
93 | fieldPath: spec.nodeName
94 | volumeMounts:
95 | - name: rootfs
96 | mountPath: /rootfs
97 | serviceAccountName: swiss-army-knife
98 | volumes:
99 | - name: rootfs
100 | hostPath:
101 | path: /
102 | ---
103 | apiVersion: v1
104 | kind: Service
105 | metadata:
106 | name: swiss-army-knife
107 | namespace: swiss-army-knife
108 | labels:
109 | app: swiss-army-knife
110 | spec:
111 | selector:
112 | app: swiss-army-knife
113 | ports:
114 | - protocol: TCP
115 | port: 80
116 | targetPort: 80
117 | type: ClusterIP
--------------------------------------------------------------------------------
/swiss-army-knife/README.md:
--------------------------------------------------------------------------------
1 | # Swiss-Army-Knife
2 | Rancher Support uses a standard tool image called `swiss-army-knife` to help you manage your Rancher/Kubernetes environment. You can learn more about this image by visiting its official repo at [rancherlabs/swiss-army-knife](https://github.com/rancherlabs/swiss-army-knife/).
3 |
4 | TLDR; This image has a lot of useful tools that can be used for scripting and troubleshooting.
5 | - [`kubectl`](https://kubernetes.io/docs/reference/kubectl/overview/)
6 | - [`helm`](https://helm.sh/docs/intro/)
7 | - [`curl`](https://curl.haxx.se/docs/manpage.html)
8 | - [`jq`](https://stedolan.github.io/jq/)
9 | - [`traceroute`](https://www.traceroute.org/about.html)
10 | - [`dig`](https://www.dig.com/products/dns/dig/)
11 | - [`nslookup`](https://www.google.com/search?q=nslookup)
12 | - [`ping`](https://www.google.com/search?q=ping)
13 | - [`netstat`](https://www.google.com/search?q=netstat)
14 | - And many more!
15 |
16 | ## Example deployments
17 |
18 | ### Overlay Test
19 | This manifest is part of Rancher's overlay network test, which is documented [here](https://ranchermanager.docs.rancher.com/troubleshooting/other-troubleshooting-tips/networking#check-if-overlay-network-is-functioning-correctly). It can be deployed to the cluster by running the following command:
20 | ```bash
21 | kubectl apply -f https://raw.githubusercontent.com/rancherlabs/support-tools/master/swiss-army-knife/overlaytest.yaml
22 | ```
23 |
24 | This will deploy a DaemonSet that runs on all nodes in the cluster. These pods run `tail -f /dev/null`, which does nothing but keep the pod running.
25 |
26 | You can run the overlay test script by running the following command:
27 | ```bash
28 | curl -sfL https://raw.githubusercontent.com/rancherlabs/support-tools/master/swiss-army-knife/overlaytest.sh | bash
29 | ```
30 |
31 | ### Admin Tools
32 | This deployment runs `swiss-army-knife` on all nodes in the cluster, but with additional permissions and privileges. This is useful for troubleshooting and managing your Rancher environment. The pods run `tail -f /dev/null`, which does nothing but keep the pod running.
33 |
34 | This can be deployed to the cluster by running the following command:
35 | ```bash
36 | kubectl apply -f https://raw.githubusercontent.com/rancherlabs/support-tools/master/swiss-army-knife/admin-tools.yaml
37 | ```
38 |
39 | Inside the pod, you will be able to run `kubectl` commands with cluster-admin privileges. The pod can also gain full access to the node, including a root shell, by running the following commands (a combined example follows below):
40 | - `kubectl -n swiss-army-knife get pods -l app=swiss-army-knife -o wide`
41 |   - This will show you all pods running `swiss-army-knife` in the `swiss-army-knife` namespace.
42 | - Find the pod on the node you want to interact with.
43 | - `kubectl -n swiss-army-knife exec -it <pod name> -- bash`
44 | - `chroot /rootfs`
45 |
46 | You are now running a root shell on the node with full privileges.
47 |
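A minimal sketch of the full sequence, assuming the manifest above was applied unmodified (`<node-name>` is a placeholder for the node you want to access):

```bash
POD=$(kubectl -n swiss-army-knife get pods -l app=swiss-army-knife \
  --field-selector spec.nodeName=<node-name> -o jsonpath='{.items[0].metadata.name}')
kubectl -n swiss-army-knife exec -it "$POD" -- bash
# then, inside the pod:
chroot /rootfs
```
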
48 | **Important:** This deployment is designed for troubleshooting and management purposes and should not be left running on a cluster.
49 |
--------------------------------------------------------------------------------
/collection/rancher/v2.x/windows-log-collector/README.md:
--------------------------------------------------------------------------------
1 | # Rancher v2.x Windows log-collector
2 |
3 | This logs collector project was created to collect logs from Windows Kubernetes nodes. It is designed to be used with RKE1 Windows clusters for troubleshooting support cases.
4 |
5 | ## Usage
6 |
7 | - Open a new PowerShell window with Administrator Privileges (Find Windows PowerShell in Start Menu, right click, Run As Administrator)
8 | - Run the following commands in your PowerShell window
9 |
10 | ```ps1
11 | Set-ExecutionPolicy Bypass
12 | Start-BitsTransfer https://raw.githubusercontent.com/rancherlabs/support-tools/master/collection/rancher/v2.x/windows-log-collector/win-log-collect.ps1
13 | .\win-log-collect.ps1
14 | ```
15 |
16 | ### Upon successful completion, your log bundle will be on the root of the C: drive (example below)
17 |
18 | ```
19 | > dir C:\
20 | d----- 11/14/2018 6:56 AM EFI
21 | d----- 6/2/2020 3:31 PM etc
22 | d----- 6/2/2020 3:31 PM opt
23 | d----- 5/13/2020 6:03 PM PerfLogs
24 | d-r--- 5/13/2020 5:25 PM Program Files
25 | d----- 6/2/2020 3:16 PM Program Files (x86)
26 | d----- 6/2/2020 7:23 PM rancher
27 | d----- 6/2/2020 4:06 PM run
28 | d-r--- 6/1/2020 6:30 PM Users
29 | d----- 6/2/2020 3:31 PM var
30 | d----- 6/1/2020 6:26 PM Windows
31 | -a---- 6/2/2020 5:07 PM 428911 rancher_EC2AMAZ-ENEJ0H8_20200602T1704290242Z.tgz
32 | ```
33 |
34 | ### Expected output
35 |
36 | > Note: The `Unable to Collect Windows Firewall information` error is expected if there are no Domain-specific firewall rules
37 |
38 | ```ps1
39 | Running Rancher Log Collection
40 | Creating temporary directory
41 | OK
42 | Collecting System information
43 | OK
44 | Collecting PS output
45 | Collecting Disk information
46 | Collecting Volume info
47 | OK
48 | Collecting Windows Firewall info
49 | Collecting Rules for Domain profile
50 | get_firewall_info : Unable to Collect Windows Firewall information
51 | At C:\Users\Administrator\log-collect-beta.ps1:397 char:5
52 | + get_firewall_info
53 | + ~~~~~~~~~~~~~~~~~
54 | + CategoryInfo : NotSpecified: (:) [Write-Error], WriteErrorException
55 | + FullyQualifiedErrorId : Microsoft.PowerShell.Commands.WriteErrorException,get_firewall_info
56 |
57 | Collecting installed applications list
58 | OK
59 | Collecting Services list
60 | OK
61 | Collecting Docker daemon information
62 | OK
63 | Collecting Kubernetes components config
64 | OK
65 | Collecting Windows Event logs
66 | OK
67 | Collecting Kubernetes Logs
68 | OK
69 | Collecting network Information
70 | OK
71 | Collecting group policy information
72 | Get-GPOReport is not a valid cmdlet
73 | Collecting proxy information
74 | OK
75 | Archiving Rancher log collection script data
76 | OK
77 | Done. Your log bundle is located in C:\rancher_EC2AMAZ-ENEJ0H8_20200602T1704290242Z
78 | Please supply the log bundle(s) to Rancher Support
79 | Cleaning up directory
80 | OK
81 | ```
82 |
--------------------------------------------------------------------------------
/rancher-metadata-syncer/README.md:
--------------------------------------------------------------------------------
1 | # rancher-metadata-syncer
2 | Rancher Metadata Syncer is a simple pod for publishing the Rancher metadata.json in an airgap setup to allow Rancher to get updated metadata files without granting Rancher internet access or upgrading Rancher.
3 |
4 | ## Installation
5 |
6 | Note: The following tool should only be deployed on the Rancher Local cluster and not on a downstream cluster.
7 |
8 | ### Option A - Configmap
9 | The Configmap option is used when you would like to add the metadata files via a Configmap.
10 | Note: The following steps should be run from a server/workstation with internet access.
11 |
12 | - Download the metadata file(s)
13 | ```bash
14 | wget --no-check-certificate -O v2-4.json https://releases.rancher.com/kontainer-driver-metadata/release-v2.4/data.json
15 | wget --no-check-certificate -O v2-5.json https://releases.rancher.com/kontainer-driver-metadata/release-v2.5/data.json
16 | tar -czvf v2-4.json.tar.gz v2-4.json
17 | tar -czvf v2-5.json.tar.gz v2-5.json
18 | ```
19 |
20 | - Create the Configmap with the metadata files.
21 |
22 | ```bash
23 | kubectl -n cattle-system create configmap rancher-metadata --from-file=v2-4.json=./v2-4.json.tar.gz --from-file=v2-5.json=./v2-5.json.tar.gz
24 | ```
25 |
26 | - Deploy the workload
27 | ```bash
28 | kubectl apply -f deployment-configmap.yaml
29 | ```
30 |
31 | - If you would like to update the metadata files, do the following.
32 |
33 | ```bash
34 | wget --no-check-certificate -O v2-4.json https://releases.rancher.com/kontainer-driver-metadata/release-v2.4/data.json
35 | wget --no-check-certificate -O v2-5.json https://releases.rancher.com/kontainer-driver-metadata/release-v2.5/data.json
36 | tar -czvf v2-4.json.tar.gz v2-4.json
37 | tar -czvf v2-5.json.tar.gz v2-5.json
38 | kubectl -n cattle-system delete configmap rancher-metadata
39 | kubectl -n cattle-system create configmap rancher-metadata --from-file=v2-4.json.tar.gz=./v2-4.json.tar.gz --from-file=v2-5.json.tar.gz=./v2-5.json.tar.gz
40 | kubectl -n cattle-system patch deployment rancher-metadata -p "{\"spec\":{\"template\":{\"metadata\":{\"labels\":{\"date\":\"$(date +%s)\"}}}}}"
41 | ```
42 |
43 | ### Option B - Proxy
44 | The proxy option is used if you would like the deployment to automatically download the metadata files every 6 hours through a proxy, without granting the rest of Rancher internet access.
45 |
46 | - Edit the HTTP_PROXY and HTTPS_PROXY values in deployment-proxy.yaml to match your environment requirements.
47 | ```yaml
48 | - name: HTTPS_PROXY
49 |   value: "https://<username>:<password>@<proxy-host>:<proxy-port>/"
50 | - name: HTTP_PROXY
51 |   value: "http://<username>:<password>@<proxy-host>:<proxy-port>/"
52 | ```
53 |
54 | - Deploy the workload
55 | ```bash
56 | kubectl apply -f deployment-proxy.yaml
57 | ```
58 |
59 | ## Updating Rancher
60 |
61 | - Browse to the Rancher UI -> Global -> Settings -> rke-metadata-config
62 |
63 | - Update the value to the following for Rancher v2.4.x
64 | ```
65 | {
66 | "refresh-interval-minutes": "60",
67 | "url": "http://rancher-metadata/v2-4.json"
68 | }
69 | ```
70 |
71 | - Update the value to the following for Rancher v2.5.x
72 | ```
73 | {
74 | "refresh-interval-minutes": "60",
75 | "url": "http://rancher-metadata/v2-5.json"
76 | }
77 | ```
78 |
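To verify the metadata files are reachable from inside the cluster, a quick check can be run (illustrative only; it assumes the Service created by the deployment is named `rancher-metadata` in `cattle-system`, matching the URLs above, and uses a throwaway `curlimages/curl` pod):

```bash
kubectl -n cattle-system run metadata-check --rm -it --restart=Never \
  --image=curlimages/curl -- curl -s http://rancher-metadata/v2-5.json | head
```
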
--------------------------------------------------------------------------------
/how-to-retrieve-kubeconfig-from-custom-cluster/rke-node-kubeconfig.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | PRIVATE_REGISTRY="${1:+$1/}"
4 |
5 | # Check if controlplane node (kube-apiserver)
6 | CONTROLPLANE=$(docker ps -q --filter=name=kube-apiserver)
7 |
8 | # Get agent image from Docker images
9 | RANCHER_IMAGE=$(docker inspect $(docker images -q --filter=label=io.cattle.agent=true) --format='{{index .RepoTags 0}}' | tail -1)
10 |
11 | if [ -z "$RANCHER_IMAGE" ]; then
12 | RANCHER_IMAGE="${PRIVATE_REGISTRY}rancher/rancher-agent:v2.6.11"
13 | fi
14 |
15 | if [ -d /opt/rke/etc/kubernetes/ssl ]; then
16 | K8S_SSLDIR=/opt/rke/etc/kubernetes/ssl
17 | else
18 | K8S_SSLDIR=/etc/kubernetes/ssl
19 | fi
20 |
21 | # Determine object type for full-cluster-state (depends on Rancher/RKE version), can be either a configmap (older versions) or a secret (newer versions)
22 | FULL_CLUSTER_STATE_TYPE=$(docker run --rm --net=host -v $K8S_SSLDIR:/etc/kubernetes/ssl:ro --entrypoint bash $RANCHER_IMAGE -c 'kubectl --kubeconfig /etc/kubernetes/ssl/kubecfg-kube-node.yaml get configmap -n kube-system full-cluster-state -o jsonpath='{.kind}' 2>/dev/null || kubectl --kubeconfig /etc/kubernetes/ssl/kubecfg-kube-node.yaml get secret -n kube-system full-cluster-state -o jsonpath='{.kind}' 2>/dev/null')
23 |
24 | # Generate kubeconfig depending on object type for full-cluster-state
25 | if [ "$FULL_CLUSTER_STATE_TYPE" = "Secret" ]; then
26 | docker run --rm --net=host -v $K8S_SSLDIR:/etc/kubernetes/ssl:ro --entrypoint bash $RANCHER_IMAGE -c 'kubectl --kubeconfig /etc/kubernetes/ssl/kubecfg-kube-node.yaml get secret -n kube-system full-cluster-state -o json | jq -r .data.\"full-cluster-state\" | base64 -d | jq -r .currentState.certificatesBundle.\"kube-admin\".config | sed -e "/^[[:space:]]*server:/ s_:.*_: \"https://127.0.0.1:6443\"_"' > kubeconfig_admin.yaml
27 | elif [ "$FULL_CLUSTER_STATE_TYPE" = "ConfigMap" ]; then
28 | docker run --rm --net=host -v $K8S_SSLDIR:/etc/kubernetes/ssl:ro --entrypoint bash $RANCHER_IMAGE -c 'kubectl --kubeconfig /etc/kubernetes/ssl/kubecfg-kube-node.yaml get configmap -n kube-system full-cluster-state -o json | jq -r .data.\"full-cluster-state\" | jq -r .currentState.certificatesBundle.\"kube-admin\".config | sed -e "/^[[:space:]]*server:/ s_:.*_: \"https://127.0.0.1:6443\"_"' > kubeconfig_admin.yaml
29 | else
30 | echo "Invalid type for object \"full-cluster-state\" (should be a Secret or a ConfigMap). Exiting..."
31 | exit 1
32 | fi
33 |
34 | if [ -s kubeconfig_admin.yaml ]; then
35 | if [ -z "$CONTROLPLANE" ]; then
36 | echo "This is supposed to be run on a node with the 'controlplane' role as it will try to connect to https://127.0.0.1:6443"
37 | echo "You can manually change the 'server:' parameter inside 'kubeconfig_admin.yaml' to point to a node with the 'controlplane' role"
38 | fi
39 | echo "Kubeconfig is stored at: kubeconfig_admin.yaml
40 |
41 | You can use one of the following commands to use it:
42 |
43 | docker run --rm --net=host -v $PWD/kubeconfig_admin.yaml:/root/.kube/config --entrypoint bash $RANCHER_IMAGE -c 'kubectl get nodes'
44 |
45 | kubectl --kubeconfig kubeconfig_admin.yaml get nodes
46 |
47 | Note: if kubectl is not available on the node, the binary can be copied from the kubelet container:
48 | docker cp kubelet:/usr/local/bin/kubectl /usr/local/bin/"
49 | else
50 | echo "Failed to retrieve kubeconfig"
51 | fi
--------------------------------------------------------------------------------
/rancher-crd/enumerate-resources/rancher-resource-enumerator.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | datenow="$(date "+%F-%H-%M-%S")"
4 | outputdir="/tmp/enum-cattle-resources-$datenow"
5 | export outputdir
6 |
7 | usage() {
8 | printf "Rancher Resource Enumerator \n"
9 | printf "Usage: ./rancher-resource-enumerator.sh [ -d -n | -c | -a ]\n"
10 | printf " -h Display this help message.\n"
11 | printf " -a Enumerate all custom resources.\n"
12 | printf " -n Only enumerate resources in the specified namespace(s).\n"
13 | printf " -c Only enumerate cluster (non-namespaced) resources.\n"
14 | printf " -d Path to output directory (default: /tmp/enum-cattle-resources-<timestamp>).\n"
15 | exit 0
16 | }
17 |
18 | # Arguments
19 | optstring="cahd:n:"
20 | while getopts ${optstring} opt; do
21 | case ${opt} in
22 | h) usage
23 | ;;
24 | d) path=${OPTARG}
25 | outputdir="$path-$datenow"
26 | export outputdir
27 | ;;
28 | a) all=1
29 | export all
30 | ;;
31 | n) namespaces=${OPTARG}
32 | export namespaces
33 | ;;
34 | c) cluster=1
35 | export cluster
36 | ;;
37 | *) printf "Invalid Option: %s.\n" "$1"
38 | usage
39 | ;;
40 | esac
41 | done
42 |
43 |
44 | # Setup
45 | setup() {
46 | # Create output directory
47 | echo "Output directory set to $outputdir"
48 | mkdir -p "$outputdir"
49 | }
50 |
51 | # Get cluster resources
52 | non_namespaced() {
53 | kubectl api-resources --verbs=list --namespaced=false -o name | grep cattle.io | xargs -I _ sh -c "echo '(cluster) enumerating _ resources...'; kubectl get _ -o custom-columns=KIND:.kind,NAME:.metadata.name --no-headers=true --ignore-not-found=true >> $outputdir/_"
54 | }
55 |
56 | # Get namespaced resources
57 | namespaced() {
58 | ns="$1"
59 | # Select all namespaces if no namespace is specified
60 | if [ -z "$ns" ]; then
61 | ns="$(kubectl get ns --no-headers -o jsonpath='{.items[*].metadata.name}')"
62 | fi
63 | # Get all custom resources for validated namespaces
64 | for n in $ns
65 | do
66 | kubectl get ns "$n" -o name && \
67 | kubectl api-resources --verbs=list --namespaced=true -o name | grep cattle.io | xargs -I _ sh -c "echo '(namespaced) enumerating _ resources in $n...'; kubectl get _ -n $n -o custom-columns=KIND:.kind,NAME:.metadata.name,NAMESPACE:.metadata.namespace --no-headers=true --ignore-not-found=true >> $outputdir/_"
68 | done
69 | }
70 |
71 | # Get total counts
72 | totals() {
73 | countfiles="$outputdir/*"
74 | echo 'counting totals...'
75 | for f in $countfiles
76 | do
77 | wc -l "$f" >> "$outputdir"/totals
78 | done
79 | echo "results saved in $outputdir"
80 | exit 0
81 | }
82 |
83 | main() {
84 | if [ -n "$all" ]; then
85 | setup
86 | non_namespaced
87 | namespaced
88 | totals
89 | elif [ -n "$cluster" ]; then
90 | setup
91 | non_namespaced
92 | totals
93 | elif [ -n "$namespaces" ]; then
94 | setup
95 | namespaced "$namespaces"
96 | totals
97 | else
98 | usage
99 | fi
100 | }
101 |
102 | main
--------------------------------------------------------------------------------
/windows-agent-strict-verify/update-node.ps1:
--------------------------------------------------------------------------------
1 | <#
2 | .SYNOPSIS
3 | Updates the rancher2_connection_info.json file on Windows nodes and optionally downloads the latest version of rancher-wins from the specified Rancher server
4 |
5 | .PARAMETER RancherServerURL
6 | The HTTPS URL of the Rancher server which manages the cluster this node is joined to
7 |
8 | .PARAMETER Token
9 | The Rancher API token tracked in the stv-aggregation secret
10 |
11 | .PARAMETER ForceRegeneration
12 | When set to true, this script will overwrite the rancher2_connection_info.json file, even if the certificate-authority-data field is present
13 |
14 | .PARAMETER DownloadWins
15 | When set to true, this script will reach out to the RancherServerURL API and download the version of rancher-wins embedded in that server
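
.EXAMPLE
    # Illustrative invocation only; the URL and token values below are placeholders.
    .\update-node.ps1 -RancherServerURL "https://rancher.example.com" -Token "token-xxxxx:yyyyyyyy" -DownloadWins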
16 | #>
17 |
18 | param (
19 | [Parameter()]
20 | [String]
21 | $RancherServerURL,
22 |
23 | [Parameter()]
24 | [String]
25 | $Token,
26 |
27 | [Parameter()]
28 | [Switch]
29 | $ForceRegeneration,
30 |
31 | [Parameter()]
32 | [Switch]
33 | $DownloadWins
34 | )
35 |
36 | if ($DownloadWins -eq $true) {
37 | # Download the latest version of wins from the rancher server
38 | $responseCode = $(curl.exe --connect-timeout 60 --max-time 300 --write-out "%{http_code}\n" --ssl-no-revoke -sfL "$RancherServerURL/assets/wins.exe" -o "/usr/local/bin/wins.exe")
39 | switch ( $responseCode ) {
40 | { $_ -in "ok200", 200 } {
41 | Write-Host "Successfully downloaded the wins binary."
42 | break
43 | }
44 | default {
45 | Write-Host "$responseCode received while downloading the wins binary. Double check that the correct RancherServerURL has been provided"
46 | exit 1
47 | }
48 | }
49 | Copy-Item -Path "/usr/local/bin/wins.exe" -Destination "c:\Windows\wins.exe" -Force
50 | }
51 |
52 | # Check the current connection file to determine if CA data is already present.
53 | $info = (Get-Content C:\var\lib\rancher\agent\rancher2_connection_info.json -ErrorAction Ignore)
54 | if (($null -ne $info) -and (($info | ConvertFrom-Json).kubeConfig).Contains("certificate-authority-data")) {
55 | if (-Not $ForceRegeneration) {
56 | Write-Host "certificate-authority-data is already present in rancher2_connection_info.json"
57 | exit 0
58 | }
59 | }
60 |
61 | $CATTLE_ID=(Get-Content /etc/rancher/wins/cattle-id -ErrorAction Ignore)
62 | if (($null -eq $CATTLE_ID) -or ($CATTLE_ID -eq "")) {
63 | Write-Host "Could not obtain required CATTLE_ID value from node"
64 | exit 1
65 | }
66 |
67 | Write-Host "Updating rancher2_connection_info.json file"
68 |
69 | $responseCode = $(curl.exe --connect-timeout 60 --max-time 60 --write-out "%{http_code}\n " --ssl-no-revoke -sfL "$RancherServerURL/v3/connect/agent" -o /var/lib/rancher/agent/rancher2_connection_info.json -H "Authorization: Bearer $Token" -H "X-Cattle-Id: $CATTLE_ID" -H "Content-Type: application/json")
70 |
71 | switch ( $responseCode ) {
72 | { $_ -in "ok200", 200 } {
73 | Write-Host "Successfully downloaded Rancher connection information."
74 | exit 0
75 | }
76 | default {
77 | Write-Host "$responseCode received while downloading Rancher connection information. Double check that the correct RancherServerURL and Token have been provided"
78 | exit 1
79 | }
80 | }
81 |
--------------------------------------------------------------------------------
/troubleshooting-scripts/etcd/README.md:
--------------------------------------------------------------------------------
1 | # etcd-troubleshooting
2 |
3 | ## Check etcd members
4 | Command(s): `docker exec etcd etcdctl member list`
5 |
6 | **Example Output of a healthy cluster**
7 | ```bash
8 | 2f080bc6ec98f39b, started, etcd-a1ubrkeat03, https://172.27.5.33:2380, https://172.27.5.33:2379,https://172.27.5.33:4001, false
9 | 9d7204f89b221ba3, started, etcd-a1ubrkeat01, https://172.27.5.31:2380, https://172.27.5.31:2379,https://172.27.5.31:4001, false
10 | bd37bc0dc2e990b6, started, etcd-a1ubrkeat02, https://172.27.5.32:2380, https://172.27.5.32:2379,https://172.27.5.32:4001, false
11 | ```
12 |
13 | ## Check etcd endpoints
14 | Command(s): `curl https://raw.githubusercontent.com/rancherlabs/support-tools/master/troubleshooting-scripts/etcd/check-endpoints.sh | bash `
15 |
16 | **Example Output of a healthy cluster**
17 | ```bash
18 | Validating connection to https://172.27.5.33:2379/health
19 | {"health":"true"}
20 | Validating connection to https://172.27.5.31:2379/health
21 | {"health":"true"}
22 | Validating connection to https://172.27.5.32:2379/health
23 | {"health":"true"}
24 | ```
25 |
26 | ## Check etcd logs
27 |
28 | `health check for peer xxx could not connect: dial tcp IP:2380: getsockopt: connection refused`
29 |
30 | A connection to the address shown on port 2380 cannot be established. Check if the etcd container is running on the host with the address shown.
31 |
32 |
33 | `xxx is starting a new election at term x`
34 |
35 | The etcd cluster has lost its quorum and is trying to elect a new leader. This can happen when the majority of the nodes running etcd go down or become unreachable.
36 |
37 |
38 | `connection error: desc = "transport: Error while dialing dial tcp 0.0.0.0:2379: i/o timeout"; Reconnecting to {0.0.0.0:2379 0 }`
39 |
40 | The host firewall is preventing network communication.
41 |
42 |
43 | `rafthttp: request cluster ID mismatch`
44 |
45 | The node with the etcd instance logging `rafthttp: request cluster ID mismatch` is trying to join a cluster that has already been formed with another peer. The node should be removed from the cluster, and re-added.
46 |
47 |
48 | `rafthttp: failed to find member`
49 |
50 | The cluster state (`/var/lib/etcd`) contains wrong information to join the cluster. The node should be removed from the cluster, the state directory should be cleaned and the node should be re-added.
51 |
52 | ## Enabling debug logging
53 | `curl -XPUT -d '{"Level":"DEBUG"}' --cacert $(docker exec etcd printenv ETCDCTL_CACERT) --cert $(docker exec etcd printenv ETCDCTL_CERT) --key $(docker exec etcd printenv ETCDCTL_KEY) https://localhost:2379/config/local/log`
54 |
55 | ## Disabling debug logging
56 | `curl -XPUT -d '{"Level":"INFO"}' --cacert $(docker exec etcd printenv ETCDCTL_CACERT) --cert $(docker exec etcd printenv ETCDCTL_CERT) --key $(docker exec etcd printenv ETCDCTL_KEY) https://localhost:2379/config/local/log`
57 |
58 | ## Getting etcd metrics
59 | `curl -X GET --cacert $(docker exec etcd printenv ETCDCTL_CACERT) --cert $(docker exec etcd printenv ETCDCTL_CERT) --key $(docker exec etcd printenv ETCDCTL_KEY) https://localhost:2379/metrics`
60 |
61 |
62 | **wal_fsync_duration_seconds (99% under 10 ms)**
63 |
64 | A wal_fsync is called when etcd persists its log entries to disk before applying them.
65 |
66 |
67 | **backend_commit_duration_seconds (99% under 25 ms)**
68 |
69 | A backend_commit is called when etcd commits an incremental snapshot of its most recent changes to disk.
70 |
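To extract just these two latency histograms from the metrics output, the same certificate environment variables can be reused with a simple filter (a sketch, assuming the RKE `etcd` container as in the commands above):

```bash
curl -s -X GET --cacert $(docker exec etcd printenv ETCDCTL_CACERT) \
  --cert $(docker exec etcd printenv ETCDCTL_CERT) \
  --key $(docker exec etcd printenv ETCDCTL_KEY) \
  https://localhost:2379/metrics | grep -E 'wal_fsync_duration_seconds|backend_commit_duration_seconds'
```
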
--------------------------------------------------------------------------------
/collection/rancher/v2.x/supportability-review/collection-details.md:
--------------------------------------------------------------------------------
1 | # Rancher Supportability Review Collection Details
2 |
3 | ## Overview
4 | This document provides transparency about the data collected during a Rancher supportability review. The collection is designed to gather necessary diagnostic information while respecting privacy and security concerns.
5 |
6 | ## Cluster-Level Collection
7 |
8 | ### Kubernetes Components
9 | - API server configuration
10 | - Controller manager settings
11 | - Scheduler configuration
12 | - etcd status and metrics
13 | - Kubelet configuration
14 | - Container runtime status
15 |
16 | ### Workload Information
17 | - Pod status and configuration
18 | - Deployment configurations
19 | - StatefulSet configurations
20 | - DaemonSet configurations
21 | - Service configurations
22 | - Ingress configurations
23 |
24 | ### Cluster Resources
25 | - Namespace listing
26 | - Resource quotas
27 | - Limit ranges
28 | - Network policies
29 | - Storage classes and PV/PVC status
30 |
31 | ### Custom Resources
32 | - Rancher-specific CRDs status
33 | - Cluster configuration CRs
34 | - Helm releases
35 |
36 | ## Node-Level Collection
37 |
38 | ### System Information
39 | - OS version and distribution
40 | - Kernel parameters
41 | - System resources (CPU, memory, disk)
42 | - Network configuration
43 |
44 | ### Container Runtime
45 | - Docker/containerd version
46 | - Runtime configuration
47 | - Container logs
48 | - Image list
49 |
50 | ### Kubernetes Components
51 | - Kubelet status
52 | - Proxy configuration
53 | - CNI configuration
54 | - Container runtime logs
55 |
56 | ### System Logs
57 | - Kubernetes component logs
58 | - System service logs related to container runtime
59 | - Kernel logs related to container operations
60 |
61 | ## What is NOT Collected
62 |
63 | ### Excluded Data
64 | - Application data and logs
65 | - Secrets and sensitive configurations
66 | - User data
67 | - Database contents
68 | - Custom application configurations
69 | - SSL private keys
70 | - Authentication tokens
71 | - Password hashes
72 |
73 | ### Storage
74 | - Application persistent volumes content
75 | - User uploaded files
76 | - Backup files
77 |
78 | ### Network
79 | - Raw network traffic
80 | - Packet captures
81 | - Private network configurations
82 | - VPN configurations
83 |
84 | ## Data Handling
85 |
86 | ### Collection Process
87 | 1. Data is collected using Sonobuoy plugins
88 | 2. Information is aggregated at cluster level
89 | 3. Results are bundled into a single archive
90 |
91 | ### Security Measures
92 | - All collection is read-only
93 | - No modifications are made to cluster configuration
94 | - Collection runs with minimal required permissions
95 | - Data transfer is encrypted
96 | - Generated bundles are encoded and compressed
97 |
98 | ## Usage of Collected Data
99 |
100 | The collected information is used for:
101 | - Identifying potential system issues
102 | - Validating configurations
103 | - Ensuring compliance with best practices
104 | - Troubleshooting reported problems
105 | - Providing optimization recommendations
106 |
107 | The data is analyzed by SUSE Rancher Support to:
108 | - Verify system health
109 | - Identify potential improvements
110 | - Ensure security compliance
111 | - Provide targeted recommendations
112 | - Support issue resolution
113 |
114 | ## Questions or Concerns
115 |
116 | If you have questions about data collection or need to exclude certain types of information, please contact SUSE Rancher Support before running the collection tool. We can provide guidance on:
117 | - Customizing collection scope
118 | - Excluding sensitive namespaces
119 | - Modifying collection parameters
120 | - Reviewing collection results
--------------------------------------------------------------------------------
/collection/longhorn/run.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
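# This script collects Longhorn diagnostics with kubectl from the current context
# and writes a .tar.gz support bundle to the working directory.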
3 | # Set variables
4 | NAMESPACE="longhorn-system"
5 | OUTPUT_DIR="longhorn-support-bundle-$(date +%Y-%m-%d-%H-%M-%S)"
6 | ARCHIVE_NAME="${OUTPUT_DIR}.tar.gz"
7 |
8 | # Create directory structure
9 | mkdir -p "${OUTPUT_DIR}/logs/${NAMESPACE}"
10 | mkdir -p "${OUTPUT_DIR}/yamls/namespaced/${NAMESPACE}/kubernetes"
11 | mkdir -p "${OUTPUT_DIR}/yamls/namespaced/${NAMESPACE}/longhorn"
12 | mkdir -p "${OUTPUT_DIR}/yamls/cluster/kubernetes"
13 | mkdir -p "${OUTPUT_DIR}/nodes"
14 |
15 | echo "Creating support bundle for ${NAMESPACE} namespace..."
16 |
17 | # Get cluster information
18 | echo "Collecting cluster information..."
19 | kubectl version --output=yaml > "${OUTPUT_DIR}/yamls/cluster/kubernetes/version.yaml"
20 | kubectl get nodes -o yaml > "${OUTPUT_DIR}/yamls/cluster/kubernetes/nodes.yaml"
21 |
22 | # Get detailed information about each node
23 | NODES=$(kubectl get nodes -o jsonpath='{.items[*].metadata.name}')
24 | for node in $NODES; do
25 | echo "Getting detailed information for node ${node}..."
26 | mkdir -p "${OUTPUT_DIR}/nodes/${node}"
27 |
28 | # Get complete node YAML
29 | kubectl get node "$node" -o yaml > "${OUTPUT_DIR}/nodes/${node}/node.yaml"
30 |
31 | # Get node description
32 | kubectl describe node "$node" > "${OUTPUT_DIR}/nodes/${node}/description.txt"
33 |
34 | # Get node metrics if available
35 | kubectl top node "$node" 2>/dev/null > "${OUTPUT_DIR}/nodes/${node}/metrics.txt" || echo "Metrics not available" > "${OUTPUT_DIR}/nodes/${node}/metrics.txt"
36 |
37 | # Get node capacity and allocatable resources
38 | kubectl get node "$node" -o jsonpath='{.status.capacity}' > "${OUTPUT_DIR}/nodes/${node}/capacity.json"
39 | kubectl get node "$node" -o jsonpath='{.status.allocatable}' > "${OUTPUT_DIR}/nodes/${node}/allocatable.json"
40 | done
41 |
42 | # Get all standard Kubernetes resources in the namespace (excluding secrets)
43 | echo "Collecting standard Kubernetes resources..."
44 | RESOURCES="pods services deployments daemonsets statefulsets configmaps persistentvolumeclaims replicasets"
45 |
46 | for resource in $RESOURCES; do
47 | echo "Getting ${resource}..."
48 | kubectl get "$resource" -n "$NAMESPACE" -o yaml > "${OUTPUT_DIR}/yamls/namespaced/${NAMESPACE}/kubernetes/${resource}.yaml"
49 | done
50 |
51 | # Get all Longhorn CRDs and their instances
52 | echo "Collecting Longhorn custom resources..."
53 | LONGHORN_CRDS=$(kubectl get crd -o jsonpath='{range .items[?(@.spec.group=="longhorn.io")]}{.metadata.name}{"\n"}{end}')
54 |
55 | for crd in $LONGHORN_CRDS; do
56 | resource_type=$(echo "$crd" | cut -d. -f1)
57 | echo "Getting ${resource_type}..."
58 | kubectl get "$crd" -n "$NAMESPACE" -o yaml > "${OUTPUT_DIR}/yamls/namespaced/${NAMESPACE}/longhorn/${resource_type}.yaml"
59 | done
60 |
61 | # Collect pod logs
62 | echo "Collecting pod logs..."
63 | PODS=$(kubectl get pods -n "$NAMESPACE" -o jsonpath='{.items[*].metadata.name}')
64 |
65 | for pod in $PODS; do
66 | echo "Getting logs for pod ${pod}..."
67 | mkdir -p "${OUTPUT_DIR}/logs/${NAMESPACE}/${pod}"
68 |
69 | # Get container names for the pod
70 | CONTAINERS=$(kubectl get pod "$pod" -n "$NAMESPACE" -o jsonpath='{.spec.containers[*].name}')
71 |
72 | for container in $CONTAINERS; do
73 | echo "Getting logs for container ${container} in pod ${pod}..."
74 | kubectl logs "$pod" -c "$container" -n "$NAMESPACE" > "${OUTPUT_DIR}/logs/${NAMESPACE}/${pod}/${container}.log"
75 |
76 | # Get previous logs if available
77 | kubectl logs "$pod" -c "$container" -n "$NAMESPACE" --previous 2>/dev/null > "${OUTPUT_DIR}/logs/${NAMESPACE}/${pod}/${container}-previous.log" || true
78 | done
79 | done
80 |
81 | # Capture cluster events
82 | echo "Capturing cluster events..."
83 | kubectl get events --all-namespaces -o yaml > "${OUTPUT_DIR}/yamls/cluster/kubernetes/events.yaml"
84 | kubectl get events -n "$NAMESPACE" -o yaml > "${OUTPUT_DIR}/yamls/namespaced/${NAMESPACE}/kubernetes/events.yaml"
85 |
86 | # Compress the output directory
87 | echo "Creating archive ${ARCHIVE_NAME}..."
88 | tar -czf "$ARCHIVE_NAME" "$OUTPUT_DIR"
89 |
90 | # Clean up the output directory
91 | rm -rf "$OUTPUT_DIR"
92 |
93 | echo "Support bundle created: ${ARCHIVE_NAME}"
94 |
--------------------------------------------------------------------------------
/NGINX-to-pods-check/check.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 |
4 | usage()
5 | {
6 | cat << EOF
7 | usage: $0 options
8 | OPTIONS:
9 | -h Show this message
10 | -F Format Default: Table
11 | EOF
12 | }
13 |
14 | VERBOSE=
15 | while getopts "hF:v" OPTION
16 | do
17 | case $OPTION in
18 | h)
19 | usage
20 | exit 1
21 | ;;
22 | F)
23 | FORMAT=$OPTARG
24 | ;;
25 | ?)
26 | usage
27 | exit
28 | ;;
29 | esac
30 | done
31 |
32 | if [[ -z $FORMAT ]]
33 | then
34 | FORMAT="Table"
35 | fi
36 |
37 | if [[ ! "$FORMAT" == "Table" ]] && [[ ! "$FORMAT" == "Inline" ]]
38 | then
39 | echo "Invalid Option for flag -F"
40 | exit 1
41 | fi
42 |
43 |
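# For every ingress backend in every namespace, curl each endpoint pod IP/port
# from each ingress-nginx controller pod and report the result per controller node.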
44 | kubectl get namespace -o custom-columns=NAMESPACE:.metadata.name --no-headers | while read namespace
45 | do
46 | kubectl get ingress -n "$namespace" -o custom-columns=ingress:.metadata.name --no-headers | while read ingress
47 | do
48 | kubectl get ingress $ingress -n $namespace -o yaml | grep 'service:' -A1 | awk '{print $2}' | sort | uniq | awk 'NF {p=1} p' | while read servicename
49 | do
50 | PORT="$(kubectl get endpoints "$servicename" -n "$namespace" -o yaml | grep 'port:' | awk '{print $2}'| head -n 1)"
51 | if [[ "$PORT" == 'port:' ]]
52 | then
53 | PORT="80"
54 | fi
55 | kubectl get endpoints "$servicename" -n "$namespace" -o yaml | grep '\- ip:' | awk '{print $3}' | while read endpointpodip
56 | do
57 | kubectl -n ingress-nginx get pods -l app=ingress-nginx -o custom-columns=POD:.metadata.name,NODE:.spec.nodeName,IP:.status.podIP --no-headers | while read ingresspod nodename podip
58 | do
59 | PODNAME="$(kubectl get pods -n $namespace -o custom-columns=POD:.metadata.name,IP:.status.podIP --no-headers | grep "$endpointpodip" | awk '{print $1}' | tr -d ' ')"
60 | if ! kubectl -n ingress-nginx exec $ingresspod -- curl -o /dev/null --connect-timeout 5 -s -q http://${endpointpodip}:${PORT} &> /dev/null
61 | then
62 | if [[ "$FORMAT" == "Inline" ]]
63 | then
64 | tput setaf 7; echo -n "Checking Pod $PODNAME PodIP $endpointpodip on Port $PORT in endpoint $servicename for ingress $ingress from $ingresspod on node $nodename "; tput setaf 1; echo "NOK"; tput sgr0
65 | fi
66 | if [[ "$FORMAT" == "Table" ]]
67 | then
68 | echo "####################################################"
69 | echo "Pod: $PODNAME"
70 | echo "PodIP: $endpointpodip"
71 | echo "Port: $PORT"
72 | echo "Endpoint: $servicename"
73 | echo "Ingress: $ingress"
74 | echo "Ingress Pod: $ingresspod"
75 | echo "Node: $nodename"
76 | tput setaf 1;echo "Status: Fail!"; tput sgr0
77 | echo "####################################################"
78 | fi
79 | else
80 | if [[ "$FORMAT" == "Inline" ]]
81 | then
82 | tput setaf 7; echo -n "Checking Pod $PODNAME PodIP $endpointpodip on Port $PORT in endpoint $servicename for ingress $ingress from $ingresspod on node $nodename "; tput setaf 2; echo "OK"; tput sgr0
83 | fi
84 | if [[ "$FORMAT" == "Table" ]]
85 | then
86 | echo "####################################################"
87 | echo "Pod: $PODNAME"
88 | echo "PodIP: $endpointpodip"
89 | echo "Port: $PORT"
90 | echo "Endpoint: $servicename"
91 | echo "Ingress: $ingress"
92 | echo "Ingress Pod: $ingresspod"
93 | echo "Node: $nodename"
94 | tput setaf 2;echo "Status: Pass!"; tput sgr0
95 | echo "####################################################"
96 | fi
97 | fi
98 | done
99 | done
100 | done
101 | done
102 | done
103 |
104 |
--------------------------------------------------------------------------------
/collection/rancher/v2.x/logs-collector/README.md:
--------------------------------------------------------------------------------
1 | # Rancher v2.x logs-collector
2 |
3 | This logs collector project was created to collect logs from Linux Kubernetes nodes. It is designed to be used in the following environments for troubleshooting support cases:
4 | - [RKE2 clusters](https://docs.rke2.io/)
5 | - [RKE1 clusters](https://rancher.com/docs/rke/latest/en/)
6 | - [K3s clusters](https://docs.k3s.io/)
7 | - [Custom clusters](https://docs.ranchermanager.rancher.io/pages-for-subheaders/use-existing-nodes)
8 | - [Infrastructure provider clusters](https://ranchermanager.docs.rancher.com/how-to-guides/new-user-guides/launch-kubernetes-with-rancher/use-new-nodes-in-an-infra-provider)
9 | - [Kubeadm clusters](https://kubernetes.io/docs/reference/setup-tools/kubeadm/)
10 |
11 | > Note: This script may not collect all necessary information when run on nodes in a [Hosted Kubernetes Provider](https://ranchermanager.docs.rancher.com/how-to-guides/new-user-guides/kubernetes-clusters-in-rancher-setup/set-up-clusters-from-hosted-kubernetes-providers) cluster.
12 |
13 | ## Usage
14 |
15 | The script needs to be downloaded and run directly on the node, using the `root` user or `sudo`.
16 |
17 | Output will be written to `/tmp` as a tar.gz archive named `<hostname>-<date>.tar.gz`. The default output directory can be changed with the `-d` flag.
18 |
19 | ### Download and run the script
20 | * Save the script as: `rancher2_logs_collector.sh`
21 |
22 | Using `wget`:
23 | ```bash
24 | wget --backups https://raw.githubusercontent.com/rancherlabs/support-tools/master/collection/rancher/v2.x/logs-collector/rancher2_logs_collector.sh
25 | ```
26 | Using `curl`:
27 | ```bash
28 | curl -OLs https://raw.githubusercontent.com/rancherlabs/support-tools/master/collection/rancher/v2.x/logs-collector/rancher2_logs_collector.sh
29 | ```
30 |
31 | * Run the script:
32 | ```bash
33 | sudo bash rancher2_logs_collector.sh
34 | ```
35 |
36 | ### Optional: Download and run the script in one command
37 | ```bash
38 | curl -Ls rnch.io/rancher2_logs | sudo bash
39 | ```
40 | > Note: This command requires `curl` to be installed, and internet access from the node.
41 |
42 | ## Flags
43 |
44 | ```
45 | Rancher 2.x logs-collector
46 | Usage: rancher2_logs_collector.sh [ -d -s -e -r -p -f ]
47 |
48 | All flags are optional
49 |
50 | -c Custom data-dir for RKE2 (ex: -c /opt/rke2)
51 | -d Output directory for temporary storage and .tar.gz archive (ex: -d /var/tmp)
52 | -s Start day of journald and docker log collection, # of days relative to the current day (ex: -s 7)
53 | -e End day of journald and docker log collection, # of days relative to the current day (ex: -e 5)
54 | -S Start date of journald and docker log collection. (ex: -S 2022-12-05)
55 | -E End date of journald and docker log collection. (ex: -E 2022-12-07)
56 | -r Override k8s distribution if not automatically detected (rke|k3s|rke2|kubeadm)
57 | -p When supplied runs with the default nice/ionice priorities, otherwise use the lowest priorities
58 | -f Force log collection if the minimum space isn't available
59 | -o Obfuscate IP addresses and hostnames
60 | ```
61 |
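For example, to write the archive to `/var/tmp`, limit journald/docker log collection to the last two days, and obfuscate IP addresses and hostnames (values are illustrative):

```bash
sudo bash rancher2_logs_collector.sh -d /var/tmp -s 2 -o
```
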
62 | ## Scope of collection
63 |
64 | Collection includes the following areas. The logs collector is designed to gather necessary diagnostic information while respecting privacy and security concerns; a detailed list is maintained in [collection-details.md](./collection-details.md).
65 |
66 | - Related OS logs and configuration:
67 | - Network configuration - interfaces, iptables
68 | - Disk configuration - devices, filesystems, utilization
69 | - Performance - resource usage, tuning
70 | - OS release and logs - versions, messages/syslog
71 | - Related Kubernetes object output, kubectl commands, and pod logs
72 | - Related CRD objects
73 | - Output from kubectl for troubleshooting
74 | - Pod logs from related namespaces
75 |
76 | The scope of collection is intentionally limited to avoid sensitive data, use minimal resources and disk space, and focus on the core areas needed for troubleshooting.
77 |
78 | IP addresses and hostnames are collected and can assist with troubleshooting, however these can be obfuscated when adding the `-o` flag for the log collection script.
79 |
80 | Note: if additional verbosity, debug, or audit logging is enabled for the related Kubernetes and OS components, these logs can be included and may contain sensitive output.
81 |
--------------------------------------------------------------------------------
/collection/rancher/v2.x/supportability-review/security-policies.md:
--------------------------------------------------------------------------------
1 | # Security Policy Configuration Guide
2 |
3 | ## Overview
4 | This guide provides detailed configuration examples for running the Rancher Supportability Review tool in environments with various security policies.
5 |
6 | ## Kyverno Policies
7 |
8 | ### Required Exclusions
9 | ```yaml
10 | apiVersion: kyverno.io/v1
11 | kind: ClusterPolicy
12 | metadata:
13 | name: privilege-policy
14 | spec:
15 | validationFailureAction: Enforce
16 | background: true
17 | rules:
18 | - name: privilege-escalation
19 | match:
20 | any:
21 | - resources:
22 | kinds:
23 | - Pod
24 | exclude:
25 | any:
26 | - resources:
27 | namespaces:
28 | - sonobuoy
29 | validate:
30 | message: "Privilege escalation is disallowed..."
31 | ```
32 |
33 | ### Common Kyverno Policies Requiring Modification
34 | - Privilege escalation policies
35 | - Container security policies
36 | - Resource quota policies
37 | - Host path mounting policies
38 |
39 | ## Pod Security Policies
40 |
41 | ### Required Permissions
42 | ```yaml
43 | apiVersion: policy/v1beta1
44 | kind: PodSecurityPolicy
45 | metadata:
46 | name: sonobuoy-psp
47 | spec:
48 | privileged: true
49 | allowPrivilegeEscalation: true
50 | volumes:
51 | - hostPath
52 | - configMap
53 | - emptyDir
54 | hostNetwork: true
55 | hostPID: true
56 | hostIPC: true
57 | runAsUser:
58 | rule: RunAsAny
59 | seLinux:
60 | rule: RunAsAny
61 | supplementalGroups:
62 | rule: RunAsAny
63 | fsGroup:
64 | rule: RunAsAny
65 | ```
66 |
67 | ## Network Policies
68 |
69 | ### Sonobuoy Aggregator Access
70 | ```yaml
71 | apiVersion: networking.k8s.io/v1
72 | kind: NetworkPolicy
73 | metadata:
74 | name: allow-sonobuoy
75 | namespace: sonobuoy
76 | spec:
77 | podSelector: {}
78 | policyTypes:
79 | - Ingress
80 | - Egress
81 | ingress:
82 | - from:
83 | - namespaceSelector:
84 | matchLabels:
85 | kubernetes.io/metadata.name: sonobuoy
86 | egress:
87 | - to:
88 | - namespaceSelector: {}
89 | ```
90 |
91 | ## Image Pull Policies
92 |
93 | ### Required Registry Access
94 | ```yaml
95 | apiVersion: operator.openshift.io/v1alpha1
96 | kind: ImageContentSourcePolicy
97 | metadata:
98 | name: sonobuoy-repo
99 | spec:
100 | repositoryDigestMirrors:
101 | - mirrors:
102 | - registry.example.com/supportability-review
103 | source: rancher/supportability-review
104 | - mirrors:
105 | - registry.example.com/sonobuoy
106 | source: rancher/mirrored-sonobuoy-sonobuoy
107 | ```
108 |
109 | ## OPA Exempting Namespaces
110 |
111 | ### Required Exemption
112 | ```yaml
113 | apiVersion: config.gatekeeper.sh/v1alpha1
114 | kind: Config
115 | metadata:
116 | name: config
117 | namespace: "gatekeeper-system"
118 | spec:
119 | match:
120 | - excludedNamespaces: ["sonobuoy"]
121 | processes: ["*"]
122 | ```
123 |
124 |
125 | ## Troubleshooting Security Policies
126 |
127 | ### Common Issues and Solutions
128 |
129 | #### 1. Privilege Escalation Blocked
130 | ```yaml
131 | # Error:
132 | validation error: privileged containers are not allowed
133 |
134 | # Solution:
135 | Add namespace exclusion for sonobuoy namespace in your policy
136 | ```
137 |
138 | #### 2. Host Path Mounting Blocked
139 | ```yaml
140 | # Error:
141 | hostPath volumes are not allowed
142 |
143 | # Solution:
144 | Modify PSP to allow hostPath volume types for sonobuoy namespace
145 | ```
146 |
147 | #### 3. Network Policy Blocks
148 | ```yaml
149 | # Error:
150 | unable to connect to sonobuoy aggregator
151 |
152 | # Solution:
153 | Ensure NetworkPolicy allows pod-to-pod communication in sonobuoy namespace
154 | ```
155 |
156 | ## Best Practices
157 |
158 | ### Security Policy Configuration
159 | 1. Use namespace-specific exclusions
160 | 2. Avoid blanket exemptions
161 | 3. Monitor policy audit logs
162 | 4. Regular policy review
163 |
164 | ### Deployment Considerations
165 | 1. Use dedicated service accounts
166 | 2. Implement least-privilege access
167 | 3. Regular security audits
168 | 4. Documentation of exceptions
169 |
170 | ## Support
171 | For additional assistance with security policy configuration, contact SUSE Rancher Support with:
172 | 1. Current policy configurations
173 | 2. Error messages
174 | 3. Cluster configuration details
175 |
--------------------------------------------------------------------------------
/how-to-retrieve-kubeconfig-from-custom-cluster/README.md:
--------------------------------------------------------------------------------
1 | # How to retrieve a kubeconfig from an RKE1 cluster
2 |
3 | During a Rancher outage or other disaster event you may lose access to a downstream cluster via Rancher and be unable to manage your applications. This process creates a kubeconfig that bypasses Rancher by connecting directly to the local kube-apiserver on a control plane node.
4 |
5 | **Note**: The [Authorised Cluster Endpoint (ACE)](https://ranchermanager.docs.rancher.com/how-to-guides/new-user-guides/manage-clusters/access-clusters/use-kubectl-and-kubeconfig#authenticating-directly-with-a-downstream-cluster) is a default option enabled on clusters provisioned by Rancher; the generated kubeconfig contains a second context which connects directly to the downstream kube-apiserver and also bypasses Rancher.
6 |
7 | ### Pre-requisites
8 |
9 | - Rancher v2.2.x or newer
10 | - RKE v0.2.x or newer
11 | - SSH access to one of the controlplane nodes
12 | - Access to the Docker CLI or root/sudo
13 |
14 | ## Retrieve a kubeconfig - using jq
15 |
16 | This option requires `kubectl` and `jq` to be installed on the server.
17 |
18 | **Note**: kubectl can be copied from the kubelet container
19 |
20 | ```bash
21 | docker cp kubelet:/usr/local/bin/kubectl /usr/local/bin/
22 | ```
23 |
24 | - Get kubeconfig (Rancher 2.7.14+/Rancher 2.8.5+, RKE 1.4.19+/RKE 1.5.10+)
25 |
26 | ```bash
27 | kubectl --kubeconfig $(docker inspect kubelet --format '{{ range .Mounts }}{{ if eq .Destination "/etc/kubernetes" }}{{ .Source }}{{ end }}{{ end }}')/ssl/kubecfg-kube-node.yaml get secrets -n kube-system full-cluster-state -o json | jq -r .data.\"full-cluster-state\" | base64 -d | jq -r .currentState.certificatesBundle.\"kube-admin\".config | sed -e "/^[[:space:]]*server:/ s_:.*_: \"https://127.0.0.1:6443\"_" > kubeconfig_admin.yaml
28 | ```
29 |
30 | - Get kubeconfig (Earlier versions of Rancher and RKE)
31 |
32 | ```bash
33 | kubectl --kubeconfig $(docker inspect kubelet --format '{{ range .Mounts }}{{ if eq .Destination "/etc/kubernetes" }}{{ .Source }}{{ end }}{{ end }}')/ssl/kubecfg-kube-node.yaml get configmap -n kube-system full-cluster-state -o json | jq -r .data.\"full-cluster-state\" | jq -r .currentState.certificatesBundle.\"kube-admin\".config | sed -e "/^[[:space:]]*server:/ s_:.*_: \"https://127.0.0.1:6443\"_" > kubeconfig_admin.yaml
34 | ```
35 |
36 | - Run `kubectl get nodes`
37 | ```bash
38 | kubectl --kubeconfig kubeconfig_admin.yaml get nodes
39 | ```
40 |
41 | ## Retrieve a kubeconfig - without jq
42 |
43 | This option does not require `kubectl` or `jq` on the server because this uses the `rancher/rancher-agent` image to retrieve the kubeconfig.
44 |
45 | - Get kubeconfig (Rancher 2.7.14+/Rancher 2.8.5+, RKE 1.4.19+/RKE 1.5.10+)
46 | ```bash
47 | docker run --rm --net=host -v $(docker inspect kubelet --format '{{ range .Mounts }}{{ if eq .Destination "/etc/kubernetes" }}{{ .Source }}{{ end }}{{ end }}')/ssl:/etc/kubernetes/ssl:ro --entrypoint bash $(docker inspect $(docker images -q --filter=label=org.opencontainers.image.source=https://github.com/rancher/hyperkube) --format='{{index .RepoTags 0}}' | tail -1) -c 'kubectl --kubeconfig /etc/kubernetes/ssl/kubecfg-kube-node.yaml get secret -n kube-system full-cluster-state -o json | jq -r .data.\"full-cluster-state\" | base64 -d | jq -r .currentState.certificatesBundle.\"kube-admin\".config | sed -e "/^[[:space:]]*server:/ s_:.*_: \"https://127.0.0.1:6443\"_"' > kubeconfig_admin.yaml
48 | ```
49 |
50 | - Get kubeconfig (Earlier versions of Rancher and RKE)
51 |
52 | ```bash
53 | docker run --rm --net=host -v $(docker inspect kubelet --format '{{ range .Mounts }}{{ if eq .Destination "/etc/kubernetes" }}{{ .Source }}{{ end }}{{ end }}')/ssl:/etc/kubernetes/ssl:ro --entrypoint bash $(docker inspect $(docker images -q --filter=label=org.opencontainers.image.source=https://github.com/rancher/hyperkube.git) --format='{{index .RepoTags 0}}' | tail -1) -c 'kubectl --kubeconfig /etc/kubernetes/ssl/kubecfg-kube-node.yaml get configmap -n kube-system full-cluster-state -o json | jq -r .data.\"full-cluster-state\" | jq -r .currentState.certificatesBundle.\"kube-admin\".config | sed -e "/^[[:space:]]*server:/ s_:.*_: \"https://127.0.0.1:6443\"_"' > kubeconfig_admin.yaml
54 | ```
55 |
56 | - Run `kubectl get nodes`
57 | ```bash
58 | docker run --rm --net=host -v $PWD/kubeconfig_admin.yaml:/root/.kube/config:z --entrypoint bash $(docker inspect $(docker images -q --filter=label=org.opencontainers.image.source=https://github.com/rancher/hyperkube) --format='{{index .RepoTags 0}}' | tail -1) -c 'kubectl get nodes'
59 | ```
60 |
61 | ## Script
62 | Alternatively, download and run [`rke-node-kubeconfig.sh`](https://raw.githubusercontent.com/rancherlabs/support-tools/master/how-to-retrieve-kubeconfig-from-custom-cluster/rke-node-kubeconfig.sh) on a controlplane node and follow the instructions given.
63 |
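A minimal sketch of fetching and running the script (the optional first argument is a private registry prefix, used only for the fallback `rancher/rancher-agent` image):

```bash
curl -OLs https://raw.githubusercontent.com/rancherlabs/support-tools/master/how-to-retrieve-kubeconfig-from-custom-cluster/rke-node-kubeconfig.sh
sudo bash rke-node-kubeconfig.sh
```
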
--------------------------------------------------------------------------------
/collection/rancher/v2.x/supportability-review/collect.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | if [ "${DEBUG}" == "true" ]; then
4 | set -x
5 | fi
6 |
7 | HELP_MENU() {
8 | echo "Supportability Review
9 | Usage: collect.sh [ -h ]
10 |
11 | All flags are optional
12 |
13 | -h Print help menu for Supportability Review
14 |
15 | Environment variables:
16 |
17 | RANCHER_URL: Specify Rancher Server URL (Ex: https://rancher.example.com)
18 | RANCHER_TOKEN: Specify Rancher Token to connect to Rancher Server
19 | SR_IMAGE: Use this variable to point to custom container image of Supportability Review
20 | "
21 | }
22 |
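# Example invocations (illustrative values):
#   RANCHER_URL=https://rancher.example.com RANCHER_TOKEN=token-xxxxx ./collect.sh
#   KUBECONFIG=/path/to/kubeconfig.yml ./collect.sh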
23 | SR_IMAGE=${SR_IMAGE:-"rancher/supportability-review:latest"}
24 |
25 | if [ "${CONTAINER_RUNTIME}" == "" ]; then
26 | if command -v docker &> /dev/null; then
27 | echo "setting CONTAINER_RUNTIME=docker"
28 | CONTAINER_RUNTIME="docker"
29 | elif command -v nerdctl &> /dev/null; then
30 | echo "setting CONTAINER_RUNTIME=nerdctl"
31 | CONTAINER_RUNTIME="nerdctl"
32 | elif command -v podman &> /dev/null; then
33 | echo "setting CONTAINER_RUNTIME=podman"
34 | CONTAINER_RUNTIME="podman"
35 | else
36 | echo "error: couldn't detect CONTAINER_RUNTIME"
37 | exit 1
38 | fi
39 | else
40 | supported_runtime=false
41 | for runtime in docker nerdctl podman; do
42 | if [ "${CONTAINER_RUNTIME}" == ${runtime} ]; then
43 | supported_runtime=true
44 | break
45 | fi
46 | done
47 | if [ "${supported_runtime}" == false ]; then
48 | echo "error: unsupported CONTAINER_RUNTIME. Use docker|nerdctl|podman."
49 | exit 1
50 | fi
51 | fi
52 |
53 | if [[ "$SR_IMAGE" != *":dev" ]]; then
54 | echo "pulling image: ${SR_IMAGE}"
55 | $CONTAINER_RUNTIME pull "${SR_IMAGE}"
56 | fi
57 |
58 | CONTAINER_RUNTIME_ARGS=""
59 | COLLECT_INFO_FROM_RANCHER_SETUP_ARGS=""
60 |
61 | if [ "$ENABLE_PRIVILEGED" = "true" ]; then
62 | CONTAINER_RUNTIME_ARGS="$CONTAINER_RUNTIME_ARGS --privileged"
63 | fi
64 |
65 | if [ "${SONOBUOY_TOLERATION_FILE}" != "" ]; then
66 | if [ ! -f "${SONOBUOY_TOLERATION_FILE}" ]; then
67 | echo "error: SONOBUOY_TOLERATION_FILE=${SONOBUOY_TOLERATION_FILE} specified, but cannot access that file"
68 | exit 1
69 | fi
70 | CONTAINER_RUNTIME_ARGS="$CONTAINER_RUNTIME_ARGS -v ${SONOBUOY_TOLERATION_FILE}:/tmp/sonobuoy_toleration.yml"
71 | COLLECT_INFO_FROM_RANCHER_SETUP_ARGS="$COLLECT_INFO_FROM_RANCHER_SETUP_ARGS --sonobuoy-toleration-file /tmp/sonobuoy_toleration.yml"
72 | fi
73 |
74 | if [ "${KUBECONFIG}" == "" ]; then
75 | if [ "${RANCHER_URL}" == "" ]; then
76 | echo "error: RANCHER_URL is not set"
77 | exit 1
78 | fi
79 |
80 | if [ "${RANCHER_TOKEN}" == "" ]; then
81 | echo "error: RANCHER_TOKEN is not set"
82 | exit 1
83 | fi
84 |
85 | if [ "$1" == "-h" ]; then
86 | HELP_MENU
87 | fi
88 |
89 | CONTAINER_RUNTIME_ARGS="$CONTAINER_RUNTIME_ARGS -e RANCHER_URL="${RANCHER_URL}""
90 | CONTAINER_RUNTIME_ARGS="$CONTAINER_RUNTIME_ARGS -e RANCHER_TOKEN="${RANCHER_TOKEN}""
91 | CONTAINER_RUNTIME_ARGS="$CONTAINER_RUNTIME_ARGS -e RANCHER_VERIFY_SSL_CERTS="${RANCHER_VERIFY_SSL_CERTS}""
92 | else
93 | # TODO: Check if it's absolute path
94 | # TODO: Check if the file exists and it's readable
95 | echo "KUBECONFIG specified: ${KUBECONFIG}"
96 |
97 | if [ ! -f "${KUBECONFIG}" ]; then
98 | echo "error: KUBECONFIG=${KUBECONFIG} specified, but cannot access that file"
99 | exit 1
100 | fi
101 |
102 | CONTAINER_RUNTIME_ARGS="$CONTAINER_RUNTIME_ARGS -v ${KUBECONFIG}:/tmp/kubeconfig.yml"
103 | COLLECT_INFO_FROM_RANCHER_SETUP_ARGS="$COLLECT_INFO_FROM_RANCHER_SETUP_ARGS --kubeconfig /tmp/kubeconfig.yml"
104 |
105 | if [ -d "$HOME/.aws" ]; then
106 | CONTAINER_RUNTIME_ARGS="$CONTAINER_RUNTIME_ARGS -v $HOME/.aws:/root/.aws"
107 | fi
108 | if [ -n "$AWS_ACCESS_KEY_ID" ] && [ -n "$AWS_SECRET_ACCESS_KEY" ]; then
109 | CONTAINER_RUNTIME_ARGS="$CONTAINER_RUNTIME_ARGS -e AWS_ACCESS_KEY_ID="${AWS_ACCESS_KEY_ID}" -e AWS_SECRET_ACCESS_KEY="${AWS_SECRET_ACCESS_KEY}""
110 | if [ -n "$AWS_SESSION_TOKEN" ]; then
111 | CONTAINER_RUNTIME_ARGS="$CONTAINER_RUNTIME_ARGS -e AWS_SESSION_TOKEN="${AWS_SESSION_TOKEN}""
112 | fi
113 | fi
114 |
115 | fi
116 |
117 | CONTAINER_RUNTIME_ARGS="$CONTAINER_RUNTIME_ARGS -e DB_HOST_NAME="${DB_HOST_NAME}""
118 | CONTAINER_RUNTIME_ARGS="$CONTAINER_RUNTIME_ARGS -e DB_PORT_NUMBER="${DB_PORT_NUMBER}""
119 | CONTAINER_RUNTIME_ARGS="$CONTAINER_RUNTIME_ARGS -e DB_KEY="${DB_KEY}""
120 |
121 | CONTAINER_RUNTIME_ARGS="$CONTAINER_RUNTIME_ARGS ${SR_IMAGE}"
122 |
123 | $CONTAINER_RUNTIME run --rm \
124 | -it \
125 | --network host \
126 | -v `pwd`:/data \
127 | $CONTAINER_RUNTIME_ARGS \
128 | collect_info_from_rancher_setup.py $COLLECT_INFO_FROM_RANCHER_SETUP_ARGS "$@"
129 |
--------------------------------------------------------------------------------
/collection/rancher/v2.x/scc-operator-collector/collection-details.md:
--------------------------------------------------------------------------------
1 | # SCC Operator Collector - Collection Details
2 |
3 | This document outlines the specific information gathered by the `scc-operator-collector.sh` script. The collected data is organized into a directory structure within the support bundle.
4 |
5 | ## Bundle Structure
6 |
7 | The support bundle has the following structure:
8 |
9 | ```
10 | /
11 | ├── cluster-info/
12 | ├── configmaps/
13 | ├── crds/
14 | ├── events/
15 | ├── leases/
16 | ├── operator-pods/
17 | ├── registrations/
18 | ├── secrets/
19 | ├── metadata.txt
20 | ```
21 |
22 | ---
23 |
24 | ## Collected Information
25 |
26 | ### 1. Cluster Information (`cluster-info/`)
27 |
28 | General information about the Rancher `local` Kubernetes cluster.
29 |
30 | - **`cluster-info.txt`**: Output of `kubectl cluster-info`.
31 | - **`nodes.txt`**: Output of `kubectl get nodes -o wide`.
32 | - **`nodes.yaml`**: Output of `kubectl get nodes -o yaml`.
33 | - **`version.yaml`**: Output of `kubectl version --output=yaml`.
34 |
35 | ### 2. ConfigMaps (`configmaps/`)
36 |
37 | Configuration details for the SCC operator.
38 |
39 | - **`configmaps-list.txt`**: A list of all ConfigMaps in the operator's namespace.
40 | - **`scc-operator-config.yaml`**: The main configuration for the operator from the `scc-operator-config` ConfigMap.
41 |
42 | ### 3. Custom Resource Definitions (`crds/`)
43 |
44 | The definition of the `Registration` Custom Resource.
45 |
46 | - **`registrations.scc.cattle.io.yaml`**: The YAML definition of the `registrations.scc.cattle.io` CRD.
47 | - **`registrations.scc.cattle.io-describe.txt`**: The output of `kubectl describe crd registrations.scc.cattle.io`.
48 |
49 | ### 4. Events (`events/`)
50 |
51 | Kubernetes events to provide a timeline of recent activities.
52 |
53 | - **`events-<operator-namespace>.txt`**: Events from the SCC operator's namespace.
54 | - **`events-<lease-namespace>.txt`**: Events from the lease namespace (if different from the operator namespace).
55 | - **`events-all-namespaces.txt`**: Events from all namespaces for broader context.
56 |
57 | ### 5. Leases (`leases/`)
58 |
59 | Information about the leader election lease for the operator.
60 |
61 | - **`leases-list.txt`**: A list of all leases in the lease namespace.
62 | - **`lease-scc-controllers.yaml`**: The YAML definition of the `scc-controllers` lease object.
63 | - **`lease-scc-controllers-describe.txt`**: The output of `kubectl describe lease scc-controllers`.
64 |
65 | ### 6. Operator Pods (`operator-pods/`)
66 |
67 | Detailed information about the SCC operator pods.
68 |
69 | - **`pods-list.txt`**: A list of all pods in the operator's namespace.
70 | - **`pod-<pod-name>.yaml`**: The YAML definition for each operator pod.
71 | - **`pod-<pod-name>-describe.txt`**: The output of `kubectl describe pod` for each operator pod.
72 | - **`pod-<pod-name>-logs.txt`**: Current logs from all containers in each operator pod.
73 | - **`pod-<pod-name>-logs-previous.txt`**: Logs from previous container instances in each operator pod (if any).
74 | - **`no-pods.txt`**: This file is created if no operator pods are found.
75 |
76 | ### 7. Registrations (`registrations/`)
77 |
78 | Information about the `Registration` custom resources.
79 |
80 | - **`registrations-list.txt`**: A list of all `Registration` resources in the cluster.
81 | - **`registration-<name>.yaml`**: The YAML definition for each `Registration` resource.
82 | - **`registration-<name>-describe.txt`**: The output of `kubectl describe registration` for each resource.
83 | - **`no-registrations.txt`**: This file is created if no `Registration` resources are found.
84 |
85 | ### 8. Secrets (`secrets/`)
86 |
87 | Secrets related to SCC registration and credentials. **Sensitive data fields are redacted by default.**
88 |
89 | - **`secrets-list.txt`**: A list of all secrets in the operator's namespace.
90 | - **`secret-<secret-name>.yaml`**: The YAML definition for each collected secret. The following secret patterns are collected:
91 | - `scc-registration`
92 | - `rancher-registration`
93 | - `scc-system-credentials-*`
94 | - `registration-code-*`
95 | - `offline-request-*`
96 | - `offline-certificate-*`
97 | - `rancher-scc-metrics`
98 | - **`REDACTED.txt`**: A note indicating that secret data has been redacted.
99 | - **`UNREDACTED-WARNING.txt`**: A warning file present if the `--no-redact` flag was used.
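
For context, the sketch below shows one way a secret's `data` values can be masked before export. It is an illustration of the redaction technique only (assuming `yq` v4 and the default `cattle-scc-system` namespace), not necessarily the collector's exact implementation:

```bash
# Dump a secret with every data value replaced by the base64 encoding of "REDACTED".
kubectl get secret scc-registration -n cattle-scc-system -o yaml \
  | yq eval '.data |= with_entries(.value = "UkVEQUNURUQ=")' - \
  > secret-scc-registration.yaml
```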
100 |
101 | ### 9. Metadata (`metadata.txt`)
102 |
103 | A summary of the collection process and environment.
104 |
105 | - Collection timestamp.
106 | - Bundle name and configuration.
107 | - Kubernetes version and context.
108 | - A summary of collected resources.
109 | - A security warning if redaction was disabled.
110 |
--------------------------------------------------------------------------------
/collection/rancher/v2.x/scc-operator-collector/analyzer.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | # SCC Operator Support Bundle Analyzer
4 | # Analyzes a collected support bundle and formats it for human readability.
5 |
6 | set -e
7 |
8 | # Colors for output
9 | RED='\033[0;31m'
10 | GREEN='\033[0;32m'
11 | YELLOW='\033[0;33m'
12 | NC='\033[0m' # No Color
13 |
14 | # Usage information
15 | usage() {
16 | cat << EOF
17 | Usage: $(basename "$0") <BUNDLE_DIRECTORY>
18 |
19 | Analyzes a support bundle created by the scc-operator-collector.sh script.
20 | This script is intended to be run on a workstation with jq and yq installed.
21 |
22 | OPTIONS:
23 | -h, --help Show this help message
24 |
25 | EXAMPLES:
26 | # Analyze a support bundle directory
27 | $(basename "$0") scc-support-bundle-20231027-123456
28 | EOF
29 | exit 1
30 | }
31 |
32 | # Logging functions
33 | log_info() {
34 | echo -e "${GREEN}[INFO]${NC} $1"
35 | }
36 |
37 | log_warn() {
38 | echo -e "${YELLOW}[WARN]${NC} $1" >&2
39 | }
40 |
41 | log_error() {
42 | echo -e "${RED}[ERROR]${NC} $1" >&2
43 | }
44 |
45 | # Parse command line arguments
46 | while [[ $# -gt 0 ]]; do
47 | case $1 in
48 | -h|--help)
49 | usage
50 | ;;
51 | *)
52 | if [[ -z "$BUNDLE_DIR" ]]; then
53 | BUNDLE_DIR="$1"
54 | else
55 | log_error "Unknown option: $1"
56 | usage
57 | fi
58 | shift
59 | ;;
60 | esac
61 | done
62 |
63 | if [[ -z "$BUNDLE_DIR" ]]; then
64 | log_error "Bundle directory not specified."
65 | usage
66 | fi
67 |
68 | if [[ ! -d "$BUNDLE_DIR" ]]; then
69 | log_error "Bundle directory not found: $BUNDLE_DIR"
70 | exit 1
71 | fi
72 |
73 | # Check if jq and yq are available
74 | if ! command -v jq &> /dev/null; then
75 | log_error "jq not found. Please install jq and try again."
76 | exit 1
77 | fi
78 | if ! command -v yq &> /dev/null; then
79 | log_error "yq not found. Please install yq and try again."
80 | exit 1
81 | fi
82 |
83 | # Function to process secrets for readability
84 | process_secret() {
85 | local input_file="$1"
86 | local secret_name="$2"
87 | local output_file="$3"
88 |
89 | log_info " - Processing secret: $secret_name"
90 |
91 | # Base64 decode all data fields and convert to stringData
92 | local secret_json
93 | secret_json=$(yq eval '(select(.kind == "Secret" and .data) | .stringData = .data | del(.data) | .stringData |= with_entries(.value |= @base64d)) // .' -o=json "$input_file")
94 |
95 | # Special handling for metrics secret to format the payload
96 | if [[ "$secret_name" == "rancher-scc-metrics" ]]; then
97 | # Extract the payload, pretty-print it if it's JSON, and update the secret
98 | local payload_content
99 | payload_content=$(echo "$secret_json" | jq -r '.stringData.payload // ""')
100 |
101 | if [[ -n "$payload_content" ]]; then
102 | local pretty_payload
103 | # Try to pretty-print; if it's not valid JSON, use the original content
104 | pretty_payload=$(echo "$payload_content" | jq '.' 2>/dev/null || echo "$payload_content")
105 |
106 | # Update the JSON with the new pretty-printed payload string
107 | secret_json=$(echo "$secret_json" | jq --arg p "$pretty_payload" '.stringData.payload = $p')
108 | fi
109 |
110 | # Convert to YAML, styling the payload as a multi-line literal block
111 | echo "$secret_json" | yq eval '.stringData.payload style="literal" | .' -P - > "$output_file"
112 | else
113 | # For all other secrets, just convert to YAML
114 | echo "$secret_json" | yq eval -P - > "$output_file"
115 | fi
116 | }
117 |
118 | # Main analysis process
119 | main() {
120 | log_info "Starting support bundle analysis for: ${BUNDLE_DIR}"
121 |
122 | local secrets_dir="${BUNDLE_DIR}/secrets"
123 | if [[ -d "$secrets_dir" ]]; then
124 | log_info "Processing secrets..."
125 | local processed_secrets_dir="${BUNDLE_DIR}/processed-secrets"
126 | mkdir -p "$processed_secrets_dir"
127 |
128 | for secret_file in "$secrets_dir"/secret-*.yaml; do
129 | if [[ -f "$secret_file" ]]; then
130 | local secret_name
131 | secret_name=$(basename "$secret_file" | sed -e 's/^secret-//' -e 's/\.yaml$//')
132 | process_secret "$secret_file" "$secret_name" "${processed_secrets_dir}/secret-${secret_name}.yaml"
133 | fi
134 | done
135 | fi
136 |
137 | log_info "${GREEN}Analysis complete! See the 'processed-secrets' directory for readable secrets.${NC}"
138 | }
139 |
140 | # Run main function
141 | main
142 |
--------------------------------------------------------------------------------
/extended-rancher-2-cleanup/extended-cleanup-rancher2.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Cleanup for nodes provisioned using the RKE1 distribution
4 | # Note, for RKE2 and K3s use the uninstall script deployed on the node during install.
5 |
6 | # Directories to cleanup
7 | CLEANUP_DIRS=(/etc/ceph /etc/cni /etc/kubernetes /opt/cni /run/secrets/kubernetes.io /run/calico /run/flannel /var/lib/calico /var/lib/weave /var/lib/etcd /var/lib/cni /var/lib/kubelet /var/lib/rancher/rke/log /var/log/containers /var/log/pods /var/run/calico)
8 |
9 | # Interfaces to cleanup
10 | CLEANUP_INTERFACES=(flannel.1 cni0 tunl0 weave datapath vxlan-6784)
11 |
12 | run() {
13 |
14 | CONTAINERS=$(docker ps -qa)
15 | if [[ -n ${CONTAINERS} ]]
16 | then
17 | cleanup-containers
18 | else
19 | techo "No containers exist, skipping container cleanup..."
20 | fi
21 | cleanup-dirs
22 | cleanup-interfaces
23 | VOLUMES=$(docker volume ls -q)
24 | if [[ -n ${VOLUMES} ]]
25 | then
26 | cleanup-volumes
27 | else
28 | techo "No volumes exist, skipping container volume cleanup..."
29 | fi
30 | if [[ ${DELETE_IMAGES} -eq 1 ]]
31 | then
32 | IMAGES=$(docker images -q)
33 | if [[ -n ${IMAGES} ]]
34 | then
35 | cleanup-images
36 | else
37 | techo "No images exist, skipping container image cleanup..."
38 | fi
39 | fi
40 | if [[ -z ${SKIP_FLUSH_IPTABLES} ]]
41 | then
42 | flush-iptables
43 | else
44 | techo "Skipping flush of iptables rules..."
45 | fi
46 | techo "Done!"
47 |
48 | }
49 |
50 | cleanup-containers() {
51 |
52 | techo "Removing containers..."
53 | docker rm -f $(docker ps -qa)
54 |
55 | }
56 |
57 | cleanup-dirs() {
58 |
59 | techo "Unmounting filesystems..."
60 | for mount in $(mount | grep '/var/lib/kubelet' | awk '{ print $3 }')
61 | do
62 | umount -f $mount
63 | done
64 |
65 | if [ -n "${DELETE_SNAPSHOTS}" ]
66 | then
67 | techo "Removing etcd snapshots..."
68 | rm -rf /opt/rke
69 | fi
70 |
71 | techo "Removing directories..."
72 | for DIR in "${CLEANUP_DIRS[@]}"
73 | do
74 | techo "Removing $DIR"
75 | rm -rf $DIR
76 | done
77 |
78 | }
79 |
80 | cleanup-images() {
81 |
82 | techo "Removing images..."
83 | docker rmi -f $(docker images -q)
84 |
85 | }
86 |
87 | cleanup-interfaces() {
88 |
89 | techo "Removing interfaces..."
90 | for INTERFACE in "${CLEANUP_INTERFACES[@]}"
91 | do
92 | if ip link show ${INTERFACE} > /dev/null 2>&1
93 | then
94 | techo "Removing $INTERFACE"
95 | ip link delete $INTERFACE
96 | fi
97 | done
98 |
99 | }
100 |
101 | cleanup-volumes() {
102 |
103 | techo "Removing volumes..."
104 | docker volume rm $(docker volume ls -q)
105 |
106 | }
107 |
108 | flush-iptables() {
109 |
110 | techo "Flushing iptables..."
111 | iptables -F -t nat
112 | iptables -X -t nat
113 | iptables -F -t mangle
114 | iptables -X -t mangle
115 | iptables -F
116 | iptables -X
117 | techo "Restarting Docker..."
118 | if systemctl list-units --full -all | grep -q docker.service
119 | then
120 | systemctl restart docker
121 | else
122 | /etc/init.d/docker restart
123 | fi
124 |
125 | }
126 |
127 | help() {
128 |
129 | echo "Rancher 2.x extended cleanup
130 | Usage: bash extended-cleanup-rancher2.sh [ -f -i -s ]
131 |
132 | All flags are optional
133 |
134 | -f | --skip-iptables Skip flush of iptables rules
135 | -i | --delete-images Cleanup all container images
136 | -s | --delete-snapshots Cleanup all etcd snapshots
137 | -h This help menu
138 |
139 | !! Warning, this script flushes iptables rules, removes containers, and all data specific to Kubernetes and Rancher
140 | !! Docker will be restarted when flushing iptables rules
141 | !! Backup data as needed before running this script
142 | !! Use at your own risk"
143 |
144 | }
145 |
146 | timestamp() {
147 |
148 | date "+%Y-%m-%d %H:%M:%S"
149 |
150 | }
151 |
152 | techo() {
153 |
154 | echo "$(timestamp): $*"
155 |
156 | }
157 |
158 | # Check if we're running as root.
159 | if [[ $EUID -ne 0 ]]
160 | then
161 | techo "This script must be run as root"
162 | exit 1
163 | fi
164 |
165 | while test $# -gt 0
166 | do
167 | case ${1} in
168 | -f|--skip-iptables)
169 | shift
170 | SKIP_FLUSH_IPTABLES=1
171 | ;;
172 | -i|--delete-images)
173 | shift
174 | DELETE_IMAGES=1
175 | ;;
176 | -s|--delete-snapshots)
177 | shift
178 | DELETE_SNAPSHOTS=1
179 | ;;
180 | -h)
181 | help && exit 0
182 | ;;
183 | *)
184 | help && exit 0
185 | esac
186 | done
187 |
188 | # Run the cleanup
189 | run
--------------------------------------------------------------------------------
/adjust-downstream-webhook/adjust-downstream-webhook.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | usage() {
4 | cat << EOF
5 | usage: bash adjust-downstream-webhook.sh [--insecure-skip-tls-verify]
6 |
7 | This script adjusts the rancher-webhook chart release in all clusters managed by Rancher (excluding the local cluster).
8 | Depending on the version of Rancher, it either deletes the downstream webhook release, adjusts its version and restarts, or does nothing.
9 | Requires kubectl and helm to be installed and available on \$PATH.
10 | Requires rancher-charts helm repo. If you don't have it, please add: helm repo add rancher-charts https://charts.rancher.io && helm repo update
11 |
12 | RANCHER_URL without a trailing slash must be set with the server URL of Rancher.
13 | RANCHER_TOKEN must be set with an admin token generated with no scope.
14 | To ignore TLS verification, set --insecure-skip-tls-verify.
15 |
16 | Users also need to ensure they have the rancher-charts repo in the local Helm index.
17 | EOF
18 | }
19 |
20 | if [ "$1" == "-h" ]; then
21 | usage
22 | exit 0
23 | fi
24 |
25 | delete_webhook() {
26 | cluster="$1"
27 | current_chart=$(helm list -n cattle-system -l name=rancher-webhook | tail -1 | cut -f 6)
28 | echo "Deleting $current_chart from cluster $cluster."
29 | helm uninstall rancher-webhook -n cattle-system
30 | }
31 |
32 | replace_webhook() {
33 | cluster="$1"
34 | new_version="$2"
35 |
36 | echo "Updating the agent to make it remember the min version $new_version of rancher-webhook, so that it can deploy it when needed in the future in cluster $cluster."
37 | kubectl set env -n cattle-system deployment/cattle-cluster-agent CATTLE_RANCHER_WEBHOOK_MIN_VERSION="$new_version"
38 |
39 | helm get values -n cattle-system rancher-webhook -o yaml > current_values.yaml
40 | echo "Re-installing rancher-webhook to use $new_version in cluster $cluster."
41 | helm upgrade --install rancher-webhook rancher-charts/rancher-webhook -n cattle-system --version "$new_version" --values current_values.yaml
42 | rm -f current_values.yaml
43 | }
44 |
45 | adjust_webhook() {
46 | cluster="$1"
47 | rancher_version="$2"
48 |
49 | if [[ "$rancher_version" =~ 2\.6\.13 ]]; then
50 | replace_webhook "$cluster" 1.0.9+up0.2.10
51 | elif [[ "$rancher_version" =~ 2\.6\.[0-9]$ ]] || [[ "$rancher_version" =~ 2\.6\.1[0-2]$ ]]; then
52 | delete_webhook "$cluster"
53 | elif [[ "$rancher_version" =~ 2\.7\.[0-1]$ ]]; then
54 | delete_webhook "$cluster"
55 | elif [[ "$rancher_version" =~ 2\.7\.2 ]]; then
56 | replace_webhook "$cluster" 2.0.2+up0.3.2
57 | elif [[ "$rancher_version" =~ 2\.7\.3 ]]; then
58 | replace_webhook "$cluster" 2.0.3+up0.3.3
59 | elif [[ "$rancher_version" =~ 2\.7\.4 ]]; then
60 | replace_webhook "$cluster" 2.0.4+up0.3.4
61 | elif [[ "$rancher_version" =~ 2\.[7-9]\..* ]]; then
62 | # This matches anything else above 2.7, including 2.8.x and 2.9.x.
63 | echo "No need to delete rancher-webhook, given Rancher version $rancher_version."
64 | echo "Ensuring CATTLE_RANCHER_WEBHOOK_MIN_VERSION is set to an empty string."
65 | kubectl set env -n cattle-system deployment/cattle-cluster-agent CATTLE_RANCHER_WEBHOOK_MIN_VERSION=''
66 | else
67 | echo "Nothing to do, given Rancher version $rancher_version."
68 | fi
69 | }
70 |
71 | if [ -n "$DEBUG" ]
72 | then
73 | set -x
74 | fi
75 |
76 | if [[ -z "$RANCHER_TOKEN" || -z "$RANCHER_URL" ]]
77 | then
78 | echo "Required environment variables aren't properly set."
79 | usage
80 | exit 1
81 | fi
82 |
83 | kubeconfig="
84 | apiVersion: v1
85 | kind: Config
86 | clusters:
87 | - name: \"local\"
88 | cluster:
89 | server: \"$RANCHER_URL\"
90 |
91 | users:
92 | - name: \"local\"
93 | user:
94 | token: \"$RANCHER_TOKEN\"
95 |
96 |
97 | contexts:
98 | - name: \"local\"
99 | context:
100 | user: \"local\"
101 | cluster: \"local\"
102 |
103 | current-context: \"local\"
104 | "
105 |
106 | echo "$kubeconfig" > .temp_kubeconfig.yaml
107 | # helm will complain if these are group/world readable
108 | chmod g-r .temp_kubeconfig.yaml
109 | chmod o-r .temp_kubeconfig.yaml
110 | export KUBECONFIG="$(pwd)/.temp_kubeconfig.yaml"
111 |
112 | if [[ "$1" == "--insecure-skip-tls-verify" ]]
113 | then
114 | kubectl config set clusters.local.insecure-skip-tls-verify true
115 | fi
116 |
117 | rancher_version=$(kubectl get setting server-version -o jsonpath='{.value}')
118 | if [[ -z "$rancher_version" ]]; then
119 | echo 'Failed to look up Rancher version.'
120 | exit 1
121 | fi
122 |
123 | clusters=$(kubectl get clusters.management.cattle.io -o jsonpath="{.items[*].metadata.name}")
124 | for cluster in $clusters
125 | do
126 | if [ "$cluster" == "local" ]
127 | then
128 | echo "Skipping deleting rancher-webhook in the local cluster."
129 | continue
130 | fi
131 | kubectl config set clusters.local.server "$RANCHER_URL/k8s/clusters/$cluster"
132 | adjust_webhook "$cluster" "$rancher_version"
133 | done
134 |
135 | rm .temp_kubeconfig.yaml
136 |
--------------------------------------------------------------------------------
/collection/longhorn/README.md:
--------------------------------------------------------------------------------
1 | # Longhorn Support Bundle Script
2 |
3 | ## Notes
4 |
5 | This script is intended to collect diagnostic information from Kubernetes clusters running Longhorn, including:
6 | - Logs from all containers in the `longhorn-system` namespace
7 | - YAML definitions of Kubernetes resources in the `longhorn-system` namespace
8 | - Longhorn Custom Resource Definitions and their instances
9 | - Kubernetes cluster information and node details
10 | - Events and metrics related to the cluster and Longhorn operations
11 |
12 | This script helps gather comprehensive information needed for troubleshooting Longhorn-related issues, while ensuring no sensitive information like Kubernetes Secrets is collected.
13 |
14 | Output will be written to the current directory as a tar.gz archive named `longhorn-support-bundle-YYYY-MM-DD-HH-MM-SS.tar.gz`.
15 |
16 | ## Usage
17 |
18 | The script needs to be downloaded and run by a user with sufficient permissions to access the Kubernetes cluster via `kubectl`.
19 |
20 | ### Download and run the script
21 | * Save the script as: `longhorn-support-bundle.sh`
22 |
23 | Using `wget`:
24 | ```bash
25 | wget -O longhorn-support-bundle.sh https://raw.githubusercontent.com/rancherlabs/support-tools/master/collection/longhorn/run.sh
26 | ```
27 | Using `curl`:
28 | ```bash
29 | curl -Lso longhorn-support-bundle.sh https://raw.githubusercontent.com/rancherlabs/support-tools/master/collection/longhorn/run.sh
30 | ```
31 |
32 | * Make the script executable:
33 | ```bash
34 | chmod +x longhorn-support-bundle.sh
35 | ```
36 |
37 | * Run the script:
38 | ```bash
39 | bash ./longhorn-support-bundle.sh
40 | ```
41 |
42 | ## Bundle Contents
43 |
44 | The script creates a support bundle with the following structure:
45 |
46 | ```
47 | longhorn-support-bundle-YYYY-MM-DD-HH-MM-SS/
48 | ├── logs/ # Pod logs from longhorn-system
49 | │ └── longhorn-system/
50 | │ └── [pod-name]/
51 | │ ├── [container-name].log
52 | │ └── [container-name]-previous.log
53 | │
54 | ├── yamls/ # YAML definitions of various resources
55 | │ ├── cluster/ # Cluster-scoped resources
56 | │ │ └── kubernetes/
57 | │ │ ├── nodes.yaml # All nodes in the cluster
58 | │ │ ├── events.yaml # Cluster-wide events
59 | │ │ └── version.yaml # Kubernetes version information
60 | │ │
61 | │ └── namespaced/ # Namespace-scoped resources
62 | │ └── longhorn-system/
63 | │ ├── kubernetes/ # Standard Kubernetes resources
64 | │ │ ├── pods.yaml
65 | │ │ ├── services.yaml
66 | │ │ ├── deployments.yaml
67 | │ │ ├── daemonsets.yaml
68 | │ │ ├── statefulsets.yaml
69 | │ │ ├── configmaps.yaml
70 | │ │ ├── persistentvolumeclaims.yaml
71 | │ │ ├── replicasets.yaml
72 | │ │ └── events.yaml # Namespace-specific events
73 | │ │
74 | │ └── longhorn/ # Longhorn CRDs
75 | │ ├── engines.yaml
76 | │ ├── volumes.yaml
77 | │ ├── nodes.yaml
78 | │ └── ...
79 | │
80 | ├── nodes/ # Per-node information
81 | │ └── [node-name]/
82 | │ ├── node.yaml # Complete node YAML definition
83 | │ ├── description.txt # Output of kubectl describe node
84 | │ ├── metrics.txt # Resource usage metrics
85 | │ ├── capacity.json # Node capacity information
86 | │ └── allocatable.json # Node allocatable resources
87 | │
88 | └── external/ # For additional external resources
89 | ```
90 |
91 | ## Information Collected
92 |
93 | The script collects the following information:
94 |
95 | 1. **Kubernetes Resources in the `longhorn-system` namespace:**
96 | - Pods, Services, Deployments, DaemonSets, StatefulSets
97 | - ConfigMaps, PersistentVolumeClaims, ReplicaSets
98 | - Does NOT include Secrets
99 |
100 | 2. **Longhorn Custom Resources:**
101 | - All Custom Resource Definitions with the API group `longhorn.io`
102 | - Instances of these CRDs in the `longhorn-system` namespace
103 |
104 | 3. **Pod Logs:**
105 | - Current and previous (if available) logs for all containers in all pods in the `longhorn-system` namespace
106 |
107 | 4. **Cluster Information:**
108 | - Kubernetes version
109 | - Cluster-wide events
110 | - Node information
111 |
112 | 5. **Per-Node Details:**
113 | - Complete YAML definition
114 | - Detailed node description
115 | - Resource capacity and allocation
116 | - Current metrics (if available)
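
As a rough illustration, the per-node files map to standard `kubectl` queries along these lines (the node name is an example; the script's exact commands may differ):

```bash
NODE=worker-1   # example node name
kubectl get node "$NODE" -o yaml                              > node.yaml
kubectl describe node "$NODE"                                 > description.txt
kubectl get node "$NODE" -o jsonpath='{.status.capacity}'     > capacity.json
kubectl get node "$NODE" -o jsonpath='{.status.allocatable}'  > allocatable.json
kubectl top node "$NODE"                                      > metrics.txt 2>/dev/null || true
```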
117 |
118 | ## Requirements
119 |
120 | - A Kubernetes cluster with Longhorn installed
121 | - `kubectl` installed and configured to access your cluster
122 | - Sufficient permissions to read resources in the `longhorn-system` namespace and cluster-level resources
123 |
124 | ## Privacy and Security
125 |
126 | This script does not collect Kubernetes Secrets or sensitive credentials. However, be aware that logs and configuration data may contain sensitive information. Review the bundle before sharing it externally.
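
One way to review the archive before sharing it (the file name follows the pattern mentioned above and is only an example):

```bash
# List the contents without extracting, then extract and scan for obviously sensitive strings.
tar -tzf longhorn-support-bundle-YYYY-MM-DD-HH-MM-SS.tar.gz | less
tar -xzf longhorn-support-bundle-YYYY-MM-DD-HH-MM-SS.tar.gz
grep -ril 'password\|token' longhorn-support-bundle-YYYY-MM-DD-HH-MM-SS/ || true
```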
--------------------------------------------------------------------------------
/collection/rancher/v2.x/scc-operator-collector/README.md:
--------------------------------------------------------------------------------
1 | # SCC Operator Support Bundle Collector & Analyzer
2 |
3 | This directory contains two scripts for troubleshooting the SUSE Customer Center (SCC) Operator in a Rancher environment:
4 | 1. `scc-operator-collector.sh`: Gathers diagnostic information from the cluster.
5 | 2. `analyzer.sh`: Processes the collected bundle for easier local analysis.
6 |
7 | ---
8 |
9 | ## Collector (`scc-operator-collector.sh`)
10 |
11 | This script collects various details about the operator, its custom resources, and the Kubernetes environment it runs in.
12 |
13 | ### How to Run the Collector
14 |
15 | The script can be run directly from your local machine, provided you have `kubectl` installed and configured to connect to the target Kubernetes cluster.
16 | If run on a k8s cluster node, the script only needs to be run on a single node as results should not vary per-node.
17 |
18 | #### Prerequisites
19 |
20 | - `kubectl` installed and configured with access to the Rancher `local` cluster.
21 |
22 | #### Execution
23 |
24 | 1. **Download the script:**
25 | - Using `wget`:
26 | ```bash
27 | wget https://raw.githubusercontent.com/rancherlabs/support-tools/refs/heads/master/collection/rancher/v2.x/scc-operator-collector/scc-operator-collector.sh
28 | ```
29 | - Using `curl`:
30 | ```bash
31 | curl -O https://raw.githubusercontent.com/rancherlabs/support-tools/refs/heads/master/collection/rancher/v2.x/scc-operator-collector/scc-operator-collector.sh
32 | ```
33 |
34 | 2. **Run the collector:**
35 | ```bash
36 | bash scc-operator-collector.sh
37 | ```
38 | This will create a compressed `tar.gz` archive named `scc-support-bundle-<timestamp>.tar.gz` in the current directory. This bundle is safe to share with SUSE support.
39 |
40 | ### Collector Command-line Options
41 |
42 | | Option | Description | Default |
43 | | --- | --- | --- |
44 | | `--no-redact` | Disable redaction of sensitive information in secrets. **WARNING:** The bundle will contain sensitive data. | `false` |
45 | | `--output <format>` | Output format. Can be `tar` (a `tar.gz` archive) or `folder` (a directory). | `tar` |
46 | | `--namespace <namespace>` | The namespace where the SCC Operator is running. | `cattle-scc-system` |
47 | | `--lease-namespace <namespace>` | The namespace where the operator's leader election lease is stored. | `kube-system` |
48 | | `--name <name>` | A custom name for the support bundle. | `scc-support-bundle-<timestamp>` |
49 | | `-h`, `--help` | Show the help message. | |
50 |
51 | #### Examples
52 |
53 | - **Default collection (redacted, compressed archive):**
54 | ```bash
55 | bash scc-operator-collector.sh
56 | ```
57 |
58 | - **Collect into a folder for local inspection:**
59 | ```bash
60 | bash scc-operator-collector.sh --output folder
61 | ```
62 |
63 | - **Collect without redacting secrets (for local debugging only):**
64 | ```bash
65 | bash scc-operator-collector.sh --no-redact
66 | ```
67 | *Note: This forces the output to `folder` format for security reasons.*
68 |
69 | ### Security and Redaction
70 |
71 | By default, the collector redacts sensitive data within secrets to prevent accidental exposure of credentials. When the `--no-redact` flag is used, this redaction is skipped.
72 |
73 | **WARNING:** Bundles created with `--no-redact` contain sensitive credentials and should **NEVER** be shared or uploaded to support tickets. Use the `analyzer.sh` script for local debugging of unredacted bundles.
74 |
75 | ### What is Collected?
76 |
77 | For a detailed list of the information gathered by the collector, please see [collection-details.md](./collection-details.md).
78 |
79 | ---
80 |
81 | ## Analyzer (`analyzer.sh`)
82 |
83 | This script processes a support bundle created by the collector, making it easier to read for local debugging. Its primary function is to decode secrets into a human-readable format.
84 |
85 | ### How to Run the Analyzer
86 |
87 | The analyzer is designed to be run on a workstation against a support bundle that has been unarchived or collected using the `folder` output format.
88 |
89 | #### Prerequisites
90 |
91 | - `jq` installed.
92 | - `yq` installed.
93 | - A support bundle directory (not a `.tar.gz` file).
94 |
95 | #### Execution
96 |
97 | 1. **Download the script:**
98 | - Using `wget`:
99 | ```bash
100 | wget https://raw.githubusercontent.com/rancherlabs/support-tools/master/collection/rancher/v2.x/scc-operator-collector/analyzer.sh
101 | ```
102 | - Using `curl`:
103 | ```bash
104 | curl -O https://raw.githubusercontent.com/rancherlabs/support-tools/master/collection/rancher/v2.x/scc-operator-collector/analyzer.sh
105 | ```
106 |
107 | 2. **Run the analyzer against a bundle directory:**
108 | ```bash
109 | bash analyzer.sh scc-support-bundle-<timestamp>
110 | ```
111 |
112 | ### What the Analyzer Does
113 |
114 | The script creates a new `processed-secrets` directory inside the bundle directory. Within this new directory, it:
115 | - Decodes all `data` fields from secrets and displays them as human-readable `stringData`.
116 | - For the `rancher-scc-metrics` secret, it pretty-prints the JSON `payload` for easier review.
117 |
118 | This allows you to easily inspect secret contents without manual `base64` decoding, which is especially useful when reviewing bundles locally.
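
For comparison, the manual step the analyzer automates looks roughly like this (assuming `yq` v4 and using the same expression style as the script; the secret file name is just an example):

```bash
# Decode the data fields of one collected secret by hand.
yq eval '.data | with_entries(.value |= @base64d)' \
  scc-support-bundle-<timestamp>/secrets/secret-scc-registration.yaml
```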
119 |
--------------------------------------------------------------------------------
/windows-agent-strict-verify/README.md:
--------------------------------------------------------------------------------
1 | # Enabling agent strict TLS verification on existing Windows nodes
2 |
3 | In certain conditions, Windows nodes will not respect the Agent TLS Mode value set on the Rancher server. This setting was implemented in Rancher 2.9.0 and 2.8.6.
4 |
5 | Windows nodes will not respect this setting if the following two conditions are true:
6 |
7 | 1. The node was provisioned using a Rancher version older than 2.9.2 or 2.8.8, and continues to be used after a Rancher upgrade to 2.9.2, 2.8.8, or greater.
8 | 2. The node is running a version of rke2 _older_ than the August 2024 patches. (i.e. any version _lower_ than v1.30.4, v1.29.8, v1.28.13, v1.27.16.)
9 |
10 | ## Workaround
11 |
12 | In order to retroactively enable strict TLS verification on Windows nodes, the following process must be followed. A PowerShell script, `update-node.ps1`, has been included to automate some parts of this process; however, some steps (such as retrieving the required credentials used by the script) must be done manually.
13 |
14 |
15 | This process needs to be repeated for each Windows node joined to the cluster, but does not need to be done for newly provisioned nodes after Rancher has been upgraded to at least 2.9.2 or 2.8.8 - even if the rke2 version is older than the August patches. In scenarios where it is possible / safe to reprovision the impacted Windows nodes, this process may not be needed.
16 |
17 | 1. Stop the `rancher-wins` service using the `Stop-Service` PowerShell Command (`Stop-Service rancher-wins`)
18 |
19 | 2. Update the version of `wins.exe` running on the node. This can either be done manually, or via the `update-node.ps1` PowerShell script by passing the `-DownloadWins` flag
20 | 1. If a manual approach is taken, download the latest [version of rancher-wins from GitHub](https://github.com/rancher/wins/releases) (at least version `0.4.18`) and place the updated binary in the `c:/usr/local/bin` and `c:/Windows` directories, replacing the existing binaries.
21 |
22 | 2. If the automatic approach is taken, then you must include the `-DownloadWins` flag when invoking `update-node.ps1`. The version of `rancher-wins` packaged within your Rancher server will then be downloaded.
23 | + You must ensure that you are running a version of Rancher which embeds at _least_ `rancher-wins` `v0.4.18`. This version is included in Rancher v2.9.2, v2.8.8, and above.
24 | + Refer to the [`Obtaining the CATTLE_TOKEN and CATTLE_SERVER variables`](#obtaining-the-cattle_token-and-cattle_server-variables) section below to retrieve the required `CATTLE_TOKEN` and `CATTLE_SERVER` variables.
25 |
26 | 3. Manually update the `rancher-wins` config file to enable strict TLS verification
27 | 1. This file is located in `c:/etc/rancher/wins/config`.
28 | 1. At the root level (i.e. a new line just before the `system-agent` field) add the following value `agentStrictTLSMode: true`
29 | 2. An [example configuration file](#example-updated-wins-config-file) can be seen at the bottom of this file
30 |
31 | 4. If needed, regenerate the rancher connection file
32 | 1. To determine if you need to do this, look at the `/var/lib/rancher/agent/rancher2_connection_info.json` file. If you intend to use strict validation, this file must contain a valid `ca-certificate-data` field.
33 | 2. If this field is missing
34 | 1. Refer to the [`Obtaining the CATTLE_TOKEN and CATTLE_SERVER variables`](#obtaining-the-cattle_token-and-cattle_server-variables) section to retrieve the required `CATTLE_TOKEN` and `CATTLE_SERVER` parameters
35 | 2. Create a new file containing the `update-node.ps1` script and run it, ensuring you properly pass the `CATTLE_SERVER` value to the `-RancherServerURL` flag, and the `CATTLE_TOKEN` value to the `-Token` flag.
36 | 1. Depending on whether you wish to manually update `rancher-wins`, run one of the following two commands
37 | 1. `./update-node.ps1 -RancherServerURL $CATTLE_SERVER -Token $CATTLE_TOKEN`
38 | 2. `./update-node.ps1 -RancherServerURL $CATTLE_SERVER -Token $CATTLE_TOKEN -DownloadWins`
39 | 2. Confirm that the `rancher2_connection_info.json` file contains the correct CA data.
40 |
41 | 5. Confirm the proper version of `rancher-wins` has been installed by running `wins.exe --version`
42 | 6. Restart the node (`Restart-Computer`).
43 | 1. If the node is running an RKE2 version older than the August patches, you **must** restart the node otherwise pod networking will be impacted.
44 |
45 | ### Obtaining the `CATTLE_TOKEN` and `CATTLE_SERVER` variables
46 |
47 | - You must be a cluster administrator or have an account permitted to view cluster secrets in order to use this script, as the `CATTLE_TOKEN` is stored in a Kubernetes secret. You cannot simply generate an API token using the Rancher UI.
48 | - To obtain the `CATTLE_TOKEN` and `CATTLE_SERVER` values using the Rancher UI
49 | 1. Open Rancher's Cluster Explorer UI for the cluster which contains the relevant Windows nodes.
50 | 2. In the left hand section, under `More Resources`, go to `Core`, and then finally, `Secrets`.
51 | 3. Find the secret named `stv-aggregation`, and copy the `CATTLE_SERVER` and `CATTLE_TOKEN` fields.
52 | 4. Pass `CATTLE_TOKEN` to the `-Token` flag, and `CATTLE_SERVER` to the `-RancherServerURL` flag.
53 | - To obtain the `CATTLE_TOKEN` and `CATTLE_SERVER` values using kubectl
54 | 1. `kubectl get secret -n cattle-system stv-aggregation --template={{.data.CATTLE_TOKEN}} | base64 -d`
55 | 2. `kubectl get secret -n cattle-system stv-aggregation --template={{.data.CATTLE_SERVER}} | base64 -d`
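
For convenience, the two commands above can be captured into shell variables named to match the values expected by `update-node.ps1`; a minimal sketch:

```bash
# Run from a workstation with kubectl access to the downstream cluster.
CATTLE_TOKEN=$(kubectl get secret -n cattle-system stv-aggregation --template='{{.data.CATTLE_TOKEN}}' | base64 -d)
CATTLE_SERVER=$(kubectl get secret -n cattle-system stv-aggregation --template='{{.data.CATTLE_SERVER}}' | base64 -d)
echo "$CATTLE_SERVER"   # pass to -RancherServerURL; pass $CATTLE_TOKEN to -Token
```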
56 |
57 | ### Example updated wins config file
58 |
59 | ```yaml
60 | # This file is located at c:/etc/rancher/wins/config
61 | white_list:
62 | processPaths:
63 | - C:/etc/rancher/wins/powershell.exe
64 | - C:/etc/rancher/wins/wins-upgrade.exe
65 | - C:/etc/wmi-exporter/wmi-exporter.exe
66 | - C:/etc/windows-exporter/windows-exporter.exe
67 | proxyPorts:
68 | - 9796
69 | agentStrictTLSMode: true
70 | systemagent:
71 | workDirectory: C:/var/lib/rancher/agent/work
72 | appliedPlanDirectory: C:/var/lib/rancher/agent/applied
73 | remoteEnabled: true
74 | preserveWorkDirectory: false
75 | connectionInfoFile: C:/var/lib/rancher/agent/rancher2_connection_info.json
76 | csi-proxy:
77 | url: https://haffel-rancher.cp-dev.rancher.space/assets/csi-proxy-%[1]s.tar.gz
78 | version: v1.1.3
79 | kubeletPath: C:/bin/kubelet.exe
80 | ```
--------------------------------------------------------------------------------
/windows-access-control-lists/README.md:
--------------------------------------------------------------------------------
1 | # Securing file ACLs on RKE2 Windows nodes
2 |
3 | In certain cases, Windows nodes joined to RKE2 clusters may not have appropriate Access Control Lists (ACLs) configured for important files and directories, allowing improper access by unprivileged user accounts such as `NT AUTHORITY\Authenticated Users`. This occurs in the following configurations:
4 |
5 | + Standalone RKE2 Windows nodes (i.e. RKE2 nodes **_not_** provisioned using Rancher) that were _initially_ provisioned using an RKE2 version older than `1.27.15`, `1.28.11`, `1.29.6`, or `1.30.2`.
6 |
7 | + Rancher-provisioned RKE2 Windows nodes that were created using a Rancher version older than `2.9.3` or `2.8.9`.
8 |
9 | This issue has been resolved for standalone RKE2 clusters starting with versions `1.27.15`, `1.28.11`, `1.29.6`, `1.30.2`, and above. Rancher `2.9.3`, `2.8.9`, and above have also been updated to properly configure ACLs on Windows nodes during initial provisioning, as well as to retroactively update ACLs on existing nodes.
10 |
11 | If you are maintaining a standalone RKE2 Windows cluster which was provisioned using a version of RKE2 older than `1.27.15`, `1.28.11`, `1.29.6`, or `1.30.2`, or if you maintain a Rancher-provisioned RKE2 Windows cluster but are unable to upgrade to at least `2.9.3` or `2.8.9`, then you can use the PowerShell script below to manually update the relevant ACLs.
12 |
13 | This script only needs to be run once per node. If desired, additional files and directories can be secured by updating the `$restrictedPaths` variable. After running the script, only the `NT AUTHORITY\SYSTEM` and `BUILTIN\Administrators` group will have access to the specified files and directories. Directories will be configured with inheritance enabled to ensure child files and directories utilize the same restrictive ACL.
14 |
15 | Add the below script to a PowerShell file and run it using the PowerShell console as an Administrator.
16 |
17 | ```powershell
18 | function Set-RestrictedPermissions {
19 | [CmdletBinding()]
20 | param (
21 | [Parameter(Mandatory=$true)]
22 | [string]
23 | $Path,
24 | [Parameter(Mandatory=$true)]
25 | [Boolean]
26 | $Directory
27 | )
28 | $Owner = "BUILTIN\Administrators"
29 | $Group = "NT AUTHORITY\SYSTEM"
30 | $acl = Get-Acl $Path
31 |
32 | foreach ($rule in $acl.GetAccessRules($true, $true, [System.Security.Principal.SecurityIdentifier])) {
33 | $acl.RemoveAccessRule($rule) | Out-Null
34 | }
35 | $acl.SetAccessRuleProtection($true, $false)
36 | $acl.SetOwner((New-Object System.Security.Principal.NTAccount($Owner)))
37 | $acl.SetGroup((New-Object System.Security.Principal.NTAccount($Group)))
38 |
39 | Set-FileSystemAccessRule -Directory $Directory -acl $acl
40 |
41 | $FullPath = Resolve-Path $Path
42 | Write-Host "Setting restricted ACL on $FullPath"
43 | Set-Acl -Path $Path -AclObject $acl
44 | }
45 |
46 | function Set-FileSystemAccessRule() {
47 | [CmdletBinding()]
48 | param (
49 | [Parameter(Mandatory=$true)]
50 | [Boolean]
51 | $Directory,
52 | [Parameter(Mandatory=$false)]
53 | [System.Security.AccessControl.ObjectSecurity]
54 | $acl
55 | )
56 | $users = @(
57 | $acl.Owner,
58 | $acl.Group
59 | )
60 | if ($Directory -eq $true) {
61 | foreach ($user in $users) {
62 | $rule = New-Object System.Security.AccessControl.FileSystemAccessRule(
63 | $user,
64 | [System.Security.AccessControl.FileSystemRights]::FullControl,
65 | [System.Security.AccessControl.InheritanceFlags]'ObjectInherit,ContainerInherit',
66 | [System.Security.AccessControl.PropagationFlags]::None,
67 | [System.Security.AccessControl.AccessControlType]::Allow
68 | )
69 | $acl.AddAccessRule($rule)
70 | }
71 | } else {
72 | foreach ($user in $users) {
73 | $rule = New-Object System.Security.AccessControl.FileSystemAccessRule(
74 | $user,
75 | [System.Security.AccessControl.FileSystemRights]::FullControl,
76 | [System.Security.AccessControl.AccessControlType]::Allow
77 | )
78 | $acl.AddAccessRule($rule)
79 | }
80 | }
81 | }
82 |
83 | function Confirm-ACL {
84 | [CmdletBinding()]
85 | param (
86 | [Parameter(Mandatory=$true)]
87 | [String]
88 | $Path
89 | )
90 | foreach ($a in (Get-Acl $path).Access) {
91 | $ref = $a.IdentityReference
92 | if (($ref -ne "BUILTIN\Administrators") -and ($ref -ne "NT AUTHORITY\SYSTEM")) {
93 | return $false
94 | }
95 | }
96 | return $true
97 | }
98 |
99 | $RKE2_DATA_DIR="c:\var\lib\rancher\rke2"
100 | $SYSTEM_AGENT_DIR="c:\var\lib\rancher\agent"
101 | $RANCHER_PROVISIONING_DIR="c:\var\lib\rancher\capr"
102 |
103 | $restrictedPaths = @(
104 | [PSCustomObject]@{
105 | Path = "c:\etc\rancher\wins\config"
106 | Directory = $false
107 | }
108 | [PSCustomObject]@{
109 | Path = "c:\etc\rancher\node\password"
110 | Directory = $false
111 | }
112 | [PSCustomObject]@{
113 | Path = "$SYSTEM_AGENT_DIR\rancher2_connection_info.json"
114 | Directory = $false
115 | }
116 | [PSCustomObject]@{
117 | Path = "c:\etc\rancher\rke2\config.yaml.d\50-rancher.yaml"
118 | Directory = $false
119 | }
120 | [PSCustomObject]@{
121 | Path = "c:\usr\local\bin\rke2.exe"
122 | Directory = $false
123 | }
124 | [PSCustomObject]@{
125 | Path = "$RANCHER_PROVISIONING_DIR"
126 | Directory = $true
127 | }
128 | [PSCustomObject]@{
129 | Path = "$SYSTEM_AGENT_DIR"
130 | Directory = $true
131 | }
132 | [PSCustomObject]@{
133 | Path = "$RKE2_DATA_DIR"
134 | Directory = $true
135 | }
136 | )
137 |
138 | foreach ($path in $restrictedPaths) {
139 | # Some paths will not exist on standalone RKE2 clusters
140 | if (-Not (Test-Path -Path $path.Path)) {
141 | continue
142 | }
143 |
144 | if (-Not (Confirm-ACL -Path $path.Path)) {
145 | Set-RestrictedPermissions -Path $path.Path -Directory $path.Directory
146 | } else {
147 | Write-Host "ACLs have been properly configured for the $($path.Path) $(if($path.Directory){ "directory" } else { "file" })"
148 | }
149 | }
150 | ```
151 |
--------------------------------------------------------------------------------
/collection/rancher/v2.x/systems-information-v2/README.md:
--------------------------------------------------------------------------------
1 | # Rancher 2.x Systems Summary v2
2 |
3 | The script runs as a pod in the Rancher Management (local) cluster and collects information about the clusters managed by Rancher. The script collects the following information:
4 |
5 | - Rancher server version and installation UUID
6 | - Details of all clusters managed by Rancher, including:
7 | - Cluster ID and name
8 | - Kubernetes version
9 | - Provider type
10 | - Creation timestamp
11 | - Nodes associated with each cluster
12 | - For each cluster, detailed information about each node, including:
13 | - Node ID and address
14 | - Role within the cluster
15 | - CPU and RAM capacity
16 | - Operating system and Docker version
17 | - Creation timestamp
18 | - Total count of nodes across all clusters
19 |
20 | ## How to use
21 |
22 | Run the following command to deploy the script as a pod in the Rancher Management (local) cluster:
23 |
24 | ```bash
25 | # Deploy the pod in the cluster
26 | kubectl apply -f https://raw.githubusercontent.com/rancherlabs/support-tools/master/collection/rancher/v2.x/systems-information-v2/deploy.yaml
27 |
28 | # Wait for the pod to reach Succeeded status
29 | while [[ $(kubectl get pod rancher-systems-summary-pod -n cattle-system -o 'jsonpath={..status.phase}') != "Succeeded" ]]; do
30 | echo "Waiting for rancher-systems-summary-pod to complete..."
31 | sleep 5
32 | done
33 |
34 | # Follow the logs from the pod
35 | kubectl logs -f pod/rancher-systems-summary-pod -n cattle-system
36 |
37 | # Clean up the pod
38 | kubectl delete pod/rancher-systems-summary-pod -n cattle-system
39 | ```
40 |
41 | > Note: It might take a few minutes for the pod to collect the information and display it in the logs. The script will exit after displaying the information; you should see `Total node count` at the end of the log output.
42 |
43 | Example output:
44 |
45 | ```bash
46 | Rancher Systems Summary Report
47 | ==============================
48 | Run on Tue Oct 7 14:44:27 UTC 2025
49 |
50 | NAME READY STATUS RESTARTS AGE
51 | rancher-5d5896844-bkmlj 1/1 Running 2 (27d ago) 27d
52 | rancher-5d5896844-t8hvc 1/1 Running 6 (6d14h ago) 27d
53 | rancher-5d5896844-wcf7q 1/1 Running 1 (6d8h ago) 27d
54 | Rancher version: v2.12.1
55 | Rancher id: 57299729-c16b-4857-8a48-3a45f36b2b94
56 |
57 | Cluster Id Name K8s Version Provider Created Nodes
58 | c-4kt65 3nuc-harvester v1.32.4+rke2r1 harvester 2025-07-26T21:28:19Z
59 | c-hcrk7 observability v1.32.7+k3s1 k3s 2025-08-20T20:16:37Z
60 | c-m-sh4jmcxr rke2-harv v1.32.6+rke2r1 rke2 2025-08-05T20:04:21Z
61 | local local v1.32.6+k3s1 k3s 2025-07-26T21:23:04Z
62 |
63 | --------------------------------------------------------------------------------
64 | Cluster: 3nuc-harvester (c-4kt65)
65 | Node Id Address etcd Control Plane Worker CPU RAM OS Container Runtime Version Created
66 | machine-br42p 10.10.12.103,nuc-03 true true false 12 65544020Ki Harvester v1.5.1-rc2 containerd://2.0.4-k3s2 2025-07-26T21:30:26Z
67 | machine-f4zxg 10.10.12.101,nuc-01 true true false 12 65560396Ki Harvester v1.5.1-rc2 containerd://2.0.4-k3s2 2025-07-26T21:30:26Z
68 | machine-hqtmv 10.10.12.102,nuc-02 true true false 12 65544008Ki Harvester v1.5.1-rc2 containerd://2.0.4-k3s2 2025-07-26T21:30:26Z
69 | Node count: 3
70 |
71 | --------------------------------------------------------------------------------
72 | Cluster: observability (c-hcrk7)
73 | Node Id Address etcd Control Plane Worker CPU RAM OS Container Runtime Version Created
74 | machine-4j4rp 10.10.12.182,observability-02 true true false 4 16381888Ki SUSE Linux Micro 6.1 containerd://2.0.5-k3s2.32 2025-08-20T20:17:40Z
75 | machine-8bs8x 10.10.12.181,observability-01 true true false 4 16381892Ki SUSE Linux Micro 6.1 containerd://2.0.5-k3s2.32 2025-08-20T20:17:40Z
76 | machine-z5khp 10.10.12.183,observability-03 true true false 4 16381892Ki SUSE Linux Micro 6.1 containerd://2.0.5-k3s2.32 2025-08-20T20:17:40Z
77 | Node count: 3
78 |
79 | --------------------------------------------------------------------------------
80 | Cluster: rke2-harv (c-m-sh4jmcxr)
81 | Node Id Address etcd Control Plane Worker CPU RAM OS Container Runtime Version Created
82 | machine-29qwr 10.10.15.94,rke2-harv-workers-sm-xv9q4-k9lnh false false true 4 8137228Ki SUSE Linux Micro 6.1 containerd://2.0.5-k3s1 2025-08-25T15:38:35Z
83 | machine-f4hwq 10.10.15.80,rke2-harv-control-plane-92bsj-pf5tn true true false 2 4015184Ki SUSE Linux Micro 6.1 containerd://2.0.5-k3s1 2025-08-05T20:13:11Z
84 | machine-fjftz 10.10.15.93,rke2-harv-workers-sm-xv9q4-s688w false false true 4 8137228Ki SUSE Linux Micro 6.1 containerd://2.0.5-k3s1 2025-08-25T15:29:59Z
85 | machine-g6z62 10.10.15.77,rke2-harv-control-plane-92bsj-z6qcp true true false 2 4015184Ki SUSE Linux Micro 6.1 containerd://2.0.5-k3s1 2025-08-05T20:12:37Z
86 | machine-gpbxx 10.10.15.92,rke2-harv-workers-sm-xv9q4-d5h8t false false true 4 8137228Ki SUSE Linux Micro 6.1 containerd://2.0.5-k3s1 2025-08-25T15:29:37Z
87 | machine-l9sl6 10.10.15.76,rke2-harv-control-plane-92bsj-kdm4h true true false 2 4015184Ki SUSE Linux Micro 6.1 containerd://2.0.5-k3s1 2025-08-05T20:10:11Z
88 | Node count: 6
89 |
90 | --------------------------------------------------------------------------------
91 | Cluster: local (local)
92 | Node Id Address etcd Control Plane Worker CPU RAM OS Container Runtime Version Created
93 | machine-bhffb 10.10.12.123,rancher-03 true true false 2 7730528Ki SUSE Linux Enterprise Server 15 SP6 containerd://2.0.5-k3s2.32 2025-07-31T13:32:48Z
94 | machine-mwx5g 10.10.12.122,rancher-02 true true false 2 7730536Ki SUSE Linux Enterprise Server 15 SP6 containerd://2.0.5-k3s1.32 2025-07-26T21:23:21Z
95 | machine-rnwmp 10.10.12.121,rancher-01 true true false 2 7730536Ki SUSE Linux Enterprise Server 15 SP6 containerd://2.0.5-k3s1.32 2025-07-26T21:23:21Z
96 | Node count: 3
97 | --------------------------------------------------------------------------------
98 | Total node count: 15
99 | ```
100 |
--------------------------------------------------------------------------------
/collection/rancher/v2.x/rancher-pod-collector/rancher-pod-collector.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Minimum space needed to run the script (MB)
4 | SPACE="512"
5 |
6 | setup() {
7 |
8 | TMPDIR=$(mktemp -d $MKTEMP_BASEDIR)
9 | techo "Created ${TMPDIR}"
10 |
11 | }
12 |
13 | disk-space() {
14 |
15 | AVAILABLE=$(df -m ${TMPDIR} | tail -n 1 | awk '{ print $4 }')
16 | if [ "${AVAILABLE}" -lt "${SPACE}" ]
17 | then
18 | techo "${AVAILABLE} MB space free, minimum needed is ${SPACE} MB."
19 | DISK_FULL=1
20 | fi
21 |
22 | }
23 |
24 | verify-access() {
25 |
26 | techo "Verifying cluster access"
27 | if [[ ! -z $OVERRIDE_KUBECONFIG ]];
28 | then
29 | ## Just use the kubeconfig that was set by the user
30 | KUBECTL_CMD="kubectl --kubeconfig $OVERRIDE_KUBECONFIG"
31 | elif [[ ! -z $KUBECONFIG ]];
32 | then
33 | KUBECTL_CMD="kubectl"
34 | elif [[ ! -z $KUBERNETES_PORT ]];
35 | then
36 | ## We are inside the k8s cluster or we're using the local kubeconfig
37 | RANCHER_POD=$(kubectl -n cattle-system get pods -l app=rancher --no-headers -o custom-columns=id:metadata.name | head -n1)
38 | KUBECTL_CMD="kubectl -n cattle-system exec -c rancher ${RANCHER_POD} -- kubectl"
39 | elif command -v k3s >/dev/null 2>&1
40 | then
41 | ## We are on k3s node
42 | KUBECTL_CMD="k3s kubectl"
43 | elif command -v docker >/dev/null 2>&1
44 | then
45 | DOCKER_ID=$(docker ps | grep "k8s_rancher_rancher" | cut -d' ' -f1 | head -1)
46 | KUBECTL_CMD="docker exec ${DOCKER_ID} kubectl"
47 | else
48 | ## Giving up
49 | techo "Could not find a kubeconfig"
50 | fi
51 | if ! ${KUBECTL_CMD} cluster-info >/dev/null 2>&1
52 | then
53 | techo "Can not access cluster"
54 | exit 1
55 | else
56 | techo "Cluster access has been verified"
57 | fi
58 | }
59 |
60 | cluster-info() {
61 |
62 | techo "Collecting cluster info"
63 | mkdir -p $TMPDIR/clusterinfo
64 | ${KUBECTL_CMD} cluster-info > $TMPDIR/clusterinfo/cluster-info 2>&1
65 | ${KUBECTL_CMD} get nodes -o wide > $TMPDIR/clusterinfo/get-node-wide 2>&1
66 | ${KUBECTL_CMD} cluster-info dump -o yaml -n cattle-system --log-file-max-size 200 --output-directory $TMPDIR/clusterinfo/cluster-info-dump
67 | ## Grabbing cattle-system items
68 | mkdir -p $TMPDIR/cattle-system/
69 | ${KUBECTL_CMD} get endpoints -n cattle-system -o wide > $TMPDIR/cattle-system/get-endpoints 2>&1
70 | ${KUBECTL_CMD} get ingress -n cattle-system -o yaml > $TMPDIR/cattle-system/get-ingress.yaml 2>&1
71 | ${KUBECTL_CMD} get pods -n cattle-system -o wide > $TMPDIR/cattle-system/get-pods 2>&1
72 | ${KUBECTL_CMD} get svc -n cattle-system -o yaml > $TMPDIR/cattle-system/get-svc.yaml 2>&1
73 | ## Grabbing kube-system items
74 | mkdir -p $TMPDIR/kube-system/
75 | ${KUBECTL_CMD} get configmap -n kube-system cattle-controllers -o yaml > $TMPDIR/kube-system/get-configmap-cattle-controllers.yaml 2>&1
76 | ## Grabbing cluster configuration
77 | mkdir -p $TMPDIR/clusters
78 | ${KUBECTL_CMD} get clusters.management.cattle.io -A > $TMPDIR/clusters/clusters 2>&1
79 | ${KUBECTL_CMD} get clusters.management.cattle.io -A -o yaml > $TMPDIR/clusters/clusters.yaml 2>&1
80 |
81 | }
82 |
83 | enable-debug() {
84 |
85 | techo "Enabling debug for Rancher pods"
86 | for POD in $(${KUBECTL_CMD} get pods -n cattle-system -l app=rancher --no-headers | awk '{print $1}');
87 | do
88 | if [ ! -z "${TRACE}" ]
89 | then
90 | techo "Pod: $POD `${KUBECTL_CMD} exec -n cattle-system -c rancher $POD -- loglevel --set trace`"
91 | else
92 | techo "Pod: $POD `${KUBECTL_CMD} exec -n cattle-system -c rancher $POD -- loglevel --set debug`"
93 | fi
94 | done
95 |
96 | }
97 |
98 | disable-debug() {
99 |
100 | techo "Disabling debug for Rancher pods"
101 | for POD in $(${KUBECTL_CMD} get pods -n cattle-system -l app=rancher --no-headers | awk '{print $1}');
102 | do
103 | techo "Pod: $POD `${KUBECTL_CMD} exec -n cattle-system -c rancher $POD -- loglevel --set info`"
104 | done
105 |
106 | }
107 |
108 | watch-logs() {
109 |
110 | techo "Live tailing debug logs from Rancher pods"
111 | techo "Please use Ctrl+C to finish tailing"
112 | mkdir -p $TMPDIR/rancher-logs/
113 | ${KUBECTL_CMD} -n cattle-system logs -f -l app=rancher -c rancher | tee $TMPDIR/rancher-logs/live-logs
114 |
115 | }
116 |
117 |
118 | pause() {
119 |
120 | read -n1 -rsp $'Press any key once finished logging with debug loglevel, or Ctrl+C to exit and leave debug loglevel enabled... \n'
121 |
122 | }
123 |
124 | archive() {
125 |
126 | FILEDIR=$(dirname $TMPDIR)
127 | FILENAME="$(kubectl config view -o jsonpath='{.current-context}')-$(date +'%Y-%m-%d_%H_%M_%S').tar"
128 | tar --create --file ${FILEDIR}/${FILENAME} --directory ${TMPDIR}/ .
129 | ## gzip separately for Rancher OS
130 | gzip ${FILEDIR}/${FILENAME}
131 |
132 | techo "Created ${FILEDIR}/${FILENAME}.gz"
133 |
134 | }
135 |
136 | cleanup() {
137 |
138 | techo "Removing ${TMPDIR}"
139 | rm -r -f "${TMPDIR}" >/dev/null 2>&1
140 |
141 | }
142 |
143 | help() {
144 |
145 | echo "Rancher Pod Collector
146 | Usage: rancher-pod-collector.sh [ -d -k KUBECONFIG -t -w -f ]
147 |
148 | All flags are optional
149 |
150 | -d Output directory for temporary storage and .tar.gz archive (ex: -d /var/tmp)
151 | -k Override the kubeconfig (ex: ~/.kube/custom)
152 | -t Enable trace logs
153 | -w Live tailing Rancher logs
154 | -f Force log collection if the minimum space isn't available"
155 |
156 | }
157 |
158 | timestamp() {
159 |
160 | date "+%Y-%m-%d %H:%M:%S"
161 |
162 | }
163 |
164 | techo() {
165 |
166 | echo "$(timestamp): $*"
167 |
168 | }
169 |
170 | while getopts ":d:k:ftwh" opt; do
171 | case $opt in
172 | d)
173 | MKTEMP_BASEDIR="${OPTARG}/temp.XXXX"
174 | ;;
175 | k)
176 | OVERRIDE_KUBECONFIG="${OPTARG}"
177 | ;;
178 | f)
179 | FORCE=1
180 | ;;
181 | t)
182 | TRACE=1
183 | ;;
184 | w)
185 | WATCH=1
186 | ;;
187 | h)
188 | help && exit 0
189 | ;;
190 | :)
191 | techo "Option -$OPTARG requires an argument."
192 | exit 1
193 | ;;
194 | *)
195 | help && exit 0
196 | esac
197 | done
198 |
199 | setup
200 | disk-space
201 | if [ -n "${DISK_FULL}" ]
202 | then
203 | if [ -z "${FORCE}" ]
204 | then
205 | techo "Cleaning up and exiting"
206 | cleanup
207 | exit 1
208 | else
209 | techo "-f (force) used, continuing"
210 | fi
211 | fi
212 |
213 | if [ ! -z "${TRACE}" ]
214 | then
215 | techo "WARNING: Trace logging has been set. Please confirm that you understand this may capture sensitive information."
216 | pause
217 | fi
218 | verify-access
219 | enable-debug
220 | if [ ! -z "${WATCH}" ]
221 | then
222 | watch-logs
223 | else
224 | techo "Debug loglevel has been set"
225 | pause
226 | fi
227 | disable-debug
228 | cluster-info
229 | archive
230 | cleanup
231 |
--------------------------------------------------------------------------------
/reverse-rke-state-migrations/reverse-rke-state-migrations.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | set -e
4 | set -o pipefail
5 |
6 | # Use this to specify a custom kubectl base command or options.
7 | KUBECTL="kubectl"
8 |
9 | # Use this to specify a custom curl base command or options.
10 | # By default, we pass options that make curl silent, except when errors occur,
11 | # and we also force CURL to error if HTTP requests do not receive successful
12 | # (2xx) response codes.
13 | CURL="curl -sSf"
14 |
15 | function display_help() {
16 | echo 'This script can be used to reverse RKE cluster state migrations.'
17 | echo 'Please ensure the $RANCHER_TOKEN environment variable is set to a valid Rancher API admin token'
18 | echo 'Please also ensure the following tools are installed:'
19 | echo ' kubectl: https://kubernetes.io/docs/tasks/tools/#kubectl'
20 | echo ' jq: https://jqlang.github.io/jq'
21 | echo ' yq: https://mikefarah.gitbook.io/yq/#install'
22 | echo
23 | echo
24 | echo "Usage: $(basename $0) --rancher-host [Rancher hostname]"
25 | echo
26 | echo ' $RANCHER_TOKEN [Required] Environment variable containing Rancher admin token'
27 | echo ' -n, --rancher-host [Required] Rancher hostname'
28 | echo ' -k, --insecure-skip-tls-verify [Optional] Skip certificate verification'
29 | echo " -d, --debug [Optional] Calls 'set -x'"
30 | echo " -h, --help Print this message"
31 | }
32 |
33 | POSITIONAL_ARGS=()
34 |
35 | while [[ $# -gt 0 ]]; do
36 | case $1 in
37 | -n|--rancher-host)
38 | RANCHER_HOST="$2"
39 | shift # past argument
40 | shift # past value
41 | ;;
42 | -k|--insecure-skip-tls-verify)
43 | KUBECTL="$KUBECTL --insecure-skip-tls-verify"
44 | CURL="$CURL -k"
45 | shift # past argument
46 | ;;
47 | -d|--debug)
48 | set -x
49 | shift # past argument
50 | ;;
51 | -h|--help)
52 | display_help
53 | exit 1
54 | ;;
55 | -*|--*)
56 | echo "Unknown option $1"
57 | display_help
58 | exit 1
59 | ;;
60 | *)
61 | POSITIONAL_ARGS+=("$1") # save positional arg
62 | shift # past argument
63 | ;;
64 | esac
65 | done
66 |
67 | set -- "${POSITIONAL_ARGS[@]}" # restore positional parameters
68 |
69 | # Make sure a Rancher API token was set
70 | if [[ -z "$RANCHER_TOKEN" ]]; then
71 | echo 'ERROR: $RANCHER_TOKEN is unset'
72 | display_help
73 | exit 1
74 | fi
75 |
76 | # Make sure a rancher host was set
77 | if [[ -z "$RANCHER_HOST" ]]; then
78 | echo 'ERROR: --rancher-host is unset'
79 | display_help
80 | exit 1
81 | fi
82 |
83 | # Make sure the jq command is available
84 | if ! command -v "jq" &> /dev/null; then
85 | echo "Missing jq command. See download/installation instructions at https://jqlang.github.io/jq/."
86 | exit 1
87 | fi
88 |
89 | # Make sure the yq command is available
90 | if ! command -v "yq" &> /dev/null; then
91 | echo "Missing yq command. See download/installation instructions at https://mikefarah.gitbook.io/yq/#install."
92 | exit 1
93 | fi
94 |
95 | # Make sure the kubectl command is available
96 | if ! command -v "kubectl" &> /dev/null; then
97 | echo "Missing kubectl command. See download/installation instructions at https://kubernetes.io/docs/tasks/tools/#kubectl."
98 | exit 1
99 | fi
100 |
101 | # Downloads kubeconfig for the cluster with ID $MANAGEMENT_CLUSTER_ID.
102 | downloadKubeConfig() {
103 | $CURL -X 'POST' -H 'accept: application/yaml' -u "$RANCHER_TOKEN" \
104 | "https://${RANCHER_HOST}/v3/clusters/${MANAGEMENT_CLUSTER_ID}?action=generateKubeconfig" \
105 | | yq -r '.config' > .kube/config-"$MANAGEMENT_CLUSTER_ID"
106 | }
107 |
108 | # Downloads kubeconfig for the local cluster.
109 | getLocalKubeConfig() {
110 | $CURL -X 'POST' -H 'accept: application/yaml' -u "$RANCHER_TOKEN" \
111 | "https://${RANCHER_HOST}/v3/clusters/local?action=generateKubeconfig" \
112 | | yq -r '.config' > .kube/config
113 | }
114 |
115 | # Moves downstream cluster state from a secret to a configmap.
116 | reverseMigrateClusterState() {
117 | # Load cluster state from the secret
118 | SECRET=$($KUBECTL get secret full-cluster-state -n kube-system -o yaml)
119 | if [ $? -ne 0 ]; then
120 | echo "[cluster=$MANAGEMENT_CLUSTER_ID] failed to fetch secret full-cluster-state, skipping this cluster"
121 | return
122 | fi
123 |
124 | # Make sure the cluster state is not empty or invalid
125 | CLUSTER_STATE=$(echo "$SECRET" | yq -r '.data.full-cluster-state' | base64 --decode)
126 | if [[ "$?" -ne 0 || "${PIPESTATUS[0]}" -ne 0 || "${PIPESTATUS[1]}" -ne 0 || "${PIPESTATUS[2]}" -ne 0 ]]; then
127 | echo "[cluster=$MANAGEMENT_CLUSTER_ID] failed to decode cluster state, skipping this cluster"
128 | return
129 | fi
130 |
131 | if [ -z "$CLUSTER_STATE" ]; then
132 | echo "[cluster=$MANAGEMENT_CLUSTER_ID] cluster state is empty, skipping this cluster"
133 | return
134 | fi
135 |
136 | # Copy cluster state to a configmap
137 | $KUBECTL create configmap full-cluster-state -n kube-system --from-literal=full-cluster-state="$CLUSTER_STATE"
138 |
139 | # Remove the secret
140 | $KUBECTL delete secret full-cluster-state -n kube-system
141 | }
142 |
143 | # Performs reverse migrations on all downstream RKE clusters.
144 | reverseMigrateRKEClusters() {
145 | # Download kubeconfig for the local cluster
146 | getLocalKubeConfig
147 |
148 | # Fetch all RKE cluster IDs
149 | MANAGEMENT_CLUSTER_IDS=($(
150 | $CURL -H 'accept: application/json' -u "$RANCHER_TOKEN" \
151 | "https://${RANCHER_HOST}/v1/management.cattle.io.cluster?exclude=metadata.managedFields" \
152 | | jq -r '.data[] | select(.spec.rancherKubernetesEngineConfig) | .id')
153 | )
154 |
155 | # Migrate each RKE cluster's state
156 | for MANAGEMENT_CLUSTER_ID in "${MANAGEMENT_CLUSTER_IDS[@]}"
157 | do
158 | # Download and point to downstream cluster kubeconfig
159 | downloadKubeConfig
160 | export KUBECONFIG=".kube/config-$MANAGEMENT_CLUSTER_ID"
161 |
162 | echo "Moving state back to configmap for cluster $MANAGEMENT_CLUSTER_ID"
163 | set +e
164 | reverseMigrateClusterState
165 | set -e
166 | done
167 |
168 | # Remove the migration configmap since we've reversed the migrations
169 | if $KUBECTL get configmap migraterkeclusterstate -n cattle-system > /dev/null 2>&1; then
170 | echo "Deleting configmap migraterkeclusterstate"
171 | $KUBECTL delete configmap migraterkeclusterstate -n cattle-system
172 | fi
173 | }
174 |
175 | main() {
176 | # Create temp directory to which we'll download cluster kubeconfig files.
177 | cd "$(mktemp -d)"
178 | echo "Using temp directory $(pwd)"
179 |
180 | echo "WARNING: 'full-cluster-state' secrets will be deleted for downstream RKE clusters after being moved."
181 | echo -n "Please make sure you've backed them up before proceeding. Proceed? (yes/no) "
182 | read ANSWER
183 |
184 | if [ "$ANSWER" = "yes" ]; then
185 | mkdir -p .kube
186 | reverseMigrateRKEClusters
187 | rm -rf .kube
188 | elif [ "$ANSWER" = "no" ]; then
189 | echo "Aborting"
190 | exit 1
191 | else
192 | echo "Invalid response. Please type 'yes' or 'no'."
193 | exit 1
194 | fi
195 | }
196 |
197 | main
198 |
--------------------------------------------------------------------------------