├── .vscode
└── launch.json
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── check-legacy-endpoint-access
├── README.md
└── check-legacy-endpoint-access.yaml
├── configure-hugepages
└── configure-hugepages.yaml
├── container-insecure-registry
└── insecure-registry-config.yaml
├── containerd-http-proxy
├── README.md
├── configure_http_proxy.yaml
└── sample_configmap.yaml
├── containerd
└── debug-logging
│ ├── README.md
│ └── containerd-debug-logging-daemonset.yaml
├── disable-mglru
└── disable-mglru.yaml
├── disable-smt
├── cos
│ ├── disable_smt_cos.sh
│ ├── disable_smt_cos.sh.md5
│ ├── enable_smt_cos.sh
│ └── enable_smt_cos.sh.md5
└── gke
│ ├── disable-smt.yaml
│ └── enable-smt.yaml
├── drop-small-mss
└── drop-small-mss.yaml
├── enable-kdump
├── cos-enable-kdump.yaml
├── disable-hung-task-panic-sysctl.yaml
├── ubuntu-enable-kdump.yaml
└── ubuntu-kdump.md
├── gvisor
└── enable-gvisor-flags.yaml
├── kubelet-log-config
└── kubelet-log-config.yaml
├── manual-node-upgrade
├── README.md
└── manual_node_upgrade.sh
├── migrating-to-containerd
├── README.md
└── find-nodepools-to-migrate.sh
├── os-audit
├── README.md
└── cos-auditd-logging.yaml
├── perf
├── perf-record.yaml
└── perf-trace.yaml
└── ssh-server-config
├── README.md
├── set-login-grace-time-gdcso-vmware.yaml
└── set-login-grace-time.yaml
/.vscode/launch.json:
--------------------------------------------------------------------------------
1 | {
2 | // Use IntelliSense to learn about possible attributes.
3 | // Hover to view descriptions of existing attributes.
4 | // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
5 | "version": "0.2.0",
6 | "configurations": [
7 | {
8 | "type": "bashdb", // Bash debugger configuration type
9 | "request": "launch",
10 | "name": "Bash-Debug (simplest configuration)",
11 | "program": "${file}" // VS Code variable: the file currently open in the editor
12 | }
13 | ]
14 | }
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # How to Contribute
2 |
3 | We'd love to accept your patches and contributions to this project. There are
4 | just a few small guidelines you need to follow.
5 |
6 | ## Contributor License Agreement
7 |
8 | Contributions to this project must be accompanied by a Contributor License
9 | Agreement. You (or your employer) retain the copyright to your contribution;
10 | this simply gives us permission to use and redistribute your contributions as
11 | part of the project. Head over to <https://cla.developers.google.com/> to see
12 | your current agreements on file or to sign a new one.
13 |
14 | You generally only need to submit a CLA once, so if you've already submitted one
15 | (even if it was for a different project), you probably don't need to do it
16 | again.
17 |
18 | ## Code reviews
19 |
20 | All submissions, including submissions by project members, require review. We
21 | use GitHub pull requests for this purpose. Consult
22 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
23 | information on using pull requests.
24 |
25 | ## Community Guidelines
26 |
27 | This project follows [Google's Open Source Community
28 | Guidelines](https://opensource.google.com/conduct/).
29 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 |
2 | Apache License
3 | Version 2.0, January 2004
4 | http://www.apache.org/licenses/
5 |
6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
7 |
8 | 1. Definitions.
9 |
10 | "License" shall mean the terms and conditions for use, reproduction,
11 | and distribution as defined by Sections 1 through 9 of this document.
12 |
13 | "Licensor" shall mean the copyright owner or entity authorized by
14 | the copyright owner that is granting the License.
15 |
16 | "Legal Entity" shall mean the union of the acting entity and all
17 | other entities that control, are controlled by, or are under common
18 | control with that entity. For the purposes of this definition,
19 | "control" means (i) the power, direct or indirect, to cause the
20 | direction or management of such entity, whether by contract or
21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
22 | outstanding shares, or (iii) beneficial ownership of such entity.
23 |
24 | "You" (or "Your") shall mean an individual or Legal Entity
25 | exercising permissions granted by this License.
26 |
27 | "Source" form shall mean the preferred form for making modifications,
28 | including but not limited to software source code, documentation
29 | source, and configuration files.
30 |
31 | "Object" form shall mean any form resulting from mechanical
32 | transformation or translation of a Source form, including but
33 | not limited to compiled object code, generated documentation,
34 | and conversions to other media types.
35 |
36 | "Work" shall mean the work of authorship, whether in Source or
37 | Object form, made available under the License, as indicated by a
38 | copyright notice that is included in or attached to the work
39 | (an example is provided in the Appendix below).
40 |
41 | "Derivative Works" shall mean any work, whether in Source or Object
42 | form, that is based on (or derived from) the Work and for which the
43 | editorial revisions, annotations, elaborations, or other modifications
44 | represent, as a whole, an original work of authorship. For the purposes
45 | of this License, Derivative Works shall not include works that remain
46 | separable from, or merely link (or bind by name) to the interfaces of,
47 | the Work and Derivative Works thereof.
48 |
49 | "Contribution" shall mean any work of authorship, including
50 | the original version of the Work and any modifications or additions
51 | to that Work or Derivative Works thereof, that is intentionally
52 | submitted to Licensor for inclusion in the Work by the copyright owner
53 | or by an individual or Legal Entity authorized to submit on behalf of
54 | the copyright owner. For the purposes of this definition, "submitted"
55 | means any form of electronic, verbal, or written communication sent
56 | to the Licensor or its representatives, including but not limited to
57 | communication on electronic mailing lists, source code control systems,
58 | and issue tracking systems that are managed by, or on behalf of, the
59 | Licensor for the purpose of discussing and improving the Work, but
60 | excluding communication that is conspicuously marked or otherwise
61 | designated in writing by the copyright owner as "Not a Contribution."
62 |
63 | "Contributor" shall mean Licensor and any individual or Legal Entity
64 | on behalf of whom a Contribution has been received by Licensor and
65 | subsequently incorporated within the Work.
66 |
67 | 2. Grant of Copyright License. Subject to the terms and conditions of
68 | this License, each Contributor hereby grants to You a perpetual,
69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
70 | copyright license to reproduce, prepare Derivative Works of,
71 | publicly display, publicly perform, sublicense, and distribute the
72 | Work and such Derivative Works in Source or Object form.
73 |
74 | 3. Grant of Patent License. Subject to the terms and conditions of
75 | this License, each Contributor hereby grants to You a perpetual,
76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
77 | (except as stated in this section) patent license to make, have made,
78 | use, offer to sell, sell, import, and otherwise transfer the Work,
79 | where such license applies only to those patent claims licensable
80 | by such Contributor that are necessarily infringed by their
81 | Contribution(s) alone or by combination of their Contribution(s)
82 | with the Work to which such Contribution(s) was submitted. If You
83 | institute patent litigation against any entity (including a
84 | cross-claim or counterclaim in a lawsuit) alleging that the Work
85 | or a Contribution incorporated within the Work constitutes direct
86 | or contributory patent infringement, then any patent licenses
87 | granted to You under this License for that Work shall terminate
88 | as of the date such litigation is filed.
89 |
90 | 4. Redistribution. You may reproduce and distribute copies of the
91 | Work or Derivative Works thereof in any medium, with or without
92 | modifications, and in Source or Object form, provided that You
93 | meet the following conditions:
94 |
95 | (a) You must give any other recipients of the Work or
96 | Derivative Works a copy of this License; and
97 |
98 | (b) You must cause any modified files to carry prominent notices
99 | stating that You changed the files; and
100 |
101 | (c) You must retain, in the Source form of any Derivative Works
102 | that You distribute, all copyright, patent, trademark, and
103 | attribution notices from the Source form of the Work,
104 | excluding those notices that do not pertain to any part of
105 | the Derivative Works; and
106 |
107 | (d) If the Work includes a "NOTICE" text file as part of its
108 | distribution, then any Derivative Works that You distribute must
109 | include a readable copy of the attribution notices contained
110 | within such NOTICE file, excluding those notices that do not
111 | pertain to any part of the Derivative Works, in at least one
112 | of the following places: within a NOTICE text file distributed
113 | as part of the Derivative Works; within the Source form or
114 | documentation, if provided along with the Derivative Works; or,
115 | within a display generated by the Derivative Works, if and
116 | wherever such third-party notices normally appear. The contents
117 | of the NOTICE file are for informational purposes only and
118 | do not modify the License. You may add Your own attribution
119 | notices within Derivative Works that You distribute, alongside
120 | or as an addendum to the NOTICE text from the Work, provided
121 | that such additional attribution notices cannot be construed
122 | as modifying the License.
123 |
124 | You may add Your own copyright statement to Your modifications and
125 | may provide additional or different license terms and conditions
126 | for use, reproduction, or distribution of Your modifications, or
127 | for any such Derivative Works as a whole, provided Your use,
128 | reproduction, and distribution of the Work otherwise complies with
129 | the conditions stated in this License.
130 |
131 | 5. Submission of Contributions. Unless You explicitly state otherwise,
132 | any Contribution intentionally submitted for inclusion in the Work
133 | by You to the Licensor shall be under the terms and conditions of
134 | this License, without any additional terms or conditions.
135 | Notwithstanding the above, nothing herein shall supersede or modify
136 | the terms of any separate license agreement you may have executed
137 | with Licensor regarding such Contributions.
138 |
139 | 6. Trademarks. This License does not grant permission to use the trade
140 | names, trademarks, service marks, or product names of the Licensor,
141 | except as required for reasonable and customary use in describing the
142 | origin of the Work and reproducing the content of the NOTICE file.
143 |
144 | 7. Disclaimer of Warranty. Unless required by applicable law or
145 | agreed to in writing, Licensor provides the Work (and each
146 | Contributor provides its Contributions) on an "AS IS" BASIS,
147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148 | implied, including, without limitation, any warranties or conditions
149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150 | PARTICULAR PURPOSE. You are solely responsible for determining the
151 | appropriateness of using or redistributing the Work and assume any
152 | risks associated with Your exercise of permissions under this License.
153 |
154 | 8. Limitation of Liability. In no event and under no legal theory,
155 | whether in tort (including negligence), contract, or otherwise,
156 | unless required by applicable law (such as deliberate and grossly
157 | negligent acts) or agreed to in writing, shall any Contributor be
158 | liable to You for damages, including any direct, indirect, special,
159 | incidental, or consequential damages of any character arising as a
160 | result of this License or out of the use or inability to use the
161 | Work (including but not limited to damages for loss of goodwill,
162 | work stoppage, computer failure or malfunction, or any and all
163 | other commercial damages or losses), even if such Contributor
164 | has been advised of the possibility of such damages.
165 |
166 | 9. Accepting Warranty or Additional Liability. While redistributing
167 | the Work or Derivative Works thereof, You may choose to offer,
168 | and charge a fee for, acceptance of support, warranty, indemnity,
169 | or other liability obligations and/or rights consistent with this
170 | License. However, in accepting such obligations, You may act only
171 | on Your own behalf and on Your sole responsibility, not on behalf
172 | of any other Contributor, and only if You agree to indemnify,
173 | defend, and hold each Contributor harmless for any liability
174 | incurred by, or claims asserted against, such Contributor by reason
175 | of your accepting any such warranty or additional liability.
176 |
177 | END OF TERMS AND CONDITIONS
178 |
179 | APPENDIX: How to apply the Apache License to your work.
180 |
181 | To apply the Apache License to your work, attach the following
182 | boilerplate notice, with the fields enclosed by brackets "[]"
183 | replaced with your own identifying information. (Don't include
184 | the brackets!) The text should be enclosed in the appropriate
185 | comment syntax for the file format. We also recommend that a
186 | file or class name and description of purpose be included on the
187 | same "printed page" as the copyright notice for easier
188 | identification within third-party archives.
189 |
190 | Copyright [yyyy] [name of copyright owner]
191 |
192 | Licensed under the Apache License, Version 2.0 (the "License");
193 | you may not use this file except in compliance with the License.
194 | You may obtain a copy of the License at
195 |
196 | http://www.apache.org/licenses/LICENSE-2.0
197 |
198 | Unless required by applicable law or agreed to in writing, software
199 | distributed under the License is distributed on an "AS IS" BASIS,
200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201 | See the License for the specific language governing permissions and
202 | limitations under the License.
203 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # K8S Node Tools
2 |
3 | This repository contains tools to be used for K8S nodes.
4 |
--------------------------------------------------------------------------------
/check-legacy-endpoint-access/README.md:
--------------------------------------------------------------------------------
1 | The check-legacy-endpoint-access tool is a Kubernetes DaemonSet that checks the metadata server's
2 | legacy access counts for computeMetadata/0.1 and
3 | computeMetadata/v1beta1 every five minutes and writes the result to logs.
4 |
5 | ## How to use it?
6 | Apply it to all nodes in your cluster by running the
7 | following command. Run the command once per cluster per
8 | Google Cloud Platform project.
9 | ```
10 | kubectl apply -f \
11 | https://raw.githubusercontent.com/GoogleCloudPlatform\
12 | /k8s-node-tools/master/check-legacy-endpoint-access/check-legacy-endpoint-access.yaml
13 | ```
14 | ## How to get the result?
15 | Run the command below to get the related logs.
16 | ```
17 | kubectl -n kube-system logs -l app=check-legacy-endpoint-access |
18 | grep "access count"
19 | ```
20 | Below is a sample log entry
21 | ```
22 | 2019-10-17 20:35:12 for node gke-someone-k8s-default-pool-484b3c6d-csgj.c.someone-dev.internal, legacy access count of computeMetadata/0.1 is: 0, legacy access count of computeMetadata/v1beta1 is: 2
23 | ```
24 | If you want to see log history, you can go to GCP console -> Logs Viewer and use
25 | the filter as below
26 | ```
27 | resource.type="container"
28 | resource.labels.namespace_id="kube-system"
29 | logName:"/check-legacy-endpoint-access"
30 | ```
31 |
--------------------------------------------------------------------------------
/check-legacy-endpoint-access/check-legacy-endpoint-access.yaml:
--------------------------------------------------------------------------------
1 | # Copyright 2019 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # https://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | kind: DaemonSet  # each pod logs its node's legacy metadata-endpoint access counts every 300 seconds
16 | apiVersion: apps/v1
17 | metadata:
18 | name: check-legacy-endpoint-access
19 | namespace: kube-system
20 | labels:
21 | app: check-legacy-endpoint-access
22 | spec:
23 | selector:
24 | matchLabels:
25 | app: check-legacy-endpoint-access
26 | template:
27 | metadata:
28 | labels:
29 | app: check-legacy-endpoint-access  # must match spec.selector.matchLabels above
30 | spec:
31 | hostNetwork: true  # pod shares the node's network namespace; NOTE(review): presumably so metadata requests originate from the node itself - confirm
32 | containers:
33 | - name: check-legacy-endpoint-access
34 | image: gcr.io/distroless/python3
35 | command:  # runs the inline Python program below via `python -c`
36 | - python
37 | - -c
38 | - |
39 | from urllib.request import Request, urlopen
40 | from datetime import datetime
41 | from time import sleep
42 |
43 | def curl(url):
44 | request = Request(url)
45 | request.add_header('Metadata-Flavor', 'Google')
46 | response = urlopen(request).read()
47 | return str(response, 'utf-8')
48 |
49 | hostname = curl('http://169.254.169.254/computeMetadata/v1/instance/hostname')
50 | while True:
51 | v01_count = curl('http://169.254.169.254/computeMetadata/v1/instance/legacy-endpoint-access/0.1')
52 | v1beta1_count = curl('http://169.254.169.254/computeMetadata/v1/instance/legacy-endpoint-access/v1beta1')
53 | now = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
54 |
55 | print("{} for node {}, legacy access count of computeMetadata/0.1 is: {}, legacy access count of computeMetadata/v1beta1 is: {}".format(now, hostname, v01_count, v1beta1_count), flush=True)
56 |
57 | sleep(300)
58 |
59 |
--------------------------------------------------------------------------------
/configure-hugepages/configure-hugepages.yaml:
--------------------------------------------------------------------------------
1 | # Copyright 2022 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # https://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 |
16 | # Deploy this DaemonSet to configure hugepages on nodes with the
17 | # "cloud.google.com/gke-configure-hugepages=true" label.
18 | #
19 | # Change the values of NR_HUGEPAGES_2M and NR_HUGEPAGES_1G to suit your needs. See
20 | # https://www.kernel.org/doc/Documentation/vm/hugetlbpage.txt for details.
21 | # This daemonset currently only supports 1G and 2M hugepages. The task will
22 | # also fail if the node it's run on does not have enough free memory for the
23 | # kernel to allocate the requested number of hugepages with the specified size.
24 | #
25 | # WARNING: Changing the hugepages requires a kubelet restart. Therefore, in
26 | # order to avoid disrupting your workloads, it is recommended to create a new
27 | # node pool with the "cloud.google.com/gke-configure-hugepages=true" label in your
28 | # cluster, deploy the DaemonSet to configure hugepages in that node pool,
29 | # and then migrate your workloads to the new node pool.
30 | #
31 | # This DaemonSet works on Standard clusters and cannot be run on Autopilot
32 | # clusters. There is currently no workaround for enabling hugepages on Autopilot clusters.
33 | #
34 |
35 | apiVersion: apps/v1
36 | kind: DaemonSet  # runs the hugepages init script on every node matched by the nodeSelector at the bottom
37 | metadata:
38 | name: gke-configure-hugepages
39 | namespace: kube-system
40 | spec:
41 | selector:
42 | matchLabels:
43 | name: gke-configure-hugepages
44 | updateStrategy:
45 | type: RollingUpdate
46 | template:
47 | metadata:
48 | labels:
49 | name: gke-configure-hugepages
50 | spec:
51 | tolerations:
52 | - operator: Exists  # no key given, so this tolerates every taint
53 | volumes:
54 | - name: host
55 | hostPath:
56 | path: /  # the node's root filesystem; mounted at /host in the init container
57 | hostPID: true  # host PID namespace, so `nsenter -t1` in the script targets the node's PID 1
58 | initContainers:
59 | - name: gke-configure-hugepages
60 | image: "gke.gcr.io/gke-distroless/bash"
61 | env:
62 | # Number of 2M hugepages. Update the value as desired.
63 | - name: NR_HUGEPAGES_2M
64 | value: "0"
65 | # Number of 1G hugepages. Update the value as desired.
66 | - name: NR_HUGEPAGES_1G
67 | value: "0"
68 | command:
69 | - /bin/bash
70 | - -c
71 | - |
72 | set -xeuo pipefail
73 |
74 | function err() { echo >&2 "$@"; }
75 |
76 | # Writes to the nr_hugepages sysfs file for the given size (in kB),
77 | # and then checks that we got that many hugepages afterward. Returns
78 | # error if the result does not match.
79 | #
80 | # $1: hugepages size in kB, as seen in /sys/kernel/mm/hugepages.
81 | # $2: number of hugepages of that size.
82 | function set-nr-hugepages() {
83 | [[ "$#" -eq 2 ]] || return
84 | local size; size="$1"; shift
85 | local number; number="$1"; shift
86 |
87 | local dest; dest="/host/sys/kernel/mm/hugepages/hugepages-${size}/nr_hugepages"
88 |
89 | echo "Attempting to create ${number} hugepages of size ${size}"
90 | echo -n "${number}" >"${dest}"
91 | local actual; actual="$(cat "${dest}")"
92 | if [[ "${actual}" != "${number}" ]]; then
93 | err "Attempted to create ${number} hugepages of size ${size} but got ${actual}"
94 | return 1
95 | fi
96 | echo "Successfully created ${number} hugepages of size ${size}!"
97 | }
98 |
99 | set-nr-hugepages "2048kB" "${NR_HUGEPAGES_2M}"
100 | set-nr-hugepages "1048576kB" "${NR_HUGEPAGES_1G}"
101 |
102 | echo "Restarting kubelet..."
103 | chroot /host nsenter -a -t1 -- systemctl restart kubelet.service
104 | echo "Success!"
105 | volumeMounts:
106 | - name: host
107 | mountPath: /host
108 | resources:
109 | requests:
110 | memory: 5Mi
111 | cpu: 5m
112 | securityContext:
113 | privileged: true  # the script writes host sysfs files and uses chroot/nsenter
114 | containers:
115 | - image: gcr.io/google-containers/pause:3.2  # placeholder main container; the work is done by the init container above
116 | name: pause
117 | # Ensures that the pods will only run on the nodes having the correct
118 | # label.
119 | nodeSelector:
120 | "cloud.google.com/gke-configure-hugepages": "true"
121 |
--------------------------------------------------------------------------------
/container-insecure-registry/insecure-registry-config.yaml:
--------------------------------------------------------------------------------
1 | # Allowlists a plain-HTTP ("insecure") registry for containerd on GKE nodes.
2 | #
3 | # Set the ADDRESS env var below to the registry's address before applying;
4 | # the script exits with an error while it still holds the REGISTRY_ADDRESS
5 | # placeholder.
6 | apiVersion: apps/v1
7 | kind: DaemonSet
8 | metadata:
9 | name: insecure-registries
10 | namespace: default
11 | labels:
12 | k8s-app: insecure-registries
13 | spec:
14 | selector:
15 | matchLabels:
16 | name: insecure-registries
17 | updateStrategy:
18 | type: RollingUpdate
19 | template:
20 | metadata:
21 | labels:
22 | name: insecure-registries
23 | spec:
24 | nodeSelector:
25 | cloud.google.com/gke-container-runtime: "containerd"
26 | hostPID: true
27 | containers:
28 | - name: startup-script
29 | image: registry.k8s.io/startup-script:v2
30 | imagePullPolicy: Always
31 | securityContext:
32 | privileged: true
33 | env:
34 | - name: ADDRESS
35 | value: "REGISTRY_ADDRESS"
36 | - name: STARTUP_SCRIPT
37 | value: |
38 | set -o errexit
39 | set -o pipefail
40 | set -o nounset
41 |
42 | if [[ -z "$ADDRESS" || "$ADDRESS" == "REGISTRY_ADDRESS" ]]; then
43 | echo "Error: Environment variable ADDRESS is not set in containers.spec.env"
44 | exit 1
45 | fi
46 |
47 | echo "Allowlisting insecure registries..."
48 | containerd_config="/etc/containerd/config.toml"
49 | # Pull the registry config_path (if present) out of the containerd config.
50 | hostpath=$(sed -nr 's; config_path = "([-/a-z0-9_.]+)";\1;p' "$containerd_config")
51 | if [[ -z "$hostpath" ]]; then
52 | echo "Node uses CRI config model V1 (deprecated), adding mirror under $containerd_config..."
53 | grep -qxF '[plugins."io.containerd.grpc.v1.cri".registry.mirrors."'$ADDRESS'"]' "$containerd_config" || \
54 | echo -e '[plugins."io.containerd.grpc.v1.cri".registry.mirrors."'$ADDRESS'"]\n endpoint = ["http://'$ADDRESS'"]' >> "$containerd_config"
55 | else
56 | host_config_dir="$hostpath/$ADDRESS"
57 | host_config_file="$host_config_dir/hosts.toml"
58 | echo "Node uses CRI config model V2, adding mirror under $host_config_file..."
59 | if [[ ! -e "$host_config_file" ]]; then
60 | mkdir -p "$host_config_dir"
61 | echo -e "server = \"https://$ADDRESS\"\n" > "$host_config_file"
62 | fi
63 | # Append the host table only if it is missing, so a re-run of this script
64 | # does not add a duplicate table to hosts.toml (mirrors the V1 guard above).
65 | host_table="[host.\"http://$ADDRESS\"]"
66 | if ! grep -qxF "$host_table" "$host_config_file"; then
67 | echo -e "$host_table\n capabilities = [\"pull\", \"resolve\"]\n" >> "$host_config_file"
68 | fi
69 | fi
70 | echo "Reloading systemd management configuration"
71 | systemctl daemon-reload
72 | echo "Restarting containerd..."
73 | systemctl restart containerd
74 |
--------------------------------------------------------------------------------
/containerd-http-proxy/README.md:
--------------------------------------------------------------------------------
1 | # Configure Containerd HTTP/S proxy
2 |
3 | This guide outlines the steps to configure an HTTP/S proxy for Containerd on GKE nodes, including Autopilot mode clusters. Typical use cases include accessing external image repositories for container pulls.
4 |
5 | ## Instructions
6 |
7 | 1. Create a ConfigMap named `containerd-proxy-configmap` that includes the values for `HTTP_PROXY`, `HTTPS_PROXY`, and `NO_PROXY` (optional). These values are used as environment variables to configure the proxy settings for the Containerd service. A sample ConfigMap configuration is provided in `sample_configmap.yaml`. Please modify this sample with your proxy settings before applying it to your cluster.
8 |
9 | ```
10 | kubectl apply -f sample_configmap.yaml
11 | ```
12 |
13 | 2. Deploy the daemonset in `configure_http_proxy.yaml`. As it has been specifically allowlisted for GKE Autopilot, this **manifest in this repo cannot be changed if you are deploying to GKE Autopilot mode clusters**.
14 |
15 | ```
16 | kubectl apply -f configure_http_proxy.yaml
17 | ```
18 |
19 | ## Note
20 | **Any update to `configure_http_proxy.yaml` will break the allowlist for GKE Autopilot**. If you need to make a change, please ask your Google Cloud sales representative to reach out to the GKE Autopilot team.
21 |
--------------------------------------------------------------------------------
/containerd-http-proxy/configure_http_proxy.yaml:
--------------------------------------------------------------------------------
1 | kind: DaemonSet
2 | apiVersion: apps/v1
3 | metadata:
4 | name: containerd-http-proxy
5 | spec:
6 | selector:
7 | matchLabels:
8 | name: containerd-http-proxy
9 | template:
10 | metadata:
11 | labels:
12 | name: containerd-http-proxy
13 | spec:
14 | hostPID: true
15 | volumes:
16 | - name: systemd-containerd-service
17 | hostPath:
18 | path: /etc/systemd/system/containerd.service.d
19 | type: DirectoryOrCreate
20 | initContainers:
21 | - name: startup-script
22 | image: gke.gcr.io/debian-base:bookworm-v1.0.0-gke.1
23 | imagePullPolicy: IfNotPresent
24 | securityContext:
25 | privileged: true
26 | volumeMounts:
27 | - name: systemd-containerd-service
28 | mountPath: /etc/systemd/system/containerd.service.d
29 | command:
30 | - /bin/sh
31 | - -c
32 | - |
33 | set -e
34 | set -u
35 |
36 | validate_proxy() {
37 | input_string=$1
38 |
39 | if echo "$input_string" | grep -q ' '; then
40 | echo "Error: Input cannot contain spaces. Input: '$input_string'"
41 | exit 1
42 | fi
43 |
44 | if echo "$input_string" | sed 1d | grep -q .; then
45 | echo "Error: Input cannot contain newline. Input: '$input_string'"
46 | exit 1
47 | fi
48 |
49 | if echo "$input_string" | grep -q -e '"' -e "'"; then
50 | echo "Error: Input cannot contain quotes. Input: '$input_string'"
51 | exit 1
52 | fi
53 | }
54 |
55 | validate_proxy "${HTTP_PROXY:-}"
56 | validate_proxy "${HTTPS_PROXY:-}"
57 | validate_proxy "${NO_PROXY:-localhost}"
58 |
59 | cat > /etc/systemd/system/containerd.service.d/http-proxy.conf <&2
73 | env:
74 | - name: HTTP_PROXY
75 | valueFrom:
76 | configMapKeyRef:
77 | name: containerd-proxy-configmap
78 | key: HTTP_PROXY
79 | - name: HTTPS_PROXY
80 | valueFrom:
81 | configMapKeyRef:
82 | name: containerd-proxy-configmap
83 | key: HTTPS_PROXY
84 | - name: NO_PROXY
85 | valueFrom:
86 | configMapKeyRef:
87 | name: containerd-proxy-configmap
88 | key: NO_PROXY
89 | optional: true
90 | containers:
91 | - name: pause-container
92 | image: gke.gcr.io/pause:3.7
93 | imagePullPolicy: IfNotPresent
94 |
95 |
--------------------------------------------------------------------------------
/containerd-http-proxy/sample_configmap.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: ConfigMap
3 | metadata:
4 | name: containerd-proxy-configmap  # name referenced by the configMapKeyRef entries in configure_http_proxy.yaml
5 | data:
6 | HTTP_PROXY: http://proxy.example.com:80  # sample value; replace with your proxy
7 | HTTPS_PROXY: https://proxy.example.com:443  # sample value; replace with your proxy
8 | NO_PROXY: localhost,metadata.google.internal  # optional key (the DaemonSet marks this reference optional)
9 |
--------------------------------------------------------------------------------
/containerd/debug-logging/README.md:
--------------------------------------------------------------------------------
1 | # Containerd Debug Logging
2 |
3 | The `containerd-debug-logging-daemonset.yaml` is a daemonset that enables
4 | containerd debug logs. These logs may be useful for troubleshooting. Note that
5 | debug logs are quite verbose and, as such, increase the log volume.
6 |
7 | The daemonset includes a nodeSelector targeting
8 | `containerd-debug-logging=true`. To run the daemonset on selected nodes for
9 | debugging, label the nodes with the corresponding label (`kubectl label node
10 | ${NODE_NAME} containerd-debug-logging=true`).
11 |
12 | Otherwise, modify the daemonset's existing
13 | [nodeSelector](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#nodeselector)
14 | or add a [node
15 | affinity](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#node-affinity)
16 | to target the desired nodes or node pools.
17 |
--------------------------------------------------------------------------------
/containerd/debug-logging/containerd-debug-logging-daemonset.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: apps/v1
2 | kind: DaemonSet  # switches containerd to debug-level logging on the nodes selected below
3 | metadata:
4 | name: containerd-debug-logging
5 | namespace: default
6 | labels:
7 | k8s-app: containerd-debug-logging
8 | spec:
9 | selector:
10 | matchLabels:
11 | name: containerd-debug-logging
12 | template:
13 | metadata:
14 | labels:
15 | name: containerd-debug-logging
16 | spec:
17 | nodeSelector:
18 | containerd-debug-logging: "true"  # schedule only onto nodes carrying this label
19 | hostPID: true
20 | containers:
21 | - name: startup-script
22 | image: gke.gcr.io/startup-script:v2
23 | imagePullPolicy: Always
24 | securityContext:
25 | privileged: true
26 | env:
27 | - name: STARTUP_SCRIPT  # script text: writes a systemd drop-in that overrides containerd's ExecStart with --log-level debug, then reloads systemd and restarts containerd; presumably executed by the startup-script image - confirm
28 | value: |
29 | set -o errexit
30 | set -o pipefail
31 | set -o nounset
32 |
33 | echo "creating containerd.service.d directory"
34 | mkdir -p /etc/systemd/system/containerd.service.d
35 | echo "creating 10-level_debug.conf file"
36 | echo -e "[Service]\nExecStart=\nExecStart=/usr/bin/containerd --log-level debug" > /etc/systemd/system/containerd.service.d/10-level_debug.conf
37 | echo "Reloading systemd management configuration"
38 | systemctl daemon-reload
39 | echo "Restarting containerd..."
40 | systemctl restart containerd
41 |
--------------------------------------------------------------------------------
/disable-mglru/disable-mglru.yaml:
--------------------------------------------------------------------------------
1 | # Copyright 2024 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # https://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | # This DaemonSet disables the kernel option Multi-Gen LRU.
16 | #
17 | # - First, label your GKE node pools with `disable-mglru=true` or your
18 | # preferred label; if you use a different label, also update the
19 | # nodeSelector section below.
20 | # - Next, deploy the DaemonSet to your cluster.
21 | apiVersion: apps/v1
22 | kind: DaemonSet
23 | metadata:
24 |   name: disable-mglru
25 |   namespace: default
26 |   labels:
27 |     k8s-app: disable-mglru
28 | spec:
29 |   selector:
30 |     matchLabels:
31 |       name: disable-mglru
32 |   template:
33 |     metadata:
34 |       labels:
35 |         name: disable-mglru
36 |     spec:
37 |       hostPID: true
38 |       # Restricts the pods to nodes labeled `disable-mglru=true`.
39 |       nodeSelector:
40 |         disable-mglru: "true"
41 |       containers:
42 |         - name: startup-script
43 |           image: gke.gcr.io/startup-script:v2
44 |           imagePullPolicy: Always
45 |           env:
46 |             - name: STARTUP_SCRIPT
47 |               # Writes `n` to the lru_gen `enabled` switch in sysfs.
48 |               value: |
49 |                 set -o errexit
50 |                 set -o pipefail
51 |                 set -o nounset
52 |                 echo n > /sys/kernel/mm/lru_gen/enabled
53 |           securityContext:
54 |             privileged: true
55 |
--------------------------------------------------------------------------------
/disable-smt/cos/disable_smt_cos.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Copyright 2019 Google LLC
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # https://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | set -euo pipefail  # also fail on unset variables and failed pipelines, as enable_smt_cos.sh does
18 |
19 | # Exits if the system uses secure boot.
20 | function check_not_secure_boot() {
21 | if [[ ! -d "/sys/firmware/efi" ]]; then
22 | return
23 | fi
24 |
25 | efi="$(mktemp -d)"
26 | mount -t efivarfs none "${efi}"
27 |
28 | # Read the secure boot variable.
29 | secure_boot="$(hexdump -v -e '/1 "%02X "' ${efi}/SecureBoot-*)"
30 |
31 | # Clean up
32 | umount "${efi}"
33 | rmdir "${efi}"
34 |
35 | # https://wiki.archlinux.org/index.php/Secure_Boot
36 | if [[ "${secure_boot}" == "06 00 00 00 01 " ]]; then
37 | echo "Secure Boot is enabled. Boot options cannot be changed."
38 | exit 1
39 | fi
40 | }
41 |
42 | # Adds 'nosmt' to the kernel command line in grub.cfg and reboots, unless the
43 | # running kernel was already booted with 'nosmt'.
44 | #
45 | # NOTE(review): a sibling disable_smt_cos.sh.md5 file exists; presumably it pins
46 | # this script's checksum and must be regenerated after any edit - confirm.
47 | function disable_smt() {
48 |   # Match 'nosmt' as a whole argument, including as the first or last one on
49 |   # the command line (the previous " nosmt " pattern missed those positions).
50 |   if grep -Eq '(^| )nosmt( |$)' /proc/cmdline; then
51 |     echo "'nosmt' already present on the kernel command line. Nothing to do."
52 |     return
53 |   fi
54 |   echo "Attempting to set 'nosmt' on the kernel command line."
55 |   if [[ "${EUID}" -ne 0 ]]; then
56 |     echo "This script must be run as root."
57 |     return 1
58 |   fi
59 |   check_not_secure_boot
60 |
61 |   # Patch grub.cfg on the partition mounted from /dev/sda12.
62 |   dir="$(mktemp -d)"
63 |   mount /dev/sda12 "${dir}"
64 |   # Collapse any 'nosmt' that already follows 'cros_efi' so that reruns never
65 |   # stack duplicate arguments.
66 |   sed -i -e "s|cros_efi\( nosmt\)*|cros_efi nosmt|g" "${dir}/efi/boot/grub.cfg"
67 |   umount "${dir}"
68 |   rmdir "${dir}"
69 |   echo "Rebooting."
70 |   reboot
71 | }
72 |
73 | disable_smt
74 |
--------------------------------------------------------------------------------
/disable-smt/cos/disable_smt_cos.sh.md5:
--------------------------------------------------------------------------------
1 | f11fa6d1a69c3008a5eb1e037aef3cf3
2 |
--------------------------------------------------------------------------------
/disable-smt/cos/enable_smt_cos.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Copyright 2019 Google LLC
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # https://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | set -euo pipefail  # errexit, nounset, pipefail
18 |
19 | # Exits if the system uses secure boot.
20 | function check_not_secure_boot() {
21 | if [[ ! -d "/sys/firmware/efi" ]]; then
22 | return
23 | fi
24 |
25 | efi="$(mktemp -d)"
26 | mount -t efivarfs none "${efi}"
27 |
28 | # Read the secure boot variable.
29 | secure_boot="$(hexdump -v -e '/1 "%02X "' ${efi}/SecureBoot-*)"
30 |
31 | # Clean up
32 | umount "${efi}"
33 | rmdir "${efi}"
34 |
35 | # https://wiki.archlinux.org/index.php/Secure_Boot
36 | if [[ "${secure_boot}" == "06 00 00 00 01 " ]]; then
37 | echo "Secure Boot is enabled. Boot options cannot be changed."
38 | exit 1
39 | fi
40 | }
41 |
42 | # Removes 'nosmt' from the kernel command line in grub.cfg and reboots, unless
43 | # the running kernel was booted without 'nosmt'.
44 | #
45 | # NOTE(review): a sibling enable_smt_cos.sh.md5 file exists; presumably it pins
46 | # this script's checksum and must be regenerated after any edit - confirm.
47 | function enable_smt() {
48 |   # Match 'nosmt' as a whole argument, including as the first or last one on
49 |   # the command line (the previous " nosmt " pattern missed those positions).
50 |   if ! grep -Eq '(^| )nosmt( |$)' /proc/cmdline; then
51 |     echo "'nosmt' is not present on the kernel command line. Nothing to do."
52 |     return
53 |   fi
54 |   echo "Attempting to remove 'nosmt' on the kernel command line."
55 |   if [[ "${EUID}" -ne 0 ]]; then
56 |     echo "This script must be run as root."
57 |     return 1
58 |   fi
59 |   check_not_secure_boot
60 |
61 |   # Patch grub.cfg on the partition mounted from /dev/sda12.
62 |   dir="$(mktemp -d)"
63 |   mount /dev/sda12 "${dir}"
64 |   sed -i -e "s| nosmt||g" "${dir}/efi/boot/grub.cfg"
65 |   umount "${dir}"
66 |   rmdir "${dir}"
67 |   echo "Rebooting."
68 |   reboot
69 | }
70 |
71 | enable_smt
--------------------------------------------------------------------------------
/disable-smt/cos/enable_smt_cos.sh.md5:
--------------------------------------------------------------------------------
1 | 78e4c15395235663789022f4cf3e0b60
--------------------------------------------------------------------------------
/disable-smt/gke/disable-smt.yaml:
--------------------------------------------------------------------------------
1 | # Copyright 2019 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # https://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 |
16 | # Deploy this DaemonSet to disable hyper-threading on the nodes with the
17 | # "cloud.google.com/gke-smt-disabled=true" label.
18 | #
19 | # WARNING: Disabling hyper-threading might have severe performance impact on
20 | # your clusters and application. Please ensure that this is acceptable before
21 | # deploying this to your production clusters.
22 | #
23 | # WARNING: Disabling hyper-threading requires node reboot. Therefore, in order
24 | # to avoid disrupting your workloads, it is recommended to create a new node
25 | # pool with the "cloud.google.com/gke-smt-disabled=true" label in your cluster,
26 | # deploy the DaemonSet to disable hyper-threading in that node pool, and then
27 | # migrate your workloads to the new node pool.
28 |
29 | #
30 | # NOTE:
31 | # It's recommended to use the --threads-per-core flag on the node-pool to
32 | # configure SMT setting on nodes.
33 | # https://cloud.google.com/kubernetes-engine/docs/how-to/configure-smt
34 | #
35 |
36 | apiVersion: apps/v1
37 | kind: DaemonSet
38 | metadata:
39 |   name: disable-smt
40 |   namespace: kube-system
41 | spec:
42 |   selector:
43 |     matchLabels:
44 |       name: disable-smt
45 |   updateStrategy:
46 |     type: RollingUpdate
47 |   template:
48 |     metadata:
49 |       labels:
50 |         name: disable-smt
51 |     spec:
52 |       # Tolerate every taint so that all labeled nodes are covered.
53 |       tolerations:
54 |         - operator: Exists
55 |       volumes:
56 |         # The node's root filesystem; mounted at /host in the init container.
57 |         - name: host
58 |           hostPath:
59 |             path: /
60 |       hostPID: true
61 |       initContainers:
62 |         - name: smt
63 |           image: bash
64 |           command:
65 |             - /usr/local/bin/bash
66 |             - -c
67 |             - |
68 |               set -euo pipefail
69 |               smt_control="/host/sys/devices/system/cpu/smt/control"
70 |               smt_state="$(cat "${smt_control}")"
71 |               echo "SMT is set to ${smt_state}"
72 |               # Any state other than "on" (off, forceoff, notsupported,
73 |               # notimplemented) means SMT is not active; writing to the control
74 |               # file is then unnecessary or rejected, so leave the node alone.
75 |               if [[ "${smt_state}" != "on" ]]; then
76 |                 echo "SMT is not active. Nothing to do."
77 |                 exit 0
78 |               fi
79 |               echo "Setting SMT to off"
80 |               echo -n "off" > "${smt_control}"
81 |               echo "Restarting Kubelet..."
82 |               chroot /host nsenter --target=1 --all -- systemctl restart kubelet.service
83 |           volumeMounts:
84 |             - name: host
85 |               mountPath: /host
86 |           resources:
87 |             requests:
88 |               memory: 5Mi
89 |               cpu: 5m
90 |           securityContext:
91 |             privileged: true
92 |       containers:
93 |         # Digest-pinned pause image, as used by the kdump and gVisor DaemonSets.
94 |         - image: gke.gcr.io/pause:3.8@sha256:880e63f94b145e46f1b1082bb71b85e21f16b99b180b9996407d61240ceb9830
95 |           name: pause
96 |       # Ensures that the pods will only run on the nodes having the certain
97 |       # label.
98 |       nodeSelector:
99 |         "cloud.google.com/gke-smt-disabled": "true"
100 |
--------------------------------------------------------------------------------
/disable-smt/gke/enable-smt.yaml:
--------------------------------------------------------------------------------
1 | # Copyright 2019 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # https://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 |
16 | # Deploy this DaemonSet to enable hyper-threading on the nodes with the
17 | # "cloud.google.com/gke-smt-disabled=false" label.
18 | #
19 | # WARNING: Enabling hyper-threading may make the node vulnerable to
20 | # Microarchitectural Data Sampling (MDS). Please ensure that this is acceptable
21 | # before deploying this to your production clusters.
22 | #
23 | # WARNING: Enabling hyper-threading requires node reboot. Therefore, in order
24 | # to avoid disrupting your workloads, it is recommended to create a new node
25 | # pool with the "cloud.google.com/gke-smt-disabled=false" label in your cluster,
26 | # deploy the DaemonSet to enable hyper-threading in that node pool, and then
27 | # migrate your workloads to the new node pool.
28 |
29 | #
30 | # NOTE:
31 | # It's recommended to use the --threads-per-core flag on the node-pool to
32 | # configure SMT setting on nodes.
33 | # https://cloud.google.com/kubernetes-engine/docs/how-to/configure-smt
34 | #
35 |
36 | apiVersion: apps/v1
37 | kind: DaemonSet
38 | metadata:
39 |   name: enable-smt
40 |   namespace: kube-system
41 | spec:
42 |   selector:
43 |     matchLabels:
44 |       name: enable-smt
45 |   updateStrategy:
46 |     type: RollingUpdate
47 |   template:
48 |     metadata:
49 |       labels:
50 |         name: enable-smt
51 |     spec:
52 |       # Tolerate every taint so that all labeled nodes are covered.
53 |       tolerations:
54 |         - operator: Exists
55 |       volumes:
56 |         # The node's root filesystem; mounted at /host in the init container.
57 |         - name: host
58 |           hostPath:
59 |             path: /
60 |       hostPID: true
61 |       initContainers:
62 |         - name: smt
63 |           image: bash
64 |           command:
65 |             - /usr/local/bin/bash
66 |             - -c
67 |             - |
68 |               set -euo pipefail
69 |               smt_control="/host/sys/devices/system/cpu/smt/control"
70 |               smt_state="$(cat "${smt_control}")"
71 |               echo "SMT is set to ${smt_state}"
72 |               # Already enabled: skip the write and the kubelet restart.
73 |               if [[ "${smt_state}" == "on" ]]; then
74 |                 echo "SMT is already on. Nothing to do."
75 |                 exit 0
76 |               fi
77 |               echo "Setting SMT to on"
78 |               echo -n "on" > "${smt_control}"
79 |               echo "Restarting Kubelet..."
80 |               chroot /host nsenter --target=1 --all -- systemctl restart kubelet.service
81 |           volumeMounts:
82 |             - name: host
83 |               mountPath: /host
84 |           resources:
85 |             requests:
86 |               memory: 5Mi
87 |               cpu: 5m
88 |           securityContext:
89 |             privileged: true
90 |       containers:
91 |         # Digest-pinned pause image, as used by the kdump and gVisor DaemonSets.
92 |         - image: gke.gcr.io/pause:3.8@sha256:880e63f94b145e46f1b1082bb71b85e21f16b99b180b9996407d61240ceb9830
93 |           name: pause
94 |       # Ensures that the pods will only run on the nodes having the certain
95 |       # label.
96 |       nodeSelector:
97 |         "cloud.google.com/gke-smt-disabled": "false"
98 |
--------------------------------------------------------------------------------
/drop-small-mss/drop-small-mss.yaml:
--------------------------------------------------------------------------------
1 | # Copyright 2019 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # https://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | apiVersion: apps/v1
16 | kind: DaemonSet
17 | metadata:
18 |   name: drop-small-mss
19 |   namespace: kube-system
20 |   labels:
21 |     app: drop-small-mss
22 | spec:
23 |   selector:
24 |     matchLabels:
25 |       app: drop-small-mss
26 |   template:
27 |     metadata:
28 |       labels:
29 |         app: drop-small-mss
30 |       annotations:
31 |         scheduler.alpha.kubernetes.io/critical-pod: ""
32 |     spec:
33 |       hostPID: true
34 |       containers:
35 |         - name: drop-small-mss
36 |           # Same startup-script image location as the other DaemonSets here.
37 |           image: gke.gcr.io/startup-script:v2
38 |           imagePullPolicy: Always
39 |           securityContext:
40 |             privileged: true
41 |           env:
42 |             - name: STARTUP_SCRIPT
43 |               value: |
44 |                 #! /bin/bash
45 |
46 |                 set -o errexit
47 |                 set -o pipefail
48 |                 set -o nounset
49 |
50 |                 # Insert the rule only when it is not present yet, so repeated
51 |                 # runs do not stack duplicate rules.
52 |                 if ! iptables -w -t mangle -C PREROUTING -m comment --comment "drop-small-mss" -p tcp -m tcpmss --mss 1:500 -j DROP 2>/dev/null; then
53 |                   iptables -w -t mangle -I PREROUTING -m comment --comment "drop-small-mss" -p tcp -m tcpmss --mss 1:500 -j DROP
54 |                 fi
55 |       priorityClassName: system-node-critical
56 |       tolerations:
57 |         - operator: Exists
58 |
--------------------------------------------------------------------------------
/enable-kdump/cos-enable-kdump.yaml:
--------------------------------------------------------------------------------
1 | # Copyright 2019 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # https://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 |
16 | # Deploy this DaemonSet to enable kdump on the COS nodes with the
17 | # "cloud.google.com/gke-kdump-enabled=true" label.
18 | #
19 | # WARNING: Enabling kdump requires node reboot. Therefore, in order to avoid
20 | # disrupting your workloads, it is recommended to create a new node pool with
21 | # the "cloud.google.com/gke-kdump-enabled=true" label in your cluster,
22 | # deploy the DaemonSet to enable kdump in that node pool, and then migrate
23 | # your workloads to the new node pool.
24 |
25 | apiVersion: apps/v1
26 | kind: DaemonSet
27 | metadata:
28 |   name: enable-kdump
29 |   namespace: kube-system
30 | spec:
31 |   updateStrategy:
32 |     type: RollingUpdate
33 |   selector:
34 |     matchLabels:
35 |       name: enable-kdump
36 |   template:
37 |     metadata:
38 |       labels:
39 |         name: enable-kdump
40 |     spec:
41 |       # Both labels must be present on a node for the pod to be scheduled there.
42 |       nodeSelector:
43 |         "cloud.google.com/gke-kdump-enabled": "true"
44 |         "cloud.google.com/gke-os-distribution": "cos"
45 |       volumes:
46 |         # Root of the node's filesystem; mounted at /host below.
47 |         - name: host
48 |           hostPath:
49 |             path: /
50 |       initContainers:
51 |         - name: enable-kdump
52 |           image: ubuntu
53 |           securityContext:
54 |             privileged: true
55 |           volumeMounts:
56 |             - name: host
57 |               mountPath: /host
58 |           resources:
59 |             requests:
60 |               cpu: 5m
61 |               memory: 5Mi
62 |           # Calls the node's kdump_helper through chroot and reboots the node
63 |           # when kdump reports enabled but not ready.
64 |           command:
65 |             - /bin/bash
66 |             - -c
67 |             - |
68 |               function verify_base_image {
69 |                 local id="$(grep "^ID=" /host/etc/os-release)"
70 |                 if [[ "${id#*=}" != "cos" ]]; then
71 |                   echo "This kdump feature switch is designed to run on Container-Optimized OS only"
72 |                   exit 0
73 |                 fi
74 |               }
75 |               function check_kdump_feature {
76 |                 chroot /host /usr/sbin/kdump_helper show
77 |               }
78 |               function enable_kdump_feature_and_reboot_if_needed {
79 |                 chroot /host /usr/sbin/kdump_helper enable
80 |                 local -r is_enabled=$(chroot /host /usr/sbin/kdump_helper show | grep "kdump enabled" | sed -rn "s/kdump enabled: (.*)/\1/p")
81 |                 local -r is_ready=$(chroot /host /usr/sbin/kdump_helper show | grep "kdump ready" | sed -rn "s/kdump ready: (.*)/\1/p")
82 |                 if [[ "${is_enabled}" == "true" && "${is_ready}" == "false" ]]; then
83 |                   echo "kdump is enabled. Rebooting for it to take effect."
84 |                   chroot /host systemctl reboot
85 |                 fi
86 |               }
87 |               verify_base_image
88 |               check_kdump_feature
89 |               enable_kdump_feature_and_reboot_if_needed
90 |       containers:
91 |         - name: pause
92 |           image: gke.gcr.io/pause:3.8@sha256:880e63f94b145e46f1b1082bb71b85e21f16b99b180b9996407d61240ceb9830
93 |
--------------------------------------------------------------------------------
/enable-kdump/disable-hung-task-panic-sysctl.yaml:
--------------------------------------------------------------------------------
1 | # Copyright 2019 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # https://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | apiVersion: apps/v1
16 | kind: DaemonSet
17 | metadata:
18 |   name: disable-hung-task-panic-sysctl
19 |   labels:
20 |     app: disable-hung-task-panic-sysctl
21 | spec:
22 |   selector:
23 |     matchLabels:
24 |       name: disable-hung-task-panic-sysctl
25 |   template:
26 |     metadata:
27 |       labels:
28 |         name: disable-hung-task-panic-sysctl
29 |     spec:
30 |       hostPID: true
31 |       # Only Ubuntu nodes that carry the opt-in label are targeted.
32 |       nodeSelector:
33 |         "disable-hung-task-panic-sysctl": "true"
34 |         "cloud.google.com/gke-os-distribution": "ubuntu"
35 |       containers:
36 |         - name: disable-hung-task-panic-sysctl
37 |           image: debian
38 |           imagePullPolicy: Always
39 |           securityContext:
40 |             privileged: true
41 |           # nsenter joins the namespaces of PID 1 and runs the script there.
42 |           command:
43 |             - /usr/bin/nsenter
44 |             - -t 1
45 |             - -m
46 |             - -u
47 |             - -i
48 |             - -n
49 |             - -p
50 |             - --
51 |             - /bin/bash
52 |             - -c
53 |             - |
54 |
55 |               set -o errexit
56 |               set -o pipefail
57 |               set -o nounset
58 |
59 |               echo "setting sysctls"
60 |               sysctl -w kernel.hung_task_panic=0
61 |               echo "sysctls are set"
62 |               sleep 10
63 |
--------------------------------------------------------------------------------
/enable-kdump/ubuntu-enable-kdump.yaml:
--------------------------------------------------------------------------------
1 | # Copyright 2019 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # https://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | apiVersion: apps/v1
16 | kind: DaemonSet
17 | metadata:
18 |   name: enable-kdump
19 |   labels:
20 |     app: enable-kdump
21 | spec:
22 |   selector:
23 |     matchLabels:
24 |       name: enable-kdump
25 |   template:
26 |     metadata:
27 |       labels:
28 |         name: enable-kdump
29 |     spec:
30 |       # Only Ubuntu nodes labeled `enable-kdump=true` are targeted.
31 |       nodeSelector:
32 |         "cloud.google.com/gke-os-distribution": "ubuntu"
33 |         "enable-kdump": "true"
34 |       hostPID: true
35 |       containers:
36 |         - name: enable-kdump
37 |           image: debian
38 |           imagePullPolicy: Always
39 |           securityContext:
40 |             privileged: true
41 |           # nsenter joins the namespaces of PID 1 and runs the script there.
42 |           command:
43 |             - /usr/bin/nsenter
44 |             - -t 1
45 |             - -m
46 |             - -u
47 |             - -i
48 |             - -n
49 |             - -p
50 |             - --
51 |             - /bin/bash
52 |             - -c
53 |             - |
54 |
55 |               set -o errexit
56 |               set -o pipefail
57 |               set -o nounset
58 |
59 |               # Reports the kdump state and, when kdump is ready, turns on
60 |               # the hung-task panic sysctls.
61 |               function check_kdump() {
62 |                 local kdump_show
63 |                 kdump_show=$(kdump-config show)
64 |                 if echo "${kdump_show}" | grep -q "ready to kdump"; then
65 |                   echo "ready to kdump!"
66 |
67 |                   echo "setting sysctls"
68 |                   sysctl -w kernel.hung_task_panic=1
69 |                   sysctl -w kernel.hung_task_timeout_secs=20
70 |                   echo "sysctls are set"
71 |                 else
72 |                   echo "kdump not setup, isn't ready"
73 |                 fi
74 |                 echo "kdump-config show ==> ${kdump_show}"
75 |                 echo "/proc/cmdline ==> $(cat /proc/cmdline)"
76 |               }
77 |
78 |               # Installs linux-crashdump, sets the crashkernel reservation in
79 |               # the grub configuration and reboots the node.
80 |               function install() {
81 |                 echo "installing kdump"
82 |                 apt-get update
83 |                 DEBIAN_FRONTEND=noninteractive apt-get install -y linux-crashdump
84 |                 sed -i 's/^GRUB_CMDLINE_LINUX_DEFAULT.*/GRUB_CMDLINE_LINUX_DEFAULT="\$GRUB_CMDLINE_LINUX_DEFAULT crashkernel=2G-4G:320M,4G-32G:512M,32G-64G:1024M,64G-128G:2048M,128G-:4096M"/g' /etc/default/grub.d/kdump-tools.cfg
85 |                 update-grub
86 |                 echo "kdump enabled; waiting for reboot in 10 secs..."
87 |                 ( sleep 10 && reboot ) &
88 |
89 |                 # The timestamp carries a "Z" suffix, so it is printed in UTC.
90 |                 while true; do
91 |                   echo "$(date -u '+%Y-%m-%dT%H:%M:%SZ') waiting for reboot..."
92 |                   sleep 1
93 |                 done
94 |               }
95 |
96 |               if command -v "kdump-config" &> /dev/null; then
97 |                 check_kdump
98 |                 sleep 10
99 |               else
100 |                 install
101 |               fi
102 |
--------------------------------------------------------------------------------
/enable-kdump/ubuntu-kdump.md:
--------------------------------------------------------------------------------
1 | # kdump for Ubuntu
2 |
3 | ## Obtaining a kdump
4 |
5 | Use the `ubuntu-enable-kdump.yaml` DaemonSet to install and setup kdump on a set
6 | of nodes. The DaemonSet uses the `enable-kdump=true` node selector, so nodes
7 | must be labeled
8 |
9 | ```
10 | kubectl label nodes ${NODE_NAME} enable-kdump=true
11 | ```
12 |
13 | ## Triggering a test kdump
14 |
15 | SSH into a node and trigger a system crash with sysrq
16 |
17 | ```
18 | sudo -i
19 | sysctl -w kernel.sysrq=1
20 | echo c > /proc/sysrq-trigger
21 | ```
22 |
23 | A dump will be written to `/var/crash`
24 |
25 | ## Analyzing a kdump
26 |
27 | Create a VM for the analysis
28 |
29 | ```
30 | gcloud beta compute instances create dump-test-vm \
31 | --machine-type=e2-standard-4 \
32 | --image-family=ubuntu-1804-lts \
33 | --image-project=ubuntu-os-cloud \
34 | --boot-disk-size=100GB \
35 | --boot-disk-type=pd-ssd \
36 | --zone=us-central1-c
37 | ```
38 |
39 | SCP the contents of `/var/crash` to dump-test-vm
40 |
41 | Find the right deb for the correct kernel version, see
42 | [here](https://launchpad.net/~canonical-kernel-team/+archive/ubuntu/ppa/+packages?field.name_filter=linux-gke&field.status_filter=published).
43 | Obtain the url for the deb for the linux image with debug symbols, e.g. for
44 | `linux-gke-5.0 - 5.0.0-1046.47` the deb containing the vmlinux can be obtained
45 | [here](https://launchpad.net/~canonical-kernel-team/+archive/ubuntu/ppa/+build/18789932/+files/linux-image-unsigned-5.0.0-1032-gke-dbgsym_5.0.0-1032.33_amd64.ddeb).
46 |
47 | ```
48 | gcloud compute ssh dump-test-vm
49 | sudo apt-get update && sudo apt-get install -y linux-crashdump
50 |
51 | cd ${HOME}
52 | # Location of deb for vmlinux
53 | LINUX_DEB_IMAGE_URL="https://launchpad.net/..."
54 | wget "${LINUX_DEB_IMAGE_URL}"
55 | ar -x linux-image-unsigned-5.0.0-1032-gke-dbgsym_5.0.0-1032.33_amd64.ddeb
56 | mkdir debug_image
57 | tar -xf data.tar.xz -C debug_image/
58 |
59 | # Contents of /var/crash from the crash dump.
60 | CRASH_DUMP="var/crash/SOME_TIMESTAMP/dump.SOME_TIMESTAMP"
61 |
62 | # Start debugging!
63 | crash debug_image/usr/lib/debug/boot/vmlinux-5.0.0-1032-gke ${CRASH_DUMP}
64 | ```
65 |
--------------------------------------------------------------------------------
/gvisor/enable-gvisor-flags.yaml:
--------------------------------------------------------------------------------
1 | # Copyright 2020 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # https://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | # Deploy this DaemonSet to enable flag override to gVisor pods. To set flags for
16 | # a given pod, add pod annotations with the following format:
17 | # dev.gvisor.flag.<flag-name>: <flag-value>
18 | #
19 | # Here is an example that enables "debug-log", "debug", and "strace" flags:
20 | # metadata:
21 | # annotations:
22 | # dev.gvisor.flag.debug-log: "/tmp/sandbox-%ID/"
23 | # dev.gvisor.flag.debug: "true"
24 | # dev.gvisor.flag.strace: "true"
25 | #
26 | # Note: this is supported starting from 1.18.6-gke.3504.
27 |
28 | apiVersion: apps/v1
29 | kind: DaemonSet
30 | metadata:
31 |   name: enable-gvisor-flags
32 |   namespace: kube-system
33 | spec:
34 |   selector:
35 |     matchLabels:
36 |       name: enable-gvisor-flags
37 |   updateStrategy:
38 |     type: RollingUpdate
39 |   template:
40 |     metadata:
41 |       labels:
42 |         name: enable-gvisor-flags
43 |     spec:
44 |       tolerations:
45 |         - operator: Exists
46 |       volumes:
47 |         - name: host
48 |           hostPath:
49 |             path: /
50 |       initContainers:
51 |         - name: enable-gvisor-flags
52 |           image: ubuntu
53 |           command:
54 |             - /bin/bash
55 |             - -c
56 |             # Append the setting only when the file does not define it yet:
57 |             # a second append (e.g. after a pod restart) would leave a
58 |             # duplicate key in the TOML file.
59 |             - |
60 |               config="/host/run/containerd/runsc/config.toml"
61 |               if ! grep -q '^[[:space:]]*allow-flag-override[[:space:]]*=' "${config}" 2>/dev/null; then
62 |                 echo -e ' allow-flag-override = "true"' >> "${config}"
63 |               fi
64 |           volumeMounts:
65 |             - name: host
66 |               mountPath: /host
67 |           resources:
68 |             requests:
69 |               memory: 5Mi
70 |               cpu: 5m
71 |           securityContext:
72 |             privileged: true
73 |       containers:
74 |         - image: gke.gcr.io/pause:3.8@sha256:880e63f94b145e46f1b1082bb71b85e21f16b99b180b9996407d61240ceb9830
75 |           name: pause
76 |       # Only nodes labeled with the gVisor sandbox runtime are targeted.
77 |       nodeSelector:
78 |         "sandbox.gke.io/runtime": "gvisor"
79 |
--------------------------------------------------------------------------------
/kubelet-log-config/kubelet-log-config.yaml:
--------------------------------------------------------------------------------
1 | # Copyright 2024 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # https://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | # Deploy this DaemonSet to configure kubelet log rotation on nodes with the
16 | # "kubelet-log-config=true" label.
17 | #
18 | # Change the values of CONTAINER_LOG_MAX_SIZE and CONTAINER_LOG_MAX_FILES to
19 | # suit your needs.
20 | #
21 | # WARNING: Changing the kubelet log rotation configuration requires a kubelet
22 | # restart. Therefore, in order to avoid disrupting your workloads, it is
23 | # recommended to create a new node pool with the "kubelet-log-config=true" label
24 | # in your cluster, deploy the DaemonSet to configure kubelet log rotation in
25 | # that node pool, and then migrate your workloads to the new node pool.
26 |
27 | apiVersion: apps/v1
28 | kind: DaemonSet
29 | metadata:
30 |   name: kubelet-log-config
31 |   namespace: kube-system
32 | spec:
33 |   selector:
34 |     matchLabels:
35 |       name: kubelet-log-config
36 |   updateStrategy:
37 |     type: RollingUpdate
38 |   template:
39 |     metadata:
40 |       labels:
41 |         name: kubelet-log-config
42 |     spec:
43 |       tolerations:
44 |         - operator: Exists
45 |       volumes:
46 |         - name: host
47 |           hostPath:
48 |             path: /
49 |       hostPID: true
50 |       initContainers:
51 |         - name: kubelet-log-config
52 |           image: debian
53 |           env:
54 |             # The maximum size of the container log file before it is rotated.
55 |             # Update the value as desired.
56 |             - name: CONTAINER_LOG_MAX_SIZE
57 |               value: "10Mi"
58 |             # The maximum number of container log files kept for a container.
59 |             # Update the value as desired.
60 |             - name: CONTAINER_LOG_MAX_FILES
61 |               value: "5"
62 |           command:
63 |             - /bin/bash
64 |             - -c
65 |             - |
66 |               set -xeuo pipefail
67 |
68 |               # Configure the kubelet log rotation behavior.
69 |               # $1: Field name in kubelet configuration.
70 |               # $2: Value for the kubelet config field.
71 |               function set-kubelet-log-config() {
72 |                 [[ "$#" -eq 2 ]] || return
73 |                 local field; field="$1"; shift
74 |                 local value; value="$1"; shift
75 |
76 |                 local config; config="/host/home/kubernetes/kubelet-config.yaml"
77 |
78 |                 echo "Remove existing configuration for ${field} if there is any."
79 |                 sed -i "/${field}/d" "${config}"
80 |
81 |                 echo "Set ${field} to ${value}."
82 |                 echo "${field}: ${value}" >> "${config}"
83 |               }
84 |
85 |               set-kubelet-log-config containerLogMaxSize "${CONTAINER_LOG_MAX_SIZE}"
86 |               set-kubelet-log-config containerLogMaxFiles "${CONTAINER_LOG_MAX_FILES}"
87 |
88 |               echo "Restarting kubelet..."
89 |               chroot /host nsenter -a -t1 -- systemctl restart kubelet.service
90 |
91 |               echo "Success!"
92 |           volumeMounts:
93 |             - name: host
94 |               mountPath: /host
95 |           resources:
96 |             requests:
97 |               memory: 5Mi
98 |               cpu: 5m
99 |           securityContext:
100 |             privileged: true
101 |       containers:
102 |         # Digest-pinned pause image, as used by the kdump and gVisor DaemonSets.
103 |         - image: gke.gcr.io/pause:3.8@sha256:880e63f94b145e46f1b1082bb71b85e21f16b99b180b9996407d61240ceb9830
104 |           name: pause
105 |       # Ensures that the pods will only run on the nodes having the correct
106 |       # label.
107 |       nodeSelector:
108 |         "kubelet-log-config": "true"
109 |
--------------------------------------------------------------------------------
/manual-node-upgrade/README.md:
--------------------------------------------------------------------------------
1 | The sample script `manual_node_upgrade.sh` filters all node pools not matching the control plane's k8s version for a specified cluster. For each node pool identified by the filter, this script submits an upgrade request via the `gcloud` command. This script is idempotent and can be run as many times as necessary to ensure all node pools manually upgrade without side effects.
2 |
3 | For more information about GKE manual upgrades, consult the online documentation: https://cloud.google.com/kubernetes-engine/docs/how-to/upgrading-a-cluster
--------------------------------------------------------------------------------
/manual-node-upgrade/manual_node_upgrade.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | # Copyright 2023 Google LLC
4 |
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 |
9 | # https://www.apache.org/licenses/LICENSE-2.0
10 |
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | # Example: `./manual_node_upgrade.sh CLUSTER REGION`
18 | #
19 |
20 | CLUSTER_NAME=$1
21 | REGION=$2
22 |
23 | [ -z "$CLUSTER_NAME" ] || [ -z "$REGION" ] \
24 | && echo "Usage: ./manual_node_upgrade.sh CLUSTER_NAME REGION" \
25 | && exit 1;
26 |
27 | # fetch current control plane version
28 | CLUSTER_VERSION=$(gcloud container clusters describe \
29 | "$CLUSTER_NAME" --format="value(currentMasterVersion)" \
30 | --region=$REGION)
31 |
32 | # list node pools with version not matching control plane
33 | # and then issue an upgrade command for each identified node pool
34 | for np in $(gcloud container node-pools list \
35 | --format="value(name)" --filter="version!=$CLUSTER_VERSION" \
36 | --cluster "$CLUSTER_NAME" --region=$REGION); do
37 | gcloud container clusters upgrade "$CLUSTER_NAME" --node-pool $np \
38 | --region=$REGION --quiet;
39 | done
--------------------------------------------------------------------------------
/migrating-to-containerd/README.md:
--------------------------------------------------------------------------------
1 | # Migrating to Containerd
2 |
3 | Find information about running Containerd nodes on GKE [here](https://cloud.google.com/kubernetes-engine/docs/concepts/using-containerd).
4 |
5 | The sample script `find-nodepools-to-migrate.sh` iterates over all node pools across available projects, and for each node pool outputs the suggestion on whether the node pool should be migrated to Containerd. This script also outputs the node pool version and suggested migration command as listed in the [updating your node images](https://cloud.google.com/kubernetes-engine/docs/concepts/using-containerd#updating-image-type) document. Make sure that you review the [known issues](https://cloud.google.com/kubernetes-engine/docs/concepts/using-containerd#known_issues) for a node pool version before migration.
6 |
--------------------------------------------------------------------------------
/migrating-to-containerd/find-nodepools-to-migrate.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Reports, for every node pool of every listed project, whether it should be migrated to Containerd.
3 | # [START gke_node_find_non_containerd_nodepools]
4 | for project in $(gcloud projects list --format="value(projectId)")
5 | do
6 |   echo "ProjectId: $project"
7 |   for clusters in $( \
8 |     gcloud container clusters list \
9 |       --project "$project" \
10 |       --format="csv[no-heading](name,location,autopilot.enabled,currentMasterVersion,autoscaling.enableNodeAutoprovisioning,autoscaling.autoprovisioningNodePoolDefaults.imageType)")
11 |   do
12 |     IFS=',' read -r -a clustersArray <<< "$clusters"
13 |     cluster_name="${clustersArray[0]}"
14 |     cluster_zone="${clustersArray[1]}"
15 |     cluster_isAutopilot="${clustersArray[2]}"
16 |     cluster_version="${clustersArray[3]}"
17 |     napImageTypeMinVersion="1.20"  # below this, the NAP image type is reported as not configurable
18 |     cluster_autoprovisioning="${clustersArray[4]}"
19 |     cluster_autoprovisioningImageType="${clustersArray[5]}"
20 |     # Autopilot clusters need no action; only the other clusters are inspected further.
21 |     if [ "$cluster_isAutopilot" = "True" ]; then
22 |       echo " Cluster: $cluster_name (autopilot) (zone: $cluster_zone)"
23 |       echo " Autopilot clusters are running Containerd."
24 |     else
25 |       echo " Cluster: $cluster_name (zone: $cluster_zone)"
26 |       # Node auto-provisioning: report which image type newly created node pools will get.
27 |       if [ "$cluster_autoprovisioning" = "True" ]; then
28 |         if [ "$(printf '%s\n' "$napImageTypeMinVersion" "$cluster_version" | sort -V | head -n1)" != "$napImageTypeMinVersion" ]; then
29 |           echo " Node autoprovisioning is enabled, and new node pools will have image type 'COS'."
30 |           echo " This setting is not configurable on the current version of a cluster."
31 |           echo " Please upgrade your cluster and configure the default node autoprovisioning image type."
32 |           echo " "
33 |         else
34 |           if [ "$cluster_autoprovisioningImageType" = "COS" ]; then
35 |             echo " Node autoprovisioning is configured to create new node pools of type 'COS'."
36 |             echo " Run the following command to update:"
37 |             echo " gcloud container clusters update '$cluster_name' --project '$project' --zone '$cluster_zone' --enable-autoprovisioning --autoprovisioning-image-type='COS_CONTAINERD'"
38 |             echo " "
39 |           fi
40 |
41 |           if [ "$cluster_autoprovisioningImageType" = "UBUNTU" ]; then
42 |             echo " Node autoprovisioning is configured to create new node pools of type 'UBUNTU'."
43 |             echo " Run the following command to update:"
44 |             echo " gcloud container clusters update '$cluster_name' --project '$project' --zone '$cluster_zone' --enable-autoprovisioning --autoprovisioning-image-type='UBUNTU_CONTAINERD'"
45 |             echo " "
46 |           fi
47 |         fi
48 |       fi
49 |       # Inspect every existing node pool of this cluster.
50 |       for nodepools in $( \
51 |         gcloud container node-pools list \
52 |           --project "$project" \
53 |           --cluster "$cluster_name" \
54 |           --zone "$cluster_zone" \
55 |           --format="csv[no-heading](name,version,config.imageType)")
56 |       do
57 |         IFS=',' read -r -a nodepoolsArray <<< "$nodepools"
58 |         nodepool_name="${nodepoolsArray[0]}"
59 |         nodepool_version="${nodepoolsArray[1]}"
60 |         nodepool_imageType="${nodepoolsArray[2]}"
61 |         # major.minor only (e.g. 1.18); used for display in the summary line below.
62 |         nodepool_minorVersion=$(printf '%s' "$nodepool_version" | cut -d. -f1,2)
63 |
64 |         echo " Nodepool: $nodepool_name, version: $nodepool_version ($nodepool_minorVersion), image: $nodepool_imageType"
65 |         # Fold the GKE revision into the version (1.21.1-gke.2200 -> 1.21.1.2200) so sort -V can compare it.
66 |         minorVersionWithRev="${nodepool_version/-gke./.}"
67 |         linuxGkeMinVersion="1.14"
68 |         windowsGkeMinVersion="1.21.1.2200"
69 |
70 |         suggestedImageType="COS_CONTAINERD"
71 |
72 |         if [ "$nodepool_imageType" = "UBUNTU" ]; then
73 |           suggestedImageType="UBUNTU_CONTAINERD"
74 |         elif [ "$nodepool_imageType" = "WINDOWS_LTSC" ]; then
75 |           suggestedImageType="WINDOWS_LTSC_CONTAINERD"
76 |         elif [ "$nodepool_imageType" = "WINDOWS_SAC" ]; then
77 |           suggestedImageType="WINDOWS_SAC_CONTAINERD"
78 |         fi
79 |         # Default advice is to migrate; the checks further down replace it where that does not apply.
80 |         tab=$'\n ';
81 |         nodepool_message="$tab Please update the nodepool to use Containerd."
82 |         nodepool_message+="$tab Make sure to consult with the list of known issues https://cloud.google.com/kubernetes-engine/docs/concepts/using-containerd#known_issues."
83 |         nodepool_message+="$tab Run the following command to upgrade:"
84 |         nodepool_message+="$tab "
85 |         nodepool_message+="$tab gcloud container clusters upgrade '$cluster_name' --project '$project' --zone '$cluster_zone' --image-type '$suggestedImageType' --node-pool '$nodepool_name'"
86 |         nodepool_message+="$tab "
87 |
88 |         # see https://cloud.google.com/kubernetes-engine/docs/concepts/node-images
89 |         if [ "$nodepool_imageType" = "COS_CONTAINERD" ] || [ "$nodepool_imageType" = "UBUNTU_CONTAINERD" ] ||
90 |           [ "$nodepool_imageType" = "WINDOWS_LTSC_CONTAINERD" ] || [ "$nodepool_imageType" = "WINDOWS_SAC_CONTAINERD" ]; then
91 |           nodepool_message="$tab Nodepool is using Containerd already"
92 |         elif ( [ "$nodepool_imageType" = "WINDOWS_LTSC" ] || [ "$nodepool_imageType" = "WINDOWS_SAC" ] ) &&
93 |           [ "$(printf '%s\n' "$windowsGkeMinVersion" "$minorVersionWithRev" | sort -V | head -n1)" != "$windowsGkeMinVersion" ]; then
94 |           nodepool_message="$tab Upgrade nodepool to the version that supports Containerd for Windows"
95 |         elif [ "$(printf '%s\n' "$linuxGkeMinVersion" "$minorVersionWithRev" | sort -V | head -n1)" != "$linuxGkeMinVersion" ]; then
96 |           nodepool_message="$tab Upgrade nodepool to the version that supports Containerd"
97 |         fi
98 |         echo "$nodepool_message"
99 |       done
100 |     fi # not autopilot
101 |   done
102 | done
103 |
104 | # Sample output:
105 | #
106 | # ProjectId: my-project-id
107 | # Cluster: autopilot-cluster-1 (autopilot) (zone: us-central1)
108 | # Autopilot clusters are running Containerd.
109 | # Cluster: cluster-1 (zone: us-central1-c)
110 | # Nodepool: default-pool, version: 1.18.12-gke.1210 (1.18), image: COS
111 | #
112 | # Please update the nodepool to use Containerd.
113 | # Make sure to consult with the list of known issues https://cloud.google.com/kubernetes-engine/docs/concepts/using-containerd#known_issues.
114 | # Run the following command to upgrade:
115 | #
116 | # gcloud container clusters upgrade 'cluster-1' --project 'my-project-id' --zone 'us-central1-c' --image-type 'COS_CONTAINERD' --node-pool 'default-pool'
117 | #
118 | # Nodepool: pool-1, version: 1.18.12-gke.1210 (1.18), image: COS
119 | #
120 | # Please update the nodepool to use Containerd.
121 | # Make sure to consult with the list of known issues https://cloud.google.com/kubernetes-engine/docs/concepts/using-containerd#known_issues.
122 | # Run the following command to upgrade:
123 | #
124 | # gcloud container clusters upgrade 'cluster-1' --project 'my-project-id' --zone 'us-central1-c' --image-type 'COS_CONTAINERD' --node-pool 'pool-1'
125 | #
126 | # Nodepool: winpool, version: 1.18.12-gke.1210 (1.18), image: WINDOWS_SAC
127 | #
128 | # Upgrade nodepool to the version that supports Containerd for Windows
129 | #
130 | # Cluster: another-test-cluster (zone: us-central1-c)
131 | # Nodepool: default-pool, version: 1.20.4-gke.400 (1.20), image: COS_CONTAINERD
132 | #
133 | # Nodepool is using Containerd already
134 | #
135 | # [END gke_node_find_non_containerd_nodepools]
136 | #
137 |
--------------------------------------------------------------------------------
/os-audit/README.md:
--------------------------------------------------------------------------------
1 | The os-audit tool is the example code for
2 | [enabling Linux auditd logs on GKE nodes](https://cloud.google.com/kubernetes-engine/docs/how-to/linux-auditd-logging),
3 | which documents how to enable verbose operating system audit logs on Google
4 | Kubernetes Engine nodes running Container-Optimized OS.
5 |
--------------------------------------------------------------------------------
/os-audit/cos-auditd-logging.yaml:
--------------------------------------------------------------------------------
1 | # Copyright 2019 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # https://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | apiVersion: v1
16 | kind: Namespace
17 | metadata:
18 |   name: cos-auditd  # namespace used by the DaemonSet and ConfigMap defined later in this file
19 | ---
20 | apiVersion: apps/v1
21 | kind: DaemonSet
22 | metadata:
23 |   name: cos-auditd-logging
24 |   namespace: cos-auditd
25 |   annotations:
26 |     kubernetes.io/description: 'DaemonSet that enables Linux auditd logging on non-Autopilot COS nodes.'
27 | spec:
28 |   selector:
29 |     matchLabels:
30 |       name: cos-auditd-logging
31 |   template:
32 |     metadata:
33 |       labels:
34 |         name: cos-auditd-logging
35 |     spec:
36 |       # Necessary for ensuring access to Google Cloud credentials from the node's metadata server.
37 |       hostNetwork: true
38 |       hostPID: true
39 |       dnsPolicy: Default
40 |       initContainers:
41 |       - name: cos-auditd-setup
42 |         image: ubuntu  # untagged demo image; for production pin a tag or digest from a controlled registry
43 |         command: ["chroot", "/host", "systemctl", "start", "cloud-audit-setup"]  # runs against the host root mounted at /host
44 |         securityContext:
45 |           privileged: true
46 |         volumeMounts:
47 |         - name: host
48 |           mountPath: /host
49 |         resources:
50 |           requests:
51 |             memory: "10Mi"
52 |             cpu: "10m"
53 |       containers:
54 |       - name: cos-auditd-fluent-bit
55 |         securityContext:
56 |           allowPrivilegeEscalation: false
57 |           capabilities:
58 |             drop:
59 |             - ALL  # canonical upper-case spelling, as used in the Kubernetes docs and Pod Security Standards
60 |             add:
61 |             - DAC_OVERRIDE
62 |         env:
63 |         - name: NODE_NAME
64 |           valueFrom:
65 |             fieldRef:
66 |               apiVersion: v1
67 |               fieldPath: spec.nodeName
68 |         # Substitute these (manually or via envsubst). For example, run
69 |         # `CLUSTER_NAME=example-cluster CLUSTER_LOCATION=us-central1-a envsubst '$CLUSTER_NAME,$CLUSTER_LOCATION' < ${THIS_FILE:?} | kubectl apply -f -`
70 |         - name: CLUSTER_NAME
71 |           value: "$CLUSTER_NAME"
72 |         - name: CLUSTER_LOCATION
73 |           value: "$CLUSTER_LOCATION"
74 |         # This image is used for demo purposes. The best practice is to use the image from controlled registry and reference it by SHA.
75 |         image: fluent/fluent-bit:latest
76 |         imagePullPolicy: IfNotPresent
77 |         livenessProbe:
78 |           httpGet:
79 |             path: /
80 |             port: 2024  # same port as HTTP_PORT in the fluent-bit [SERVICE] section of the ConfigMap
81 |           initialDelaySeconds: 120
82 |           periodSeconds: 60
83 |           timeoutSeconds: 5
84 |         ports:
85 |         - name: metrics
86 |           containerPort: 2024
87 |         resources:
88 |           limits:
89 |             cpu: "1"
90 |             memory: 500Mi
91 |           requests:
92 |             cpu: 100m
93 |             memory: 200Mi
94 |         terminationMessagePath: /dev/termination-log
95 |         terminationMessagePolicy: File
96 |         volumeMounts:
97 |         - mountPath: /var/log
98 |           name: varlog
99 |         - mountPath: /var/lib/cos-auditd-fluent-bit/pos-files
100 |           name: varlib-cos-auditd-fluent-bit-pos-files
101 |         - mountPath: /fluent-bit/etc
102 |           name: config-volume
103 |       nodeSelector:
104 |         cloud.google.com/gke-os-distribution: cos
105 |       restartPolicy: Always
106 |       terminationGracePeriodSeconds: 120
107 |       tolerations:
108 |       - operator: "Exists"
109 |         effect: "NoExecute"
110 |       - operator: "Exists"
111 |         effect: "NoSchedule"
112 |       volumes:
113 |       - name: host
114 |         hostPath:
115 |           path: /
116 |       - name: varlog
117 |         hostPath:
118 |           path: /var/log
119 |       - name: varlibcos-auditd-fluent-bit  # NOTE(review): no container in this manifest mounts this volume
120 |         hostPath:
121 |           path: /var/lib/cos-auditd-fluent-bit
122 |           type: DirectoryOrCreate
123 |       - name: varlib-cos-auditd-fluent-bit-pos-files
124 |         hostPath:
125 |           path: /var/lib/cos-auditd-fluent-bit/pos-files
126 |           type: DirectoryOrCreate
127 |       - name: config-volume
128 |         configMap:
129 |           name: cos-auditd-fluent-bit-config
130 |   updateStrategy:
131 |     type: RollingUpdate
132 | ---
133 | kind: ConfigMap
134 | apiVersion: v1
135 | metadata:
136 |   name: cos-auditd-fluent-bit-config  # referenced by the config-volume of the cos-auditd-logging DaemonSet
137 |   namespace: cos-auditd
138 |   annotations:
139 |     kubernetes.io/description: 'ConfigMap for Linux auditd logging daemonset on COS nodes.'
140 | data:
141 |   fluent-bit.conf: |-  # the DaemonSet mounts this ConfigMap at /fluent-bit/etc
142 |     [SERVICE]
143 |         Flush 5
144 |         Grace 120
145 |         Log_Level info
146 |         Daemon off
147 |         HTTP_Server On
148 |         HTTP_Listen 0.0.0.0
149 |         HTTP_PORT 2024
150 |
151 |     [INPUT]
152 |         # https://docs.fluentbit.io/manual/input/systemd
153 |         Name systemd
154 |         Alias audit
155 |         Tag audit
156 |         Systemd_Filter SYSLOG_IDENTIFIER=audit
157 |         Path /var/log/journal
158 |         DB /var/lib/cos-auditd-fluent-bit/pos-files/audit.db
159 |
160 |     [FILTER]
161 |         # https://docs.fluentbit.io/manual/pipeline/filters/modify
162 |         Name modify
163 |         Match audit
164 |         Add logging.googleapis.com/local_resource_id k8s_node.${NODE_NAME}
165 |
166 |     [FILTER]
167 |         Name modify
168 |         Match audit
169 |         Add logging.googleapis.com/logName linux-auditd
170 |
171 |     [OUTPUT]
172 |         # https://docs.fluentbit.io/manual/pipeline/outputs/stackdriver
173 |         Name stackdriver
174 |         Match audit
175 |         Severity_key severity
176 |         log_name_key logging.googleapis.com/logName
177 |         Resource k8s_node
178 |         # The plugin will read the project ID from the metadata server, but not the cluster name and location for some reason, so they have to be injected.
179 |         k8s_cluster_name ${CLUSTER_NAME}
180 |         k8s_cluster_location ${CLUSTER_LOCATION}
181 |         net.connect_timeout 60
182 |         Retry_Limit 14
183 |         Workers 1
184 |
--------------------------------------------------------------------------------
/perf/perf-record.yaml:
--------------------------------------------------------------------------------
1 | # Copyright 2019 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # https://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | #
15 | #
16 | #
17 | # Change KERNEL_VERSION below to the kernel version being tested.
18 | # (This manifest has no TARGET_PGREP setting; that option exists only in perf-trace.yaml.)
19 |
20 | apiVersion: apps/v1
21 | kind: DaemonSet
22 | metadata:
23 |   name: enable-perf-record
24 |   labels:
25 |     app: enable-perf-record
26 | spec:
27 |   selector:
28 |     matchLabels:
29 |       name: enable-perf-record
30 |   template:
31 |     metadata:
32 |       labels:
33 |         name: enable-perf-record
34 |     spec:
35 |       # Only nodes labelled enable-perf=true run this pod.
36 |       nodeSelector:
37 |         "enable-perf": "true"
38 |       hostPID: true
39 |       volumes:
40 |       - name: host
41 |         hostPath:
42 |           path: /
43 |       containers:
44 |       - name: enable-perf-record
45 |         image: debian
46 |         imagePullPolicy: Always
47 |         volumeMounts:
48 |         - name: host
49 |           mountPath: /host
50 |         securityContext:
51 |           privileged: true
52 |         command:
53 |         - /bin/bash
54 |         - -c
55 |         - |
56 |           set -o errexit
57 |           set -o pipefail
58 |           set -o nounset
59 |
60 |           KERNEL_VERSION="5.0.0"
61 |           # Build perf from the kernel.org tools tarball for that version; -f makes curl fail on HTTP errors.
62 |           apt-get update && apt-get install -y curl procps daemontools build-essential bison flex libelf-dev binutils-dev
63 |           curl -fLO https://mirrors.edge.kernel.org/pub/linux/kernel/tools/perf/v"${KERNEL_VERSION}"/perf-"${KERNEL_VERSION}".tar.gz
64 |           tar xzf perf-"${KERNEL_VERSION}".tar.gz
65 |           make -C perf-"${KERNEL_VERSION}"/tools/perf install
66 |           PERF="/root/bin/perf"
67 |           # Timestamp in UTC so the trailing Z is accurate.
68 |           d=$(date -u '+%Y-%m-%dT%H:%M:%SZ')
69 |           out_dir="/host/var/log/perf_record/${d}"
70 |           mkdir -p "${out_dir}"
71 |           cd "${out_dir}"
72 |           echo "starting perf recording! will dump to ${out_dir}"
73 |           # Sample at 999 Hz with call graphs, switching to a new timestamped output file every 10s.
74 |           "${PERF}" record -F 999 -g --timestamp-filename --switch-output="10s" > /dev/null
75 |
--------------------------------------------------------------------------------
/perf/perf-trace.yaml:
--------------------------------------------------------------------------------
1 | # Copyright 2019 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # https://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | #
15 | #
16 | #
17 | # Change TARGET_PGREP below to names of processes to trace.
18 | # Also change KERNEL_VERSION below to version being tested.
19 |
20 | apiVersion: apps/v1
21 | kind: DaemonSet
22 | metadata:
23 |   name: enable-perf-trace
24 |   labels:
25 |     app: enable-perf-trace
26 | spec:
27 |   selector:
28 |     matchLabels:
29 |       name: enable-perf-trace
30 |   template:
31 |     metadata:
32 |       labels:
33 |         name: enable-perf-trace
34 |     spec:
35 |       nodeSelector:
36 |         "enable-perf": "true"
37 |       hostPID: true
38 |       volumes:
39 |       - name: host
40 |         hostPath:
41 |           path: /
42 |       containers:
43 |       - name: enable-perf-trace
44 |         image: debian
45 |         imagePullPolicy: Always
46 |         volumeMounts:
47 |         - name: host
48 |           mountPath: /host
49 |         securityContext:
50 |           privileged: true
51 |         env:
52 |         # TARGET_PGREP options
53 |         # Values:
54 |         # empty or unset to do full trace
55 |         # pgrep query to filter
56 |         - name: TARGET_PGREP
57 |           value: "kubelet"
58 |         command:
59 |         - /bin/bash
60 |         - -c
61 |         - |
62 |
63 |           set -o errexit
64 |           set -o pipefail
65 |           set -o nounset
66 |
67 |           MAX_LOG_SIZE="16777215" # 16 MB
68 |           MAX_LOGS="2000"
69 |           KERNEL_VERSION="5.0.0"
70 |           # Build perf from the kernel.org tools tarball for that version; -f makes curl fail on HTTP errors.
71 |           apt-get update && apt-get install -y curl procps daemontools build-essential bison flex libelf-dev binutils-dev
72 |           curl -fLO https://mirrors.edge.kernel.org/pub/linux/kernel/tools/perf/v"${KERNEL_VERSION}"/perf-"${KERNEL_VERSION}".tar.gz
73 |           tar xzf perf-"${KERNEL_VERSION}".tar.gz
74 |           make -C perf-"${KERNEL_VERSION}"/tools/perf install
75 |           PERF="/root/bin/perf"
76 |           # Timestamp in UTC so the trailing Z is accurate.
77 |           d=$(date -u '+%Y-%m-%dT%H:%M:%SZ')
78 |
79 |           out_dir="/host/var/log/perf_trace/${d}"
80 |           mkdir -p "${out_dir}"
81 |
82 |           echo "starting perf! will dump to ${out_dir}"
83 |           # Unset OR empty TARGET_PGREP means a full trace; ":-" also keeps nounset from tripping when it is unset.
84 |           if [[ -z "${TARGET_PGREP:-}" ]]; then
85 |             echo "full system perf trace"
86 |             "${PERF}" trace |& multilog t s${MAX_LOG_SIZE} n${MAX_LOGS} "${out_dir}"
87 |           else
88 |             echo "PID perf trace"
89 |             PIDS=$(pgrep "${TARGET_PGREP}" -d ",") || { echo "pgrep found no process for '${TARGET_PGREP}'" >&2; exit 1; }
90 |             echo "staring perf on pids == ${PIDS}"
91 |             "${PERF}" trace --pid="${PIDS}" |& multilog t s${MAX_LOG_SIZE} n${MAX_LOGS} "${out_dir}"
92 |           fi
93 |
--------------------------------------------------------------------------------
/ssh-server-config/README.md:
--------------------------------------------------------------------------------
1 | The ssh-server-config tool is a Kubernetes DaemonSet that sets [loginGraceTime](https://man.openbsd.org/sshd#g) to 0.
2 |
3 | ## :warning: This configuration may increase the risk of denial of service attacks and may cause issues with legitimate SSH access.
4 |
5 | ## How to use it?
6 | Apply it to all nodes in your cluster by running the
7 | following command. Run the command once per cluster per
8 | Google Cloud Platform project.
9 |
10 | ### GKE Clusters
11 |
12 | ```
13 | kubectl apply -f \
14 | https://raw.githubusercontent.com/GoogleCloudPlatform\
15 | /k8s-node-tools/master/ssh-server-config/set-login-grace-time.yaml
16 | ```
17 |
18 | ### GDC software-only for VMware Clusters
19 |
20 | ```
21 | kubectl apply -f \
22 | https://raw.githubusercontent.com/GoogleCloudPlatform\
23 | /k8s-node-tools/master/ssh-server-config/set-login-grace-time-gdcso-vmware.yaml
24 | ```
25 |
26 | ## How to get the result?
27 | Run the command below to view the related logs.
28 | ```
29 | kubectl -n kube-system logs -l app=ssh-server-config -c ssh-server-config
30 | ```
31 |
--------------------------------------------------------------------------------
/ssh-server-config/set-login-grace-time-gdcso-vmware.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: apps/v1
2 | kind: DaemonSet
3 | metadata:
4 |   name: ssh-server-config
5 |   namespace: kube-system
6 |   labels:
7 |     app: ssh-server-config
8 | spec:
9 |   selector:
10 |     matchLabels:
11 |       app: ssh-server-config
12 |   template:
13 |     metadata:
14 |       labels:
15 |         app: ssh-server-config
16 |     spec:
17 |       hostPID: true
18 |       tolerations:
19 |       - operator: Exists
20 |       initContainers:
21 |       - name: ssh-server-config
22 |         image: gke.gcr.io/debian-base:bookworm-v1.0.3-gke.0@sha256:91b29592ee0b782c0ab777bfcabd14a0ae83d8e8eb90d3f0eb500acafae3f4e5
23 |         securityContext:
24 |           privileged: true
25 |         command:
26 |         - /bin/sh
27 |         - -c
28 |         - |
29 |           set -eu
30 |           SSHD_CONFIG="/etc/ssh/sshd_config"
31 |           WORK_COPY="${SSHD_CONFIG}.cp"
32 |           if [ ! -e "${SSHD_CONFIG}" ]; then
33 |             echo "/etc/ssh/sshd_config not found"
34 |             exit 1
35 |           fi
36 |
37 |           # Make the change on a scratch copy, then copy the result back over the mounted file.
38 |           cp "${SSHD_CONFIG}" "${WORK_COPY}"
39 |           if grep -q "^LoginGraceTime" "${WORK_COPY}"; then
40 |             # A LoginGraceTime line already exists: rewrite it.
41 |             sed -i "s/^LoginGraceTime.*/LoginGraceTime 0/" "${WORK_COPY}"
42 |           else
43 |             # No LoginGraceTime line yet: append one.
44 |             echo "LoginGraceTime 0" >> "${WORK_COPY}"
45 |           fi
46 |           cp "${WORK_COPY}" "${SSHD_CONFIG}"
47 |           rm "${WORK_COPY}"
48 |
49 |           # With hostPID, PID 1 is the node's init; run systemctl and sshd in its mount and PID namespaces.
50 |           nsenter -t 1 -m -p -- systemctl reload sshd
51 |           echo "sshd logingracetime after restart:"
52 |           nsenter -t 1 -m -p -- sshd -T | grep logingracetime
53 |         resources:
54 |           requests:
55 |             memory: 5Mi
56 |             cpu: 5m
57 |         volumeMounts:
58 |         - name: sshd-config
59 |           mountPath: /etc/ssh/sshd_config
60 |       containers:
61 |       - name: pause-container
62 |         image: gke.gcr.io/pause:3.7@sha256:5b658f3c4f034a9619ad7e6d1ee49ee532a1e0a598dc68b06d17b6036116b924
63 |       volumes:
64 |       - name: sshd-config
65 |         hostPath:
66 |           path: /etc/ssh/sshd_config
67 |           type: File
68 |
69 |
70 |
--------------------------------------------------------------------------------
/ssh-server-config/set-login-grace-time.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: apps/v1
2 | kind: DaemonSet
3 | metadata:
4 |   name: ssh-server-config
5 |   namespace: kube-system
6 |   labels:
7 |     app: ssh-server-config
8 | spec:
9 |   selector:
10 |     matchLabels:
11 |       app: ssh-server-config
12 |   template:
13 |     metadata:
14 |       labels:
15 |         app: ssh-server-config
16 |     spec:
17 |       hostPID: true
18 |       initContainers:
19 |       - name: ssh-server-config
20 |         image: gke.gcr.io/debian-base:bookworm-v1.0.3-gke.0@sha256:91b29592ee0b782c0ab777bfcabd14a0ae83d8e8eb90d3f0eb500acafae3f4e5
21 |         securityContext:
22 |           privileged: true
23 |         command:
24 |         - /bin/sh
25 |         - -c
26 |         - |
27 |           set -eu
28 |           SSHD_CONFIG="/etc/ssh/sshd_config"
29 |           WORK_COPY="${SSHD_CONFIG}.cp"
30 |           if [ ! -e "${SSHD_CONFIG}" ]; then
31 |             echo "/etc/ssh/sshd_config not found"
32 |             exit 1
33 |           fi
34 |
35 |           # Make the change on a scratch copy, then copy the result back over the mounted file.
36 |           cp "${SSHD_CONFIG}" "${WORK_COPY}"
37 |           if grep -q "^LoginGraceTime" "${WORK_COPY}"; then
38 |             # A LoginGraceTime line already exists: rewrite it.
39 |             sed -i "s/^LoginGraceTime.*/LoginGraceTime 0/" "${WORK_COPY}"
40 |           else
41 |             # No LoginGraceTime line yet: append one.
42 |             echo "LoginGraceTime 0" >> "${WORK_COPY}"
43 |           fi
44 |           cp "${WORK_COPY}" "${SSHD_CONFIG}"
45 |           rm "${WORK_COPY}"
46 |
47 |           # With hostPID, PID 1 is the node's init; run systemctl and sshd in its mount and PID namespaces.
48 |           nsenter -t 1 -m -p -- systemctl reload sshd
49 |           echo "sshd logingracetime after restart:"
50 |           nsenter -t 1 -m -p -- sshd -T | grep logingracetime
51 |         volumeMounts:
52 |         - name: sshd-config
53 |           mountPath: /etc/ssh/sshd_config
54 |       containers:
55 |       - name: pause-container
56 |         image: gke.gcr.io/pause:3.7@sha256:5b658f3c4f034a9619ad7e6d1ee49ee532a1e0a598dc68b06d17b6036116b924
57 |       volumes:
58 |       - name: sshd-config
59 |         hostPath:
60 |           path: /etc/ssh/sshd_config
61 |           type: File
62 |
63 |
64 |
65 |
--------------------------------------------------------------------------------