├── .github
│   └── workflows
│       └── auto-approve.yml
├── README.md
├── agents.yaml
├── compile-agents-yaml.sh
├── heapster.yaml
├── hooks
│   └── pre-commit
├── logging-agent.yaml
├── metadata-agent.yaml
└── rbac-setup.yaml
/.github/workflows/auto-approve.yml:
--------------------------------------------------------------------------------
1 | # Approves a pull request automatically when its actor is the release robot account.
2 | name: Auto approve
3 | on: pull_request
4 |
5 | jobs:
6 |   auto-approve:
7 |     runs-on: ubuntu-latest
8 |     steps:
9 |       - if: github.actor == 'stackdriver-instrumentation-release'  # every other actor skips this step
10 |         uses: hmarr/auto-approve-action@v2.0.0
11 |         with:
12 |           github-token: "${{ secrets.GITHUB_TOKEN }}"
13 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Stackdriver Kubernetes Configs
2 |
3 | A collection of Kubernetes configurations to integrate with Stackdriver
4 | products. For now, it supports [Stackdriver Logging](https://cloud.google.com/logging/)
5 | and [Stackdriver Monitoring](https://cloud.google.com/monitoring/). This repo is only used
6 | for [manual installation](https://cloud.google.com/monitoring/kubernetes-engine/customizing)
7 | on existing clusters.
8 |
9 | ## Setting up git hooks
10 |
11 | From the root directory of this repo, please run the following command:
12 |
13 | ```
14 | ln -s "$(realpath hooks/pre-commit)" "$(git rev-parse --git-dir)/hooks/pre-commit"
15 | ```
16 |
17 | This will ensure that all commits run the
18 | [`compile-agents-yaml.sh`](#compile-agents-yaml) command.
19 |
20 | ## Compile Agents YAML
21 |
22 | From the root directory of this repo, you can run the following command:
23 |
24 | ```
25 | ./compile-agents-yaml.sh
26 | ```
27 |
28 | This will re-generate the `agents.yaml` file to keep it
29 | up-to-date with other YAML file changes.
30 |
31 |
--------------------------------------------------------------------------------
/agents.yaml:
--------------------------------------------------------------------------------
1 | # THIS FILE IS AUTO-GENERATED DO NOT EDIT
2 | apiVersion: apps/v1
3 | kind: Deployment
4 | metadata:
5 |   labels:
6 |     k8s-app: stackdriver-heapster
7 |     version: v1.6.1
8 |   name: heapster
9 |   namespace: stackdriver-agents
10 | spec:
11 |   replicas: 1
12 |   selector:
13 |     matchLabels:
14 |       k8s-app: stackdriver-heapster
15 |   strategy:
16 |     rollingUpdate:
17 |       maxSurge: 1
18 |       maxUnavailable: 1
19 |     type: RollingUpdate
20 |   template:
21 |     metadata:
22 |       creationTimestamp: null
23 |       labels:
24 |         k8s-app: stackdriver-heapster
25 |         version: v1.6.1
26 |     spec:
27 |       containers:
28 |       - env:  # heapster container; values are read from the cluster-config and google-cloud-config ConfigMaps
29 |         - name: CLUSTER_NAME
30 |           valueFrom:
31 |             configMapKeyRef:
32 |               name: cluster-config
33 |               key: cluster_name
34 |         - name: CLUSTER_LOCATION
35 |           valueFrom:
36 |             configMapKeyRef:
37 |               name: cluster-config
38 |               key: cluster_location
39 |         - name: GOOGLE_APPLICATION_CREDENTIALS
40 |           valueFrom:
41 |             configMapKeyRef:
42 |               name: google-cloud-config
43 |               key: credentials_path
44 |         command:  # $(CLUSTER_NAME) / $(CLUSTER_LOCATION) below refer to the env entries above
45 |         - /heapster
46 |         - --source=kubernetes.summary_api:https://kubernetes.default?kubeletHttps=true&kubeletPort=10250&insecure=true
47 |         - --sink=stackdriver:?cluster_name=$(CLUSTER_NAME)&cluster_location=$(CLUSTER_LOCATION)&zone=$(CLUSTER_LOCATION)&use_old_resources=false&use_new_resources=true&min_interval_sec=100&batch_export_timeout_sec=110
48 |         image: gcr.io/stackdriver-agents/heapster-amd64:v1.6.1
49 |         imagePullPolicy: Always
50 |         livenessProbe:  # HTTP GET /healthz on port 8082; first check 180s after start
51 |           failureThreshold: 3
52 |           httpGet:
53 |             path: /healthz
54 |             port: 8082
55 |             scheme: HTTP
56 |           initialDelaySeconds: 180
57 |           periodSeconds: 10
58 |           successThreshold: 1
59 |           timeoutSeconds: 5
60 |         name: heapster
61 |         resources:
62 |           limits:
63 |             cpu: 88m
64 |             memory: 204Mi
65 |           requests:
66 |             cpu: 88m
67 |             memory: 204Mi
68 |         terminationMessagePath: /dev/termination-log
69 |         terminationMessagePolicy: File
70 |         volumeMounts:
71 |         - mountPath: /etc/google-cloud/
72 |           name: google-cloud-config
73 |       - command:  # heapster-nanny container; its flags target --deployment=heapster / --container=heapster
74 |         - /pod_nanny
75 |         - --cpu=80m
76 |         - --extra-cpu=0.5m
77 |         - --memory=140Mi
78 |         - --extra-memory=4Mi
79 |         - --threshold=5
80 |         - --deployment=heapster
81 |         - --container=heapster
82 |         - --poll-period=300000
83 |         - --estimator=exponential
84 |         env:  # pod name and namespace injected through the downward API (fieldRef)
85 |         - name: MY_POD_NAME
86 |           valueFrom:
87 |             fieldRef:
88 |               apiVersion: v1
89 |               fieldPath: metadata.name
90 |         - name: MY_POD_NAMESPACE
91 |           valueFrom:
92 |             fieldRef:
93 |               apiVersion: v1
94 |               fieldPath: metadata.namespace
95 |         image: gcr.io/google_containers/addon-resizer:1.7
96 |         imagePullPolicy: IfNotPresent
97 |         name: heapster-nanny
98 |         resources:
99 |           limits:
100 |             cpu: 50m
101 |             memory: 112360Ki
102 |           requests:
103 |             cpu: 50m
104 |             memory: 112360Ki
105 |         terminationMessagePath: /dev/termination-log
106 |         terminationMessagePolicy: File
107 |       dnsPolicy: ClusterFirst
108 |       restartPolicy: Always
109 |       schedulerName: default-scheduler
110 |       securityContext: {}
111 |       serviceAccount: heapster  # deprecated alias of serviceAccountName; carries the same value
112 |       serviceAccountName: heapster
113 |       terminationGracePeriodSeconds: 30
114 |       volumes:
115 |       - configMap:
116 |           defaultMode: 420  # decimal 420 == octal 0644
117 |           name: google-cloud-config
118 |         name: google-cloud-config
119 |
120 | ---
121 | apiVersion: apps/v1
122 | kind: DaemonSet
123 | metadata:
124 |   labels:
125 |     app: stackdriver-logging-agent
126 |   name: stackdriver-logging-agent
127 |   namespace: stackdriver-agents
128 | spec:
129 |   selector:
130 |     matchLabels:
131 |       app: stackdriver-logging-agent
132 |   template:
133 |     metadata:
134 |       labels:
135 |         app: stackdriver-logging-agent
136 |     spec:
137 |       containers:
138 |       - env:  # node name from the downward API; cluster and credential settings from ConfigMaps
139 |         - name: NODE_NAME
140 |           valueFrom:
141 |             fieldRef:
142 |               apiVersion: v1
143 |               fieldPath: spec.nodeName
144 |         - name: K8S_NODE_NAME
145 |           valueFrom:
146 |             fieldRef:
147 |               apiVersion: v1
148 |               fieldPath: spec.nodeName
149 |         - name: GOOGLE_APPLICATION_CREDENTIALS
150 |           valueFrom:
151 |             configMapKeyRef:
152 |               name: google-cloud-config
153 |               key: credentials_path
154 |         - name: CLUSTER_NAME
155 |           valueFrom:
156 |             configMapKeyRef:
157 |               name: cluster-config
158 |               key: cluster_name
159 |         - name: CLUSTER_LOCATION
160 |           valueFrom:
161 |             configMapKeyRef:
162 |               name: cluster-config
163 |               key: cluster_location
164 |         image: gcr.io/stackdriver-agents/stackdriver-logging-agent:1.10.3
165 |         imagePullPolicy: IfNotPresent
166 |         livenessProbe:  # fails when the buffers dir is missing or holds no file newer than the thresholds below
167 |           exec:
168 |             command:  # POSIX sh script: STUCK (default 900s) also wipes the buffers, LIVENESS (default 300s) only fails
169 |             - /bin/sh
170 |             - -c
171 |             - |
172 |               LIVENESS_THRESHOLD_SECONDS=${LIVENESS_THRESHOLD_SECONDS:-300}; STUCK_THRESHOLD_SECONDS=${STUCK_THRESHOLD_SECONDS:-900}; if [ ! -e /var/run/google-fluentd/buffers ]; then
173 |                 exit 1;
174 |               fi; touch -d "${STUCK_THRESHOLD_SECONDS} seconds ago" /tmp/marker-stuck; if [ -z "$(find /var/run/google-fluentd/buffers -type f -newer /tmp/marker-stuck -print -quit)" ]; then
175 |                 rm -rf /var/run/google-fluentd/buffers;
176 |                 exit 1;
177 |               fi; touch -d "${LIVENESS_THRESHOLD_SECONDS} seconds ago" /tmp/marker-liveness; if [ -z "$(find /var/run/google-fluentd/buffers -type f -newer /tmp/marker-liveness -print -quit)" ]; then
178 |                 exit 1;
179 |               fi;
180 |           failureThreshold: 3
181 |           initialDelaySeconds: 600
182 |           periodSeconds: 60
183 |           successThreshold: 1
184 |           timeoutSeconds: 1
185 |         name: logging-agent
186 |         resources:
187 |           limits:
188 |             cpu: "1"
189 |             memory: 300Mi
190 |           requests:
191 |             cpu: 100m
192 |             memory: 200Mi
193 |         terminationMessagePath: /dev/termination-log
194 |         terminationMessagePolicy: File
195 |         volumeMounts:
196 |         - mountPath: /var/run
197 |           name: varrun
198 |         - mountPath: /var/log
199 |           name: varlog
200 |         - mountPath: /var/lib/docker/containers
201 |           name: varlibdockercontainers
202 |           readOnly: true
203 |         - mountPath: /etc/google-fluentd/google-fluentd.conf
204 |           subPath: google-fluentd.conf  # mounts this single key as a file instead of the whole volume
205 |           name: output-config-volume
206 |         - mountPath: /etc/google-fluentd/config.d
207 |           name: input-config-volume
208 |         - mountPath: /etc/google-cloud/
209 |           name: google-cloud-config
210 |       serviceAccount: logging-agent  # deprecated alias of serviceAccountName; carries the same value
211 |       serviceAccountName: logging-agent
212 |       dnsPolicy: ClusterFirst
213 |       restartPolicy: Always
214 |       schedulerName: default-scheduler
215 |       securityContext: {}
216 |       tolerations:  # no key + operator Exists: tolerates every taint with the given effect
217 |       - operator: "Exists"
218 |         effect: "NoExecute"
219 |       - operator: "Exists"
220 |         effect: "NoSchedule"
221 |       volumes:
222 |       - hostPath:
223 |           path: /var/run
224 |           type: ""
225 |         name: varrun
226 |       - hostPath:
227 |           path: /var/log
228 |           type: ""
229 |         name: varlog
230 |       - hostPath:
231 |           path: /var/lib/docker/containers
232 |           type: ""
233 |         name: varlibdockercontainers
234 |       - configMap:
235 |           defaultMode: 420  # decimal 420 == octal 0644
236 |           name: logging-agent-output-config
237 |         name: output-config-volume
238 |       - configMap:
239 |           defaultMode: 420
240 |           name: logging-agent-input-config
241 |         name: input-config-volume
242 |       - configMap:
243 |           defaultMode: 420
244 |           name: google-cloud-config
245 |         name: google-cloud-config
246 |   updateStrategy:
247 |     rollingUpdate:
248 |       maxUnavailable: 1
249 |     type: RollingUpdate
250 | ---
251 | # Config map for Logging Agent input and corresponding filter plugins.
252 | apiVersion: v1
253 | kind: ConfigMap
254 | metadata:
255 | name: logging-agent-input-config
256 | namespace: stackdriver-agents
257 | data:
258 | 1.containers.input.conf: |-
259 | # This configuration file for Fluentd is used
260 | # to watch changes to Docker log files that live in the
261 | # directory /var/lib/docker/containers/ and are symbolically
262 | # linked to from the /var/log/containers directory using names that capture the
263 | # pod name and container name. These logs are then submitted to
264 | # Google Cloud Logging which assumes the installation of the cloud-logging plug-in.
265 | #
266 | # Example
267 | # =======
268 | # A line in the Docker log file might look like this JSON:
269 | #
270 | # {"log":"2014/09/25 21:15:03 Got request with path wombat\\n",
271 | # "stream":"stderr",
272 | # "time":"2014-09-25T21:15:03.499185026Z"}
273 | #
274 | # The original tag is derived from the log file's location.
275 | # For example a Docker container's logs might be in the directory:
276 | # /var/lib/docker/containers/997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b
277 | # and in the file:
278 | # 997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b-json.log
279 | # where 997599971ee6... is the Docker ID of the running container.
280 | # The Kubernetes kubelet makes a symbolic link to this file on the host
281 | # machine in the /var/log/containers directory which includes the pod name,
282 | # the namespace name and the Kubernetes container name:
283 | # synthetic-logger-0.25lps-pod_default_synth-lgr-997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b.log
284 | # ->
285 | # /var/lib/docker/containers/997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b/997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b-json.log
286 | # The /var/log directory on the host is mapped to the /var/log directory in the container
287 | # running this instance of Fluentd and we end up collecting the file:
288 | # /var/log/containers/synthetic-logger-0.25lps-pod_default_synth-lgr-997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b.log
289 | # This results in the tag:
290 | # var.log.containers.synthetic-logger-0.25lps-pod_default_synth-lgr-997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b.log
291 | # where 'synthetic-logger-0.25lps-pod' is the pod name, 'default' is the
292 | # namespace name, 'synth-lgr' is the container name and '997599971ee6..' is
293 | # the container ID.
294 | # The record reformer is used to extract pod_name, namespace_name and
295 | # container_name from the tag and set them in a local_resource_id in the
296 | # format of:
297 | # 'k8s_container.<namespace_name>.<pod_name>.<container_name>'.
298 | # The reformer also changes the tags to 'stderr' or 'stdout' based on the
299 | # value of 'stream'.
300 | # local_resource_id is later used by google_cloud plugin to determine the
301 | # monitored resource to ingest logs against.
302 |
303 | # Json Log Example:
304 | # {"log":"[info:2016-02-16T16:04:05.930-08:00] Some log text here\n","stream":"stdout","time":"2016-02-17T00:04:05.931087621Z"}
305 | # CRI Log Example:
306 | # 2016-02-17T00:04:05.931087621Z stdout F [info:2016-02-16T16:04:05.930-08:00] Some log text here
307 |
308 | @type tail
309 | path /var/log/containers/*.log
310 | pos_file /var/run/google-fluentd/pos-files/gcp-containers.pos
311 | # Tags at this point are in the format of:
312 | # reform.var.log.containers.<POD_NAME>_<NAMESPACE_NAME>_<CONTAINER_NAME>-<CONTAINER_ID>.log
313 | tag reform.*
314 | read_from_head true
315 |
316 | @type multi_format
317 |
318 | format json
319 | time_key time
320 | time_format %Y-%m-%dT%H:%M:%S.%NZ
321 |
322 |
323 | format /^(?
326 |
327 |
328 |
329 |
330 | @type parser
331 | format /^(?\w)(?
337 |
338 |
339 | # This plugin uses environment variables KUBERNETES_SERVICE_HOST and
340 | # KUBERNETES_SERVICE_PORT to talk to the API server. These environment
341 | # variables are added by kubelet automatically.
342 | @type kubernetes_metadata
343 | # Interval in seconds to dump cache stats locally in the Fluentd log.
344 | stats_interval 300
345 | # TTL in seconds of each cached element.
346 | cache_ttl 30
347 | # Skip fetching unused metadata.
348 | skip_container_metadata true
349 | skip_master_url true
350 | skip_namespace_metadata true
351 |
352 |
353 |
354 | # We have to use record_modifier because only this plugin supports complex
355 | # logic to modify record the way we need.
356 | @type record_modifier
357 | enable_ruby true
358 |
359 | # Extract "kubernetes"->"labels" and set them as
360 | # "logging.googleapis.com/labels". Prefix these labels with
361 | # "k8s-pod" to distinguish with other labels and avoid
362 | # label name collision with other types of labels.
363 | _dummy_ ${if record.is_a?(Hash) && record.has_key?('kubernetes') && record['kubernetes'].has_key?('labels') && record['kubernetes']['labels'].is_a?(Hash); then; record["logging.googleapis.com/labels"] = record['kubernetes']['labels'].map{ |k, v| ["k8s-pod/#{k}", v]}.to_h; end; nil}
364 |
365 | # Delete this dummy field and the rest of "kubernetes" and "docker".
366 | remove_keys _dummy_,kubernetes,docker
367 |
368 |
369 |
370 | @type record_reformer
371 | enable_ruby true
372 |
373 | # Extract local_resource_id from tag for 'k8s_container' monitored
374 | # resource. The format is:
375 | # 'k8s_container.<namespace_name>.<pod_name>.<container_name>'.
376 | "logging.googleapis.com/local_resource_id" ${"k8s_container.#{tag_suffix[4].rpartition('.')[0].split('_')[1]}.#{tag_suffix[4].rpartition('.')[0].split('_')[0]}.#{tag_suffix[4].rpartition('.')[0].split('_')[2].rpartition('-')[0]}"}
377 | # Rename the field 'log' to a more generic field 'message'. This way the
378 | # fluent-plugin-google-cloud knows to flatten the field as textPayload
379 | # instead of jsonPayload after extracting 'time', 'severity' and
380 | # 'stream' from the record.
381 | message ${record['log']}
382 | # If 'severity' is not set, assume stderr is ERROR and stdout is INFO.
383 | severity ${record['severity'] || if record['stream'] == 'stderr' then 'ERROR' else 'INFO' end}
384 |
385 | tag ${if record['stream'] == 'stderr' then 'raw.stderr' else 'raw.stdout' end}
386 | remove_keys stream,log
387 |
388 |
389 | # Detect exceptions in the log output and forward them as one log entry.
390 |
391 | @type detect_exceptions
392 |
393 | remove_tag_prefix raw
394 | message message
395 | stream "logging.googleapis.com/local_resource_id"
396 | multiline_flush_interval 5
397 | max_bytes 500000
398 | max_lines 1000
399 |
400 | 2.pods.input.conf: |-
401 | # This configuration file for Fluentd is used
402 | # to watch changes to Kubernetes pod log files live in the
403 | # directory /var/log/pods/NAMESPACE_NAME_UID. The file name
404 | # is used to capture the pod namespace, name and uid. These
405 | # logs are then submitted to Google Cloud Logging with a
406 | # local_resource_id 'k8s_pod.<namespace_name>.<pod_name>'
407 | # which assumes the installation of the cloud-logging plug-in.
408 |
409 | @type tail
410 | path /var/log/pods/*/*.log
411 | pos_file /var/run/google-fluentd/pos-files/gcp-pods.pos
412 | # Tags at this point are in the format of:
413 | # pods.reform.var.log.pods.__..log
414 | tag pods.reform.*
415 | read_from_head true
416 |
417 | @type none
418 |
419 |
420 |
421 | @type record_reformer
422 | enable_ruby true
423 |
424 | # Extract local_resource_id from tag for 'k8s_pod' monitored
425 | # resource. The format is:
426 | # 'k8s_pod.<namespace_name>.<pod_name>'.
427 | "logging.googleapis.com/local_resource_id" ${"k8s_pod.#{tag_suffix[5].rpartition('.')[0].split('_')[0]}.#{tag_suffix[5].rpartition('.')[0].split('_')[1]}"}
428 |
429 | # Use the log file name as the tag. Currently only `gvisor` log is supported.
430 | tag ${"#{tag_suffix[5].rpartition('.')[0].rpartition('.')[2]}"}
431 |
432 | 7.system.input.conf: |-
433 | # Example:
434 | # Dec 21 23:17:22 gke-foo-1-1-4b5cbd14-node-4eoj startupscript: Finished running startup script /var/run/google.startup.script
435 |
436 | @type tail
437 | format syslog
438 | path /var/log/startupscript.log
439 | pos_file /var/run/google-fluentd/pos-files/gcp-startupscript.pos
440 | tag startupscript
441 |
442 |
443 | # Example:
444 | # I1118 21:26:53.975789 6 proxier.go:1096] Port "nodePort for kube-system/default-http-backend:http" (:31429/tcp) was open before and is still needed
445 |
446 | @type tail
447 | format multiline
448 | multiline_flush_interval 5s
449 | format_firstline /^\w\d{4}/
450 | format1 /^(?\w)(?