├── .gitignore
├── LICENSE
├── README.md
├── cpu_throttling
│   └── throttling.yaml
├── crashloop_backoff
│   └── create_crashloop_backoff.yaml
├── crashpod.v2
│   ├── code
│   │   ├── app.py
│   │   ├── dockerfile
│   │   └── requirements.txt
│   ├── crashloop-cert-app.yaml
│   └── new-cert.yaml
├── crashpod
│   ├── broken.yaml
│   ├── healthy.yaml
│   └── interactive_demo.sh
├── deployment_image_change
│   ├── after_image_change.yaml
│   └── before_image_change.yaml
├── evictions
│   ├── first_pod.yaml
│   ├── pod_with_priority.yaml
│   └── priority.yaml
├── example_images
│   ├── changetracking.png
│   ├── crashingpod.png
│   ├── deployment-image-change.png
│   ├── driftandnamespace.png
│   ├── failedlivenessprobe.png
│   ├── failedreadinessprobe.png
│   ├── failingjobs.png
│   ├── helm_monitoring_kubewatch.png
│   ├── highcputhrottling.png
│   ├── highoverhead.png
│   ├── imagepullbackoff.png
│   ├── ingress-image-change.png
│   ├── oomkillpod.png
│   └── pendingpod.png
├── gke_node_allocatable
│   └── gke_issue.yaml
├── holmes-meme-generator
│   ├── failure-more-pods
│   │   ├── config.yaml
│   │   ├── curl.yaml
│   │   └── deployment.yaml
│   └── failure
│       ├── config.yaml
│       ├── curl.yaml
│       └── deployment.yaml
├── image_pull_backoff
│   └── no_such_image.yaml
├── ingress_port_path_change
│   ├── after_port_path_change.yaml
│   └── before_port_path_change.yaml
├── init_crashloop_backoff
│   └── create_init_crashloop_backoff.yaml
├── job_failure
│   └── job_crash.yaml
├── job_run_forever
│   └── job_run_forever.yaml
├── liveness_probe_fail
│   └── failing_liveness_probe.yaml
├── memory_pressure_evictions
│   ├── demanding-deploy.yaml
│   └── demo-deploy.yaml
├── minishop-telemetry
│   ├── .gitignore
│   ├── Dockerfile
│   ├── README.md
│   ├── build.sh
│   ├── manifest.yaml
│   ├── package-lock.json
│   ├── package.json
│   ├── src
│   │   ├── auth-service.ts
│   │   ├── backend-service.ts
│   │   ├── checkout-service.ts
│   │   ├── dev.ts
│   │   ├── fraud-service.ts
│   │   ├── public
│   │   │   └── js
│   │   │       └── main.js
│   │   ├── telemetry.ts
│   │   ├── templates
│   │   │   └── checkout.html
│   │   └── util
│   │       ├── callout.ts
│   │       ├── db.ts
│   │       └── trace-handler.ts
│   └── tsconfig.json
├── namespace_drift
│   └── example.yaml
├── oomkill
│   ├── oomkill_deployment.yaml
│   └── oomkill_job.yaml
├── pending_pods
│   ├── pending_pod_node_selector.yaml
│   └── pending_pod_resources.yaml
├── process_data.py
├── prometheus_rule_failure
│   └── bad_prometheus_rule.yaml
├── pvc-misconfiguration
│   ├── pvc-fix.yaml
│   └── redis-deployment.yaml
├── readiness_probe_fail
│   └── failing_readiness_probe.yaml
├── slow-rds-query
│   ├── Dockerfile
│   ├── app.py
│   ├── build.sh
│   ├── manifest.yaml
│   └── requirements.txt
└── sock-shop
    ├── sock-shop.yaml
    └── trigger-carts-issue.yaml
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea/
2 | .vscode/
3 | *.egg/
4 | *.egg-info/
5 | *.pyc
6 | venv/
7 | docs/_build
8 | .DS_Store
9 | *.env
10 | .run
11 | dist/**/*
12 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2022 Robusta
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Introduction
2 | Practice Kubernetes troubleshooting with realistic error scenarios.
3 |
4 | Each scenario is run with `kubectl apply`. To clean up, run `kubectl delete` on the same file.
5 |
6 | # Simple Scenarios
7 |
8 |
9 | ## Crashing Pod (CrashLoopBackoff)
10 |
11 | ```
12 | kubectl apply -f https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/main/crashpod/broken.yaml
13 | ```
14 |
15 | To get notifications like below, install [Robusta](https://github.com/robusta-dev/robusta):
16 |
17 |
18 |
19 |
20 |
21 | ## OOMKilled Pod (Out of Memory Kill)
22 |
23 | ```
24 | kubectl apply -f https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/main/oomkill/oomkill_job.yaml
25 | ```
26 |
27 | To get notifications like below, install [Robusta](https://github.com/robusta-dev/robusta):
28 |
29 |
30 |
31 |
32 |
33 | ## High CPU Throttling (CPUThrottlingHigh)
34 |
35 | Apply the following YAML and wait **15 minutes**. (CPU throttling is only an issue if it occurs for a meaningful period of time. Less than 15 minutes of throttling typically does not trigger an alert.)
36 |
37 | ```
38 | kubectl apply -f https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/main/cpu_throttling/throttling.yaml
39 | ```
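To watch the throttling directly, read the container's cgroup counters. This is a rough manual check, assuming the image ships a shell and `cat`; the file path differs between cgroup v1 and v2:

```shell
# nr_throttled and throttled_time/throttled_usec should climb steadily
kubectl exec frontend-service -- sh -c \
  'cat /sys/fs/cgroup/cpu.stat 2>/dev/null || cat /sys/fs/cgroup/cpu/cpu.stat'
```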
40 |
41 | To get notifications like below, install [Robusta](https://github.com/robusta-dev/robusta):
42 |
43 |
44 |
45 |
46 |
47 | ## Pending Pod (Unschedulable due to Node Selectors)
48 |
49 | Apply the following YAML and wait **15 minutes**. (By default, most systems only alert after pods are pending for 15 minutes. This prevents false alarms on autoscaled clusters, where it's OK for pods to be temporarily pending.)
50 |
51 | ```
52 | kubectl apply -f https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/main/pending_pods/pending_pod_node_selector.yaml
53 | ```
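While you wait, the scheduler's reason is already visible in the pod's events. A quick manual check (substitute the pod name reported by `kubectl get pods`):

```shell
kubectl get pods
kubectl describe pod <pending-pod-name> | sed -n '/Events:/,$p'
# Expect a FailedScheduling event mentioning the unsatisfied node selector
```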
54 |
55 | To get notifications like below, install [Robusta](https://github.com/robusta-dev/robusta):
56 |
57 |
58 |
59 |
60 |
61 |
62 | ## ImagePullBackOff
63 |
64 | ```
65 | kubectl apply -f https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/main/image_pull_backoff/no_such_image.yaml
66 | ```
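The deployment's pods never start; the registry error shows up in their events (label taken from the YAML above):

```shell
kubectl get pods -l app=customer-relations
kubectl describe pods -l app=customer-relations | sed -n '/Events:/,$p'
```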
67 |
68 | To get notifications like below, install [Robusta](https://github.com/robusta-dev/robusta):
69 |
70 |
71 |
72 |
73 |
74 |
75 | ## Liveness Probe Failure
76 |
77 | ```
78 | kubectl apply -f https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/main/liveness_probe_fail/failing_liveness_probe.yaml
79 | ```
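Since the probe always exits 1, the kubelet kills the container on every check. The restart counter makes that visible (pod name from the YAML above):

```shell
# RESTARTS climbs each time the failed probe triggers a container kill
kubectl get pod order-processor -w
```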
80 |
81 | To get notifications like below, install [Robusta](https://github.com/robusta-dev/robusta):
82 |
83 |
84 |
85 |
86 | ## Readiness Probe Failure
87 |
88 | ```
89 | kubectl apply -f https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/main/readiness_probe_fail/failing_readiness_probe.yaml
90 | ```
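Unlike a liveness failure, a failing readiness probe does not restart the container: the pod stays Running but is never marked Ready, so Services send it no traffic.

```shell
# READY stays 0/1 while STATUS remains Running
kubectl get pods -w
```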
91 |
92 |
93 |
94 |
95 |
96 | ## Job Failure
97 | The job will fail after 60 seconds, then attempt to run again. After two attempts, it will fail for good.
98 |
99 | ```
100 | kubectl apply -f https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/main/job_failure/job_crash.yaml
101 | ```
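You can watch both attempts and the final failure as they happen (job name from the YAML above):

```shell
kubectl get job java-api-checker -w
# After the second attempt fails, inspect the simulated Java errors:
kubectl logs job/java-api-checker
```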
102 |
103 | To get notifications like below, install [Robusta](https://github.com/robusta-dev/robusta):
104 |
105 |
106 |
107 |
108 |
109 |
110 | ## Failed Helm Releases
111 | Deliberately deploy a failing Helm release:
112 |
113 | ```shell
114 | helm repo add robusta https://robusta-charts.storage.googleapis.com && helm repo update
115 | helm install kubewatch robusta/kubewatch --set='rbac.create=true,updateStrategy.type=Error' --namespace demo-namespace
116 | ```
117 |
118 | Upgrade the release so it succeeds:
119 | ```shell
120 | helm upgrade kubewatch robusta/kubewatch --set='rbac.create=true' --namespace demo-namespace --create-namespace
121 | ```
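`helm history` shows the failed and deployed revisions side by side, which is a handy way to confirm the transition:

```shell
helm history kubewatch --namespace demo-namespace
```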
122 |
123 | Clean up by removing the release and deleting the namespace:
124 | ```shell
125 | helm del kubewatch --namespace demo-namespace
126 | kubectl delete namespace demo-namespace
127 | ```
128 |
129 | To get notifications like below, install [Robusta](https://github.com/robusta-dev/robusta) and set up [Helm Releases Monitoring](https://docs.robusta.dev/master/playbook-reference/triggers/helm-releases-monitoring.html)
130 |
131 |
132 |
133 |
134 | # Advanced Scenarios
135 |
136 |
137 | ## Correlate Changes and Errors
138 |
139 | Deploy a healthy pod. Then break it.
140 |
141 | ```shell
142 | kubectl apply -f https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/main/crashpod/healthy.yaml
143 | kubectl apply -f https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/main/crashpod/broken.yaml
144 | ```
145 | If someone else made this change, would you be able to immediately pinpoint the change that broke the application?
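Without change tracking, you are reduced to digging through revision history by hand. A sketch of that manual hunt, assuming the default namespace:

```shell
kubectl rollout history deployment/payment-processing-worker
# Inspect a specific revision to spot what changed (here, the dropped DEPLOY_ENV variable)
kubectl rollout history deployment/payment-processing-worker --revision=2
```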
146 |
147 | To get notifications like below, install [Robusta](https://github.com/robusta-dev/robusta).
148 |
149 |
150 |
151 |
152 |
153 |
154 |
155 | ## Track Deployment Changes
156 |
157 | Create an nginx deployment. Then simulate multiple unexpected changes to this deployment.
158 |
159 | ```shell
160 | kubectl apply -f https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/main/deployment_image_change/before_image_change.yaml
161 | kubectl apply -f https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/main/deployment_image_change/after_image_change.yaml
162 | ```
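If you rerun this scenario, running `kubectl diff` between the two `kubectl apply` commands previews exactly what the second file will change:

```shell
# Exits non-zero when a diff is found; shows the image and probe changes
kubectl diff -f https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/main/deployment_image_change/after_image_change.yaml
```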
163 |
164 | To get notifications like below, install [Robusta](https://github.com/robusta-dev/robusta) and [set up Kubernetes change tracking](https://docs.robusta.dev/master/tutorials/playbook-track-changes.html)
165 |
166 |
167 |
168 |
169 |
170 |
171 |
172 | ## Track Ingress Changes
173 |
174 | Create an ingress. Then change its path and secretName to simulate an unexpected ingress modification.
175 |
176 | ```shell
177 | kubectl apply -f https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/main/ingress_port_path_change/before_port_path_change.yaml
178 | kubectl apply -f https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/main/ingress_port_path_change/after_port_path_change.yaml
179 | ```
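To confirm the modification landed, pull the changed fields off the live object:

```shell
kubectl get ingress production-ingress -o yaml | grep -E 'path:|secretName:'
```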
180 |
181 | To get notifications like below, install [Robusta](https://github.com/robusta-dev/robusta) and [set up Kubernetes change tracking](https://docs.robusta.dev/master/tutorials/playbook-track-changes.html)
182 |
183 |
184 |
185 |
186 |
187 |
188 |
189 | ## Drift Detection and Namespace Diff
190 |
191 | Deploy two variants of the same application in different namespaces:
192 |
193 | ```shell
194 | kubectl apply -f https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/main/namespace_drift/example.yaml
195 | ```
196 |
197 | Can you quickly tell the difference between the `compare1` and `compare2` namespaces? What is the drift between them?
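One manual approach is to dump both namespaces and diff them, though the output is noisy (server-set fields such as `uid` and `resourceVersion` always differ):

```shell
kubectl get deployments -n compare1 -o yaml > compare1.yaml
kubectl get deployments -n compare2 -o yaml > compare2.yaml
diff compare1.yaml compare2.yaml
```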
198 |
199 | To do so with Robusta, install [Robusta](https://github.com/robusta-dev/robusta) and enable the UI.
200 |
201 |
202 |
203 |
204 |
205 |
206 |
207 |
208 | ## Inefficient GKE Nodes
209 |
210 | On GKE, nodes can reserve more than 50% of CPU for themselves. Users pay for CPU that is unavailable to applications.
211 |
212 | Reproduction:
213 |
214 | 1. Create a default GKE cluster with Autopilot disabled. Don't change any other settings.
215 | 2. Deploy the following pod:
216 |
217 | ```
218 | kubectl apply -f https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/main/gke_node_allocatable/gke_issue.yaml
219 | ```
220 |
221 | 3. Run `kubectl get pods -o wide resource-optimization-tool` (the pod name comes from the YAML above)
222 |
223 | The pod will be Pending. **A Pod requesting 1 CPU cannot run on an empty node with 2 CPUs!**
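The gap is visible by comparing each node's CPU capacity with its allocatable CPU. On a default e2-medium node, allocatable comes in well under the 2-CPU capacity:

```shell
kubectl get nodes -o custom-columns='NAME:.metadata.name,CPU_CAPACITY:.status.capacity.cpu,CPU_ALLOCATABLE:.status.allocatable.cpu'
```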
224 |
225 | To see problems like this with Robusta, install [Robusta](https://github.com/robusta-dev/robusta) and enable the UI.
226 |
227 |
228 |
229 |
230 |
--------------------------------------------------------------------------------
/cpu_throttling/throttling.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: Pod
3 | metadata:
4 | name: frontend-service
5 | spec:
6 | containers:
7 | - name: stress
8 | image: jfusterm/stress
9 | imagePullPolicy: IfNotPresent
10 | command:
11 | - "stress"
12 | - "--cpu"
13 | - "100"
14 | resources:
15 | requests:
16 | cpu: 10m
17 | memory: 64Mi
18 | limits:
19 | cpu: 10m # you really shouldn't set this - see https://home.robusta.dev/blog/stop-using-cpu-limits/
20 | memory: 64Mi
21 |
--------------------------------------------------------------------------------
/crashloop_backoff/create_crashloop_backoff.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: Pod
3 | metadata:
4 | name: inventory-management-api
5 | spec:
6 | containers:
7 | - name: nginx
8 | image: nginx
9 | ports:
10 | - containerPort: 80
11 | command:
12 | - wge
13 | - "-O"
14 | - "/work-dir/index.html"
15 | - https://home.robusta.dev
16 |
--------------------------------------------------------------------------------
/crashpod.v2/code/app.py:
--------------------------------------------------------------------------------
1 | from flask import Flask, request, jsonify
2 | import os
3 | import signal
4 | import threading
5 | import time
6 | from datetime import datetime, timedelta
7 | import logging
8 | from prometheus_flask_instrumentator import Instrumentator
9 | import random
10 | import requests
11 | import datetime
12 | from dateutil.parser import isoparse
13 | from cryptography import x509
14 | from cryptography.hazmat.backends import default_backend
15 |
16 | start_time = datetime.datetime.now()
17 |
18 | PAGE_CONTENT = """
19 | <!-- The HTML/CSS template was mangled when this dump was generated.
20 |      Recoverable content: a page titled "Welcome to Flask App" with the
21 |      text "Your application is running successfully!", "Explore the
22 |      features and enjoy your stay.", and an "API Documentation" link. -->
73 | """
74 |
75 | app = Flask(__name__)
76 | Instrumentator().instrument(app).expose(app)
77 |
78 | CONFIG_PATH = '/config/crash_counter.txt'
79 | CERT_PATH = '/certs/certificate.pem'
80 | KEY_PATH = '/certs/key.pem'
81 |
82 | def get_certificate_expiration():
83 | if not os.path.exists(CERT_PATH):
84 | raise FileNotFoundError(f"Certificate file not found: {CERT_PATH}")
85 |
86 | with open(CERT_PATH, 'rb') as cert_file:
87 | cert_data = cert_file.read()
88 |
89 | try:
90 | cert = x509.load_pem_x509_certificate(cert_data, default_backend())
91 | expiration_date = cert.not_valid_after
92 | return expiration_date
93 | except Exception as e:
94 | raise ValueError(f"Error loading certificate: {e}")
95 |
96 |
97 | def latency_test_thread():
98 | while True:
99 | latency_test()
100 | sleep_time = random.uniform(0.5, 10.0)  # Random sleep time between 0.5 and 10 seconds
101 | time.sleep(sleep_time)
102 |
103 | def latency_test():
104 | try:
105 | url = "http://localhost:5000/"
106 | response = requests.get(url)
107 | except requests.exceptions.RequestException as e:
108 | logging.exception(f"An error occurred: {e}")
109 |
110 | class ExpiredCertException(Exception):
111 | pass
112 |
113 | def read_file(file_path):
114 | try:
115 | with open(file_path, 'r') as f:
116 | return f.read().strip()
117 | except Exception as e:
118 | return ""
119 |
120 | def write_file(file_path, content):
121 | dir_path = os.path.dirname(file_path)
122 | if not os.path.exists(dir_path):
123 | os.makedirs(dir_path)
124 | with open(file_path, 'w') as f:
125 | f.write(content)
126 |
127 | def check_certificate_expiry():
128 | time.sleep(2)
129 | logging.info(f"check_certificate_expiry thread started")
130 | while True:
131 | time.sleep(60)
132 | try:
133 | cert_expiry_str = read_file(CERT_PATH)
134 | if not cert_expiry_str:
135 | raise Exception("Null certificate")
136 | logging.info(f"Validating cert")
137 |
138 | cert_expiry = get_certificate_expiration()
139 | if datetime.datetime.now() > cert_expiry:
140 | logging.warning("Certificate has expired. Update the ssl certificate using the '/update_certificate' API or update the config map.")
141 | raise ExpiredCertException(f"Certificate expired on {cert_expiry}")
142 | else:
143 | logging.debug(f"Cert good until {cert_expiry}")
144 |
145 | except ExpiredCertException:
146 | logging.exception("SSL certificate expired")
147 | os._exit(1)
148 | except:
149 | logging.exception("check_certificate_expiry failed")
150 |
151 |
152 | @app.route('/')
153 | def home():
154 | return PAGE_CONTENT
155 |
156 | @app.route('/update_certificate', methods=['POST'])
157 | def update_certificate():
158 | new_certificate = request.json.get('certificate')
159 | duration = request.json.get('duration', 1440) # Default to 1 day (1440 minutes)
160 | duration = min(max(duration, 1), 10080) # Ensure duration is between 1 minute and 1 week
161 |
162 | write_file(CERT_PATH, new_certificate)
163 | return jsonify({"message": "Certificate updated successfully"}), 200
164 |
165 | if __name__ == '__main__':
166 | logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
167 | current_certificate = read_file(CERT_PATH)
168 |
169 | threading.Thread(target=check_certificate_expiry, daemon=True).start()
170 | threading.Thread(target=latency_test_thread, daemon=True).start()
171 | app.run(host='0.0.0.0', port=5000)
172 |
--------------------------------------------------------------------------------
/crashpod.v2/code/dockerfile:
--------------------------------------------------------------------------------
1 | # Use an official Python runtime as a parent image
2 | FROM python:3.9-slim
3 |
4 | # Set the working directory in the container
5 | WORKDIR /app
6 |
7 | # Copy the current directory contents into the container at /app
8 | COPY . /app
9 |
10 | # Install any needed packages specified in requirements.txt
11 | RUN pip install --no-cache-dir -r requirements.txt
12 |
13 | # Make port 5000 available to the world outside this container
14 | EXPOSE 5000
15 |
16 | # Run app.py when the container launches
17 | CMD ["python", "app.py"]
18 |
--------------------------------------------------------------------------------
/crashpod.v2/code/requirements.txt:
--------------------------------------------------------------------------------
1 | Flask==1.1.4
2 | Werkzeug
3 | markupsafe==2.0.1
4 | prometheus-flask-instrumentator
5 | requests
6 | python-dateutil
7 | cryptography==39.0.1
--------------------------------------------------------------------------------
/crashpod.v2/crashloop-cert-app.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: ConfigMap
3 | metadata:
4 | name: db-certs-config
5 | data:
6 | certificate.pem: |-
7 | -----BEGIN CERTIFICATE-----
8 | MIIFazCCA1OgAwIBAgIUAxlaoUqIXtwMdnsh4S4wj0ME5WIwDQYJKoZIhvcNAQEL
9 | BQAwRTELMAkGA1UEBhMCQVUxEzARBgNVBAgMClNvbWUtU3RhdGUxITAfBgNVBAoM
10 | GEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDAeFw0yNDA4MDIxNzAwMDhaFw0yNDA4
11 | MDMxNzAwMDhaMEUxCzAJBgNVBAYTAkFVMRMwEQYDVQQIDApTb21lLVN0YXRlMSEw
12 | HwYDVQQKDBhJbnRlcm5ldCBXaWRnaXRzIFB0eSBMdGQwggIiMA0GCSqGSIb3DQEB
13 | AQUAA4ICDwAwggIKAoICAQDBDoWaoNwcybbgVvRXEK2reUd0TdICoOGtYykyGmox
14 | E18q4n3X3dEV1to6Wb2FZDz8rIo0jCB0hiqo66yLW8tD6QRKhuL8iPKN6Ng2mv9E
15 | VMb58RyfwTIqgMYNaDAajfS3Ri65A+ZshFVWjQKthtUvOnh/eWYlX5f3v+uovGmY
16 | 2QvOA5B3CGy76uS8CJIyaOJLSCwQLHDp4AVQyZHo4e25paM7QGZU6iPPQB009ggR
17 | F96F6L9rH4XiXgHcyVE7AZWiKvCxl8EWu+yHkKLyCjL9K4qbXHr2+eTSR83YCROw
18 | 38CGJ8Z6K+rsIH7XpuOXm5+4lCUyXZjUGdSh2fRJbG7FB1FdDAd/Gf54Z1968vYh
19 | qjf0CQr6ll2YFnLQmSknF5EyD0SsOm53hI5Da2CpU0s35Uew5oscLD29zRyCd8qC
20 | E5koJCWZ0ACboCaWMkm5UaXDcMDvV+GcGyXsd1hapcml9XUYH7zXQ5pmb9k3JYJD
21 | 8ItLt2YHMq4N49cL2pWACzRrDSjRcHCssRuTZYiahF7E3q4ZwKcEkeyEhFBB7MuW
22 | LiWhdmSo2ii9fg2iyU3nZnLNfGlc1Fs6nhsUasfFmLZYhc+T+3ZVEBVi2kSds6Lc
23 | 7Aclwrcpx6kWycTaNeV7Nb9keAXWixt7GGXjd9JCGmyefmeDZCBdvUf1pHlIzZmp
24 | 8QIDAQABo1MwUTAdBgNVHQ4EFgQUCMojvEuoHG0KqTHXlXwRYfxxie0wHwYDVR0j
25 | BBgwFoAUCMojvEuoHG0KqTHXlXwRYfxxie0wDwYDVR0TAQH/BAUwAwEB/zANBgkq
26 | hkiG9w0BAQsFAAOCAgEAnMkLAH/06bg3akz9UOufEcicZGKmg1Bl/xgtsTEXorfI
27 | MVYiTieU5H2fAGIuzk+xKWP9tu6y/UJPw1jh9j6uQ160SXhYSGbvRbbqsnXT2NN4
28 | 25VQVRWJ7Q23djZTuD+6SNB7coUf1D93IFgzFR8vEIRcLS1HhKA7f1FYDXP56Wjn
29 | yqj31Z+Rq668qPc+HPS52TOJDy/e2LusKIVMAUbIUomdeteaoPWq95w4m+U6kn/q
30 | P9UQRK4yOD8vlgExCUySJzT/KeggWIdorEfoNb9g7YsGurFHjTW079/sYPYYqWr+
31 | 1bqzfWTBnAUuHtlw5U65d8MMvmajhaB3xRBpr0aubufhQL5wKkvUF8JHAkPm0zSa
32 | dFRPh1yPwach2nMpknN7RwK/29SGhK7eQMCbr3GMPc9hCpNdaDJR0rxkng0KsNRx
33 | JpAzBxhQ3KblCWDm4w9KpPjwJaYV3sqrR8Uvm+rpS+OCcwMPpuIy7dbsJCPQy6KT
34 | Dz32B5ALt08vyTcLQmCuksuoviLNLJUovSr52kDLl1LoUZyXlRTxLZM8V07c7wiX
35 | Qdx5youUmJws8cGM2I8xnzhs94RO7HQ9QsJz5MPggMD2EB5dQVYYZxLLLdCWLCCp
36 | Yb+A6eku3FU3frOXCF8KlSKrRqTGMUQwHZWf1CSQsjOcAy/xVRaydLq5D4bL294=
37 | -----END CERTIFICATE-----
38 | key.pem: |-
39 | -----BEGIN PRIVATE KEY-----
40 | MIIJQwIBADANBgkqhkiG9w0BAQEFAASCCS0wggkpAgEAAoICAQDBDoWaoNwcybbg
41 | VvRXEK2reUd0TdICoOGtYykyGmoxE18q4n3X3dEV1to6Wb2FZDz8rIo0jCB0hiqo
42 | 66yLW8tD6QRKhuL8iPKN6Ng2mv9EVMb58RyfwTIqgMYNaDAajfS3Ri65A+ZshFVW
43 | jQKthtUvOnh/eWYlX5f3v+uovGmY2QvOA5B3CGy76uS8CJIyaOJLSCwQLHDp4AVQ
44 | yZHo4e25paM7QGZU6iPPQB009ggRF96F6L9rH4XiXgHcyVE7AZWiKvCxl8EWu+yH
45 | kKLyCjL9K4qbXHr2+eTSR83YCROw38CGJ8Z6K+rsIH7XpuOXm5+4lCUyXZjUGdSh
46 | 2fRJbG7FB1FdDAd/Gf54Z1968vYhqjf0CQr6ll2YFnLQmSknF5EyD0SsOm53hI5D
47 | a2CpU0s35Uew5oscLD29zRyCd8qCE5koJCWZ0ACboCaWMkm5UaXDcMDvV+GcGyXs
48 | d1hapcml9XUYH7zXQ5pmb9k3JYJD8ItLt2YHMq4N49cL2pWACzRrDSjRcHCssRuT
49 | ZYiahF7E3q4ZwKcEkeyEhFBB7MuWLiWhdmSo2ii9fg2iyU3nZnLNfGlc1Fs6nhsU
50 | asfFmLZYhc+T+3ZVEBVi2kSds6Lc7Aclwrcpx6kWycTaNeV7Nb9keAXWixt7GGXj
51 | d9JCGmyefmeDZCBdvUf1pHlIzZmp8QIDAQABAoICAA43jc64lGUZVLqaJLXJsGRt
52 | kr3FolbXtq7gW88JA3N4P/ymP8q3FtfzIdCEvdB2NIiOA0KB5Dz4iWk1q9f6a1Jl
53 | tU9wtgQPaL9bG5sP2IcVcC7FWwyHu3RGVe90UgCOniwmArhUyXIJ38f3K/CP+qCW
54 | /3sxuzw62TPhQg9Dysn7B+PEvu2JJQrErlILloqYTzvohXgucaVDfli382MRSkKu
55 | 6EGriS2tLXRsfgohqS/rEFPPs1MF+APW1O/Hso/0Gqsd2upVKN7N+Yn1NI67AgzD
56 | WVp+Gb8Y4Yk7yPSlHHpcy+6FPNhP5iW78eNbMqz+RKmg3Ulje6b4/P/rT1Jd5h1C
57 | 4NX60YZTqCzDn78FRIYScun+ToBS6K+HhblsCzNKsOx1nn7PNS6+S/LmRDhvkxOg
58 | Lp3zVJUNPp7MSQeFdDACysz5WzgVpn3/9lxQeo2AlL5HHz4V5BKHH4S0vFqS8HAt
59 | DOmOHbzhdsAa1VAS/70Tl3Fqw3lWmDYiH9HPlnM+1upHuZfnHPga9VL98Hotk2Sr
60 | zyh8OmpqpVGs+z07MKyQOueUt4clfY6Kd5f2/BeL3IHoFhEf5PPPGYYVtkCIp1Hg
61 | Avrztx5ds9SUJ79vfoPfS7VOXf8NjAKCrXm2MSHCnvWxKNro/3nEP6YWl2JsfTM4
62 | 4+OjNIGRRdq74KVFBegRAoIBAQD7u97/IDpWuP/AE5QlPk6ocn47TML8JYJZga3T
63 | BhgnMjYcIISP1wcPWsZmejJGBg2switzBAxz1Nu5fco9fJtXTCFO62r7SziSQhz9
64 | 0Xjr36LK6s94Dnh8pIhUuGxzk38rLZXyIaC8TcIgEW9anAkm3sQZEJ9rhLgBkoqo
65 | Zk1SOuGOKxvJCUepO2tyupX5L9bPVOtn/N8kw3eytU6cknY3lSFTDL/45f4ou7eR
66 | OoLRBMLdbvBduPYS90juCEpxGxa6Bgv2LwPQHFSbP2I76daEnT2OC65++BZ+gs7g
67 | v2EMOWsSnKrugzQmKvHGVYAexbmjgSgFGjN5PqIUu7G780gZAoIBAQDEVBWWOAhS
68 | 2Jqtm6vEBaWu01+zEAQnvOv3/f0PK8eJBKFZgPwDps+ohgbJnjkAigSIeK2D1i6C
69 | BJpC1nBGhQqpAM18HhzowTvv8ZZElrMLH418hyU2GsB4kFT29CWOyNzw+D43CE9U
70 | LAek2lZUbISUeHQ/KqJEjZahwZjsrxFXzeOwTSzDzVUKxSxFUK7+0Fc4gXcMU8+F
71 | BG7koDQmNGP7obyCEqfcdnnl/QNxQNzSbT6w9hboHaYZvgIiyDGTUY1JGKkYkb76
72 | tui4BXPyJjHzAYn/NnWKK4PHO+xz2x1lzv+HkXP4cf4NievAw+a1e35N7nUywHpz
73 | zleKaoejPouZAoIBAQCYX1aSjPpjBeXgaLlyda187piT4lnf8ABfl0wVAIpcJ1tz
74 | WQeEXX6rf6G549tMYVujp4BPRTMYZc5+D7L71tHcXHuI3pzJ8PUUKqqLxGByPdK3
75 | Crx8Ch5KK/lNdrhGmRAPna82eb5mXJbVfFgvu2UavxpppZMprJMIeKAvYIUK8hMX
76 | T+aooFJdEFdwgZpH0/otU1SOhBS5lwt8A5zKzyh5am5aa8xDp0tZco6hfI40YLJn
77 | AEDEKjHgtwfQ9PnCTshJ32EcKccgEBXyRsJ6S4eKi/b3ZQDBX/+Voi4BAjaHBLZX
78 | 9LbOWmKVe8cO6y/68Gn2mzYTKJHhoNTQwe/KOW75AoIBAHYT/Ee0u4zYKW9wyQXE
79 | P7m99zjC0Tz1Kxaj86dENWX7Gu+x2+B2Wz6IHVz1hGMGn2NU8UbxcldQXVcFkdFY
80 | AulSSkPz+H8keN/+9NmbqWNvPrE+tuqrfEcGx5Mfk9ckh32GUCTZIp0FPjOv/WwZ
81 | OEPzjjbPdXi1j3sJ7OetA0lD00UdpWa9///SRqYJuLYYp9ycQPVxEsZ1vBxVHgg8
82 | SkX1KGMyxTKbLwVb/s4GrMkCW0ojN/OyoUuqM66nqOlpiwgDhO/5+Frh12WWIf3p
83 | avE0Y8LTBi5rrX4OVE/54Nxyes0WdiFereWGpfvx1rgf5f8JBAPaL8umto/RMnLE
84 | E0ECggEBAIn97w/dd1Z6Tjcjz7HxHXST+LlimszFSvx40gZ8VfL+qDKAEYT3MM7M
85 | oluvgTJMBydWgcUzwlJ48UhQCnrNA4Y+rwWXjYpXXBnMd6xT4dP7iDSqHWf9zX5a
86 | JWdjVPpDtnfdKFZy2cTEUykt+zyzgjD4DN8XXHQtZnfeYjcos0vvg1bb/4mwtpjY
87 | MoinuTwgF+inVT2Bdm6xVvQDCCFzOhn/NAFDQWWkQHW9n5IZCqIg591Wri2sDkNi
88 | 3kKaLUtGP/qobqHNGMIemuxDYXjmoINzg0jKOeqOvJgVYHPj9gNoNtLDfYSjcnz4
89 | admqkDkna+swqOW5BXU8Th65/KGiUZc=
90 | -----END PRIVATE KEY-----
91 | ---
92 | apiVersion: apps/v1
93 | kind: Deployment
94 | metadata:
95 | name: db-certs-authenticator
96 | labels:
97 | app: flask
98 | spec:
99 | replicas: 1
100 | selector:
101 | matchLabels:
102 | app: flask
103 | template:
104 | metadata:
105 | labels:
106 | app: flask
107 | spec:
108 | containers:
109 | - name: flask
110 | image: us-central1-docker.pkg.dev/genuine-flight-317411/devel/flask-app:latest
111 | ports:
112 | - containerPort: 5000
113 | volumeMounts:
114 | - name: cert-volume
115 | mountPath: /certs/certificate.pem
116 | subPath: certificate.pem
117 | - name: key-volume
118 | mountPath: /certs/key.pem
119 | subPath: key.pem
120 | - name: writable-certs
121 | mountPath: /certs
122 | volumes:
123 | - name: cert-volume
124 | configMap:
125 | name: db-certs-config
126 | items:
127 | - key: certificate.pem
128 | path: certificate.pem
129 | - name: key-volume
130 | configMap:
131 | name: db-certs-config
132 | items:
133 | - key: key.pem
134 | path: key.pem
135 | - name: writable-certs
136 | emptyDir: {}
137 | ---
138 | apiVersion: v1
139 | kind: Service
140 | metadata:
141 | name: flask-service
142 | labels:
143 | app: flask
144 | spec:
145 | selector:
146 | app: flask
147 | ports:
148 | - protocol: TCP
149 | name: http
150 | port: 80
151 | targetPort: 5000
152 |
--------------------------------------------------------------------------------
/crashpod.v2/new-cert.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: ConfigMap
3 | metadata:
4 | name: db-certs-config
5 | data:
6 | certificate.pem: |-
7 | -----BEGIN CERTIFICATE-----
8 | MIIFazCCA1OgAwIBAgIUL72qRpggT3Ej4461BBnm+xAtKKkwDQYJKoZIhvcNAQEL
9 | BQAwRTELMAkGA1UEBhMCQVUxEzARBgNVBAgMClNvbWUtU3RhdGUxITAfBgNVBAoM
10 | GEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDAeFw0yNDA4MDYwODAxMjJaFw0yNzA1
11 | MDMwODAxMjJaMEUxCzAJBgNVBAYTAkFVMRMwEQYDVQQIDApTb21lLVN0YXRlMSEw
12 | HwYDVQQKDBhJbnRlcm5ldCBXaWRnaXRzIFB0eSBMdGQwggIiMA0GCSqGSIb3DQEB
13 | AQUAA4ICDwAwggIKAoICAQDWG8wy1Bx6bW/rEwxgqPLccSiW8DfT9eFpoS/8+EgU
14 | 89wbfmrPKXZNUqRKaPfmL05PoHbXTz1miV82IyBtIoexixsXNWpkwnE+P9aBJDMh
15 | xwVr9y+J0g2s/q0gTBtekX2himC5UoZD3SZTFBNSkb1HBPutgpBW9p9NnjWxtWtW
16 | SQQOlxESysDpjlGGJ78FKTeeJiJni/0Mqx5wcfCBvxpu4ZHZpMl7lus1ea+DkhnT
17 | GukWJaNvQ0rEvZ4SS8iNz3gT+GCMChYBqzHKSrdDcrettCh85gt+zwF1dONX+E/0
18 | Co6pZtQKs0UE8DkYA+EgLqFS+fv2aASEiwFgpIY5hB72oOph6GcXm6J84OOlNMF/
19 | /uOLxGi7Q06t5653ZhjeI5mmWliTQWaYVa889imz7+Zi7GWFNZBH+n3o9wiEdz8O
20 | UCqyNxlLjsrGXtQRlMGXiis+ZlavOXHVaKZmwFDF1MxY54toooPpTjSaASNDH+JJ
21 | Jq/NHAgRpqTg+YbnFbPokyuHcvFLfsReBY/12geSrPRboXO2uOonCDljyJ19EQ5d
22 | 4KM+ysqjtsS419lDBfDmBkl4456RQTgf+K6J+yVJYPEzZExe+waDQrg9XxRDwDES
23 | D/rfCryWT8oYwhrIWJjbCYmDsvcJljXJkSOUb0Yo+PEXcu0EWrXYsojvs6Dkl694
24 | MwIDAQABo1MwUTAdBgNVHQ4EFgQUgPrkSFk+pEHyUqbJMtMlkkEYFZMwHwYDVR0j
25 | BBgwFoAUgPrkSFk+pEHyUqbJMtMlkkEYFZMwDwYDVR0TAQH/BAUwAwEB/zANBgkq
26 | hkiG9w0BAQsFAAOCAgEAlUM0P766Wxwm6ZHXWsgjiICXwI5WyoPeixfFNWQJssnm
27 | JU3Ftio25J1XLvVHZsqNNlsE0c+5dyNN/KKVzQ8eMZW/4/54aaTNRe8mUK4tLtwS
28 | 3jFpBH5GACajPpyEp6M38QyRXtTFWQ0PtnWKcb6IBU2ZsfWmW3+qEUxpPEQgbtDE
29 | LKHP374NSsev9An35A+2qhIwPv1fPLcMOyPNaZqv1IatFmi1ElOIcIfdXvLUfLwA
30 | PgV6yf424XBAk+izAxoDWgiw9l4E8mj0M8JTe5CeFulBk2sZngbqmzcL/GJsPvht
31 | f/4ZWv0RuEm1jRy2yUADNWkxIB1gZqHptLWxUPiaf+YfgWXuZR94tT6z2LTXZAvw
32 | p/VsIqYGqYFHmVm6x7JLGClCAfz/ypTvQgnYcfY1dGFjY6Wexhp0UcHjKee5JQfd
33 | BnM+1yeksI4kbZIvi22D2uRDA49/Zopbaqg9IzUKuOtGGD+u8LKdVnS28j2XF2/s
34 | 2Mi1ieeXz7K04MtGjv+Sg60N86AN3p72f44MB3fm9/d9ybopTB4VzgzR5NKXzgDp
35 | TqpL2r0SGSJNfQUDYMerAEa7ULNKjXaiipl6clhDtC5sclEC2NyaIQPg9P2lTclP
36 | xH2/9xeP4aa3KJn2izKqqRxJDA6roGXrtH5heD3EvJleyhlpG4o72KgcJmapt30=
37 | -----END CERTIFICATE-----
38 |
39 | key.pem: |-
40 | -----BEGIN PRIVATE KEY-----
41 | MIIJQwIBADANBgkqhkiG9w0BAQEFAASCCS0wggkpAgEAAoICAQDWG8wy1Bx6bW/r
42 | EwxgqPLccSiW8DfT9eFpoS/8+EgU89wbfmrPKXZNUqRKaPfmL05PoHbXTz1miV82
43 | IyBtIoexixsXNWpkwnE+P9aBJDMhxwVr9y+J0g2s/q0gTBtekX2himC5UoZD3SZT
44 | FBNSkb1HBPutgpBW9p9NnjWxtWtWSQQOlxESysDpjlGGJ78FKTeeJiJni/0Mqx5w
45 | cfCBvxpu4ZHZpMl7lus1ea+DkhnTGukWJaNvQ0rEvZ4SS8iNz3gT+GCMChYBqzHK
46 | SrdDcrettCh85gt+zwF1dONX+E/0Co6pZtQKs0UE8DkYA+EgLqFS+fv2aASEiwFg
47 | pIY5hB72oOph6GcXm6J84OOlNMF//uOLxGi7Q06t5653ZhjeI5mmWliTQWaYVa88
48 | 9imz7+Zi7GWFNZBH+n3o9wiEdz8OUCqyNxlLjsrGXtQRlMGXiis+ZlavOXHVaKZm
49 | wFDF1MxY54toooPpTjSaASNDH+JJJq/NHAgRpqTg+YbnFbPokyuHcvFLfsReBY/1
50 | 2geSrPRboXO2uOonCDljyJ19EQ5d4KM+ysqjtsS419lDBfDmBkl4456RQTgf+K6J
51 | +yVJYPEzZExe+waDQrg9XxRDwDESD/rfCryWT8oYwhrIWJjbCYmDsvcJljXJkSOU
52 | b0Yo+PEXcu0EWrXYsojvs6Dkl694MwIDAQABAoICAGE3O+90Sf34xNFq07rr22Hm
53 | a3SlW+P9B72Ld5uYLDp/Q1ikfODVRab0OC6lHLJCln8nL+6xRwLQqw+MwYNsI0ql
54 | C8bs9QMNGpgWKlJ22lSi5j5QfSBJ5PXAqWE5zU+SJgX8K9DZvKcBug4M1n0uOVAO
55 | 3iu5Vcp2rnxyspSe+D60ZogtZY4vy4oAmdmdIGm6fktrq0Fy2ZIv/GE4CukOmXTx
56 | LOUyol1geCnOt7e41cr5tA3fRycU2dUc97H9APIK2s3OgB/10Govjir4499XLjlL
57 | zPpxBZUtbs8jJLrjv4HibUZTr0Gwhk0oufPChyyyn4NqiFIohqyb5SM5cMyaxeeG
58 | belfhdJrwuGh79mLYTb1SZsW7JYL4cqzBaRYS+n6o17V7woFMvLFVzIf+pqksHsi
59 | qWLKKkhS5A9bHQkwOFges5StkVQqVbZKpDNewXdwk7y0oJO1vittd4d/FjwISmAt
60 | gqZTp2r4JYg2wL0jt1hJNB8WXRAz+sYpG3UeZQ5CASWntfJb6zmzfqCFN+aq6mUo
61 | wh7g/37kuGAXbwG2MWdgis7DEgPrEoAm0SlzhB+1vp2eA0vDtcRMj3wquAEPYlwD
62 | F/DwmXsXBpU2dFyCGuw50rzAKktn/KT5x2H3rwPB6G8RL0e2mMFyRxZFHsp3ioeH
63 | F+MORC58Ec3LjCmXn7JBAoIBAQD+gmxGN1YHGwut3ufxKiTMsImV0J/t5M68DUFm
64 | MkJNxWU9MpCQMbjcKsonFIqqA086VmYYJD6BIOnMuvCfGRK0ICBa1Wry4Zxui7uw
65 | dj6YtgL2PEQ4VBW5oir96yZjHTjf56L6mcluxLppalmRqp29aE3B9Jkh0hQ7QjXE
66 | +K9+89GvnD0GANTY3AzVNoRs4EYpfvc4R3rH3VIyEmubeHKTSIlKi1wLyqoRg1AU
67 | JihytY8ohosV2vAZdQwjN4ev6jZ2SwSsgbnePKMlJxw7f3U58lzGPw37RAyXx5R0
68 | KsI2Vg8jqBLrSvHmZ529F8wgmj0B3pE1qqFvFrcfJdpX7gahAoIBAQDXXM2XY1lV
69 | GK78273mD7E9OTVDHXrHtL9XQuSUyExFEl0zOZL9E6piCj9IsEtJa/pvlD2iOecs
70 | H0Axp74/FDKOfn3+qPKrboOJNtQJx3G8mXc1YpperNDDhsjX8yidW6v8Fl2d9Fbr
71 | LQH1krLah9mEycmksoS8GRgFSh6DxYPhtuQvpc3Ja7GV3P1QjdDB7Fq/UwOylNKr
72 | rhMEjsrEQCjMCDzjQKx6L3sD4zvrBmi8EcNxJtK9kVaiZP2juWIia4ljJPXabQuH
73 | NRlCE709whoOIA8ZizBwdz327XCWBbZ3pWKiZ3c/2UiHlhjZJMYb1q0kMYpLSnEM
74 | OsIup9oZjBJTAoIBACqFFZvBgPDc9t2MgQmMdetwjvvlh+KRuGw4drWhLeyaTuij
75 | ev1sISs/RtiymNwAg4sGhjYAUCT4n7bhgHPEF5jaSS2DvENpp93cIHZBuwF+Xw5O
76 | UUNr4NlLm40ibR5f8nGN3G+ro7iKy23zI5WvgHz1YRBHUhIRa+nejOckeQVnrJ9/
77 | Jb7MAsNcOBSi8ArUiURDfv/7CSs88Y+F/k5SGCo9ExrOH4mkyTb1zjofieBoLsuh
78 | drknA7ftEyMLEvMz2Pd4XwaHAct8K/F4EEJsb+RHdnJ0dFyHKOduo4Go5xt20z6s
79 | xMRkl/dbVkISKLtFqW2XhoEFycSqcduVUl4gWSECggEBAJHIPVdMJi2HbvS1nv98
80 | AeNKtXZAH1vOS3O7UVYxnAdpnfNN9uUficeh3iLLS7tgv38GXw5Qmd7xLJgwT4UN
81 | nAd/MrTPEq3fOmUI21HfU2tyixJ1M8AAxbSFqbEc5ZJXer1DOo3w12GM7vWYHz/0
82 | k9bibX/yZA2cAvmZqbxTWKnTDvX+C6ZcIzMqC8OarASJhQhcHnWlblJOnIsKSdG9
83 | CKDMtsKyhc6UE3aXPHkktoMQlzciK6etGXMGX/eTRw27s52MaGw6QoJ8/CMRj5md
84 | qduD8hS9dsW6whRDuGGlblXhezoqRbvPztwRS7K5xO0VTdDhRBchRhMcN1QzxVxj
85 | yAcCggEBAO2cszxCCbxwSBF9XfKw2GiiaSKoPZIop10pS647Qe4PMGLBZ7t2HrIX
86 | 1IowcO6jxdpc2LCGKZbxunIjM/VA8KDhqPhdPatjFn3Ie3KSAunGS/0NnQvipFLh
87 | +W1i0GCQC8+ALklgnRG0nSlE84W6u7ebBmg9vKGzfbkX/S1+PyNcYX9L9+wbCPK3
88 | gFkyCdVDG8PTKJYr0WLY9jEILlcbkYZEe6gWEqDzrp4CDdAwemig85Q343RtmmAy
89 | NaB3n7PTNwSMNNxQ/p2d5n/jMDwQ+kjkfrBNMF5rBNMPhsXN8t+pxDvEphbD7/c2
90 | rxYMX6jST8KQpOe8s16s1I0KouILlRw=
91 | -----END PRIVATE KEY-----
--------------------------------------------------------------------------------
/crashpod/broken.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: apps/v1
2 | kind: Deployment
3 | metadata:
4 | name: payment-processing-worker
5 | spec:
6 | replicas: 1
7 | selector:
8 | matchLabels:
9 | app: payment-processing-worker
10 | template:
11 | metadata:
12 | labels:
13 | app: payment-processing-worker
14 | spec:
15 | containers:
16 | - name: payment-processing-container
17 | image: bash
18 | command: ["/bin/sh"]
19 | args: ["-c", "if [[ -z \"${DEPLOY_ENV}\" ]]; then echo Environment variable DEPLOY_ENV is undefined ; else while true; do echo hello; sleep 10;done; fi"]
20 |
--------------------------------------------------------------------------------
/crashpod/healthy.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: apps/v1
2 | kind: Deployment
3 | metadata:
4 | name: payment-processing-worker
5 | spec:
6 | replicas: 1
7 | selector:
8 | matchLabels:
9 | app: payment-processing-worker
10 | template:
11 | metadata:
12 | labels:
13 | app: payment-processing-worker
14 | spec:
15 | containers:
16 | - name: payment-processing-container
17 | image: bash
18 | env:
19 | - name: DEPLOY_ENV
20 | value: "Deployment.1.13"
21 | command: ["/bin/sh"]
22 | args: ["-c", "if [[ -z \"${DEPLOY_ENV}\" ]]; then echo Environment variable DEPLOY_ENV is undefined ; else while true; do echo hello; sleep 10;done; fi"]
23 |
--------------------------------------------------------------------------------
/crashpod/interactive_demo.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | echo "Deploying a healthy Deployment"
3 | kubectl apply -f ./healthy.yaml
4 |
5 | read -p "Press enter to break the Deployment..."
6 | kubectl apply -f ./broken.yaml
7 |
8 | echo -e "\nWaiting 60 seconds"
9 | sleep 60
10 | echo "Done waiting. Check your Slack channel and the Robusta UI"
11 |
12 | read -p "Press enter to cleanup..."
13 | kubectl delete deployment payment-processing-worker
14 |
--------------------------------------------------------------------------------
/deployment_image_change/after_image_change.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: apps/v1
2 | kind: Deployment
3 | metadata:
4 | name: prod-endpoint
5 | spec:
6 | replicas: 1
7 | selector:
8 | matchLabels:
9 | app: nginx
10 | strategy:
11 | type: RollingUpdate
12 | rollingUpdate:
13 | maxSurge: 2
14 | maxUnavailable: 1
15 | template:
16 | metadata:
17 | labels:
18 | app: nginx
19 | spec:
20 | containers:
21 | - name: nginx
22 | image: nginx:1.21.0
23 | readinessProbe:
24 | httpGet:
25 | path: /
26 | port: 80
27 | initialDelaySeconds: 10
28 | periodSeconds: 5
29 |
--------------------------------------------------------------------------------
/deployment_image_change/before_image_change.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: apps/v1
2 | kind: Deployment
3 | metadata:
4 | name: prod-endpoint
5 | labels:
6 | app: nginx
7 | spec:
8 | replicas: 1
9 | selector:
10 | matchLabels:
11 | app: nginx
12 | strategy:
13 | type: RollingUpdate
14 | rollingUpdate:
15 | maxSurge: 1
16 | maxUnavailable: 0
17 | template:
18 | metadata:
19 | labels:
20 | app: nginx
21 | spec:
22 | containers:
23 | - name: prod-nginx
24 | image: nginx:1.19.0
25 | ports:
26 | - containerPort: 80
27 | readinessProbe:
28 | httpGet:
29 | path: /
30 | port: 80
31 | initialDelaySeconds: 5
32 | periodSeconds: 10
33 | successThreshold: 1
34 |
--------------------------------------------------------------------------------
/evictions/first_pod.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: Pod
3 | metadata:
4 | labels:
5 | app: analytics-worker
6 | name: analytics-worker
7 | spec:
8 | containers:
9 | - image: busybox:1.28
10 | name: analytics-worker-container
11 | resources:
12 | requests:
13 | cpu: "600m"
14 | command:
15 | - sleep
16 | - "1000"
17 | dnsPolicy: ClusterFirst
18 | restartPolicy: Always
19 | tolerations:
20 | - key: "workload-type"
21 | operator: "Equal"
22 | value: "analytics"
23 | effect: "NoSchedule"
24 |
--------------------------------------------------------------------------------
/evictions/pod_with_priority.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: Pod
3 | metadata:
4 | labels:
5 | app: payment-processor
6 | name: payment-processor
7 | spec:
8 | containers:
9 | - image: busybox:1.28
10 | name: payment-processor-container
11 | resources:
12 | requests:
13 | cpu: "600m"
14 | command:
15 | - sleep
16 | - "1000"
17 | dnsPolicy: ClusterFirst
18 | restartPolicy: Always
19 | tolerations:
20 | - key: "workload-type"
21 | operator: "Equal"
22 | value: "critical"
23 | effect: "NoSchedule"
24 | priorityClassName: high-priority
25 |
--------------------------------------------------------------------------------
/evictions/priority.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: scheduling.k8s.io/v1
2 | kind: PriorityClass
3 | metadata:
4 | name: high-priority
5 | value: 1000000
6 | globalDefault: false
7 | description: "Use for critical service pods requiring prioritized scheduling."
8 |
--------------------------------------------------------------------------------
/example_images/changetracking.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/9fe386f8283675594f4767af562da2cbbd54ce39/example_images/changetracking.png
--------------------------------------------------------------------------------
/example_images/crashingpod.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/9fe386f8283675594f4767af562da2cbbd54ce39/example_images/crashingpod.png
--------------------------------------------------------------------------------
/example_images/deployment-image-change.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/9fe386f8283675594f4767af562da2cbbd54ce39/example_images/deployment-image-change.png
--------------------------------------------------------------------------------
/example_images/driftandnamespace.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/9fe386f8283675594f4767af562da2cbbd54ce39/example_images/driftandnamespace.png
--------------------------------------------------------------------------------
/example_images/failedlivenessprobe.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/9fe386f8283675594f4767af562da2cbbd54ce39/example_images/failedlivenessprobe.png
--------------------------------------------------------------------------------
/example_images/failedreadinessprobe.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/9fe386f8283675594f4767af562da2cbbd54ce39/example_images/failedreadinessprobe.png
--------------------------------------------------------------------------------
/example_images/failingjobs.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/9fe386f8283675594f4767af562da2cbbd54ce39/example_images/failingjobs.png
--------------------------------------------------------------------------------
/example_images/helm_monitoring_kubewatch.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/9fe386f8283675594f4767af562da2cbbd54ce39/example_images/helm_monitoring_kubewatch.png
--------------------------------------------------------------------------------
/example_images/highcputhrottling.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/9fe386f8283675594f4767af562da2cbbd54ce39/example_images/highcputhrottling.png
--------------------------------------------------------------------------------
/example_images/highoverhead.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/9fe386f8283675594f4767af562da2cbbd54ce39/example_images/highoverhead.png
--------------------------------------------------------------------------------
/example_images/imagepullbackoff.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/9fe386f8283675594f4767af562da2cbbd54ce39/example_images/imagepullbackoff.png
--------------------------------------------------------------------------------
/example_images/ingress-image-change.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/9fe386f8283675594f4767af562da2cbbd54ce39/example_images/ingress-image-change.png
--------------------------------------------------------------------------------
/example_images/oomkillpod.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/9fe386f8283675594f4767af562da2cbbd54ce39/example_images/oomkillpod.png
--------------------------------------------------------------------------------
/example_images/pendingpod.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/9fe386f8283675594f4767af562da2cbbd54ce39/example_images/pendingpod.png
--------------------------------------------------------------------------------
/gke_node_allocatable/gke_issue.yaml:
--------------------------------------------------------------------------------
1 | # demonstrate that a pod requesting 1 CPU cannot run on the default GKE 2CPU nodes (e2-medium)
2 | apiVersion: v1
3 | kind: Pod
4 | metadata:
5 | name: resource-optimization-tool
6 | spec:
7 | containers:
8 | - name: nginx
9 | image: nginx
10 | resources:
11 | requests:
12 | cpu: "1"
--------------------------------------------------------------------------------
/holmes-meme-generator/failure-more-pods/config.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: ConfigMap
3 | metadata:
4 | name: meme-gen-replicas-config
5 | data:
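# NOTE: the API URL and subreddit list below are intentionally broken;
# this config exists to create a failure scenario for troubleshooting practice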
6 | MEME_API_URL: "https://memcom/gimme/"
7 | SUBREDDITS: "Kubernetes,dockermemes,ProgrrHumor"
8 |
--------------------------------------------------------------------------------
/holmes-meme-generator/failure-more-pods/curl.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: apps/v1
2 | kind: Deployment
3 | metadata:
4 | name: curl-gen-deployment
5 | spec:
6 | replicas: 1
7 | selector:
8 | matchLabels:
9 | app: curl-gen-app
10 | template:
11 | metadata:
12 | labels:
13 | app: curl-gen-app
14 | spec:
15 | containers:
16 | - name: curl-container
17 | image: curlimages/curl:7.78.0
18 | command: ["/bin/sh"]
19 | args:
20 | - -c
21 | - |
22 | while true; do
23 | echo "Checking endpoint http://meme-gen-service:5000...";
24 | curl -s http://meme-gen-service:5000 || echo "Failed to reach http://meme-gen-service:5000";
25 | sleep 30;
26 | done;
27 |
--------------------------------------------------------------------------------
/holmes-meme-generator/failure-more-pods/deployment.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: apps/v1
2 | kind: Deployment
3 | metadata:
4 | name: meme-gen-deployment
5 | labels:
6 | app: meme-gen-app
7 | spec:
8 | replicas: 4
9 | selector:
10 | matchLabels:
11 | app: meme-gen-app
12 | template:
13 | metadata:
14 | labels:
15 | app: meme-gen-app
16 | spec:
17 | containers:
18 | - name: meme-gen-app
19 | # meme-generator source code available here: https://github.com/robusta-dev/Hello-DevOps-Project/tree/demo/code/meme-generator
20 | image: pavangudiwada/meme-generator:latest
21 | ports:
22 | - containerPort: 5000
23 | protocol: TCP
24 | envFrom:
25 | - configMapRef:
26 | name: meme-gen-replicas-config
27 | ---
28 | apiVersion: v1
29 | kind: Service
30 | metadata:
31 | name: meme-gen-service
32 | spec:
33 | type: ClusterIP
34 | ports:
35 | - port: 5000
36 | selector:
37 | app: meme-gen-app
--------------------------------------------------------------------------------
/holmes-meme-generator/failure/config.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: ConfigMap
3 | metadata:
4 | name: meme-generator-config
5 | data:
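# NOTE: the API URL and subreddit list below are intentionally broken;
# this config exists to create a failure scenario for troubleshooting practice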
6 | MEME_API_URL: "https://memcom/gimme/"
7 | SUBREDDITS: "Kubernetes,dockermemes,ProgrrHumor"
8 |
--------------------------------------------------------------------------------
/holmes-meme-generator/failure/curl.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: apps/v1
2 | kind: Deployment
3 | metadata:
4 | name: curl-deployment
5 | spec:
6 | replicas: 1
7 | selector:
8 | matchLabels:
9 | app: curl-app
10 | template:
11 | metadata:
12 | labels:
13 | app: curl-app
14 | spec:
15 | containers:
16 | - name: curl-container
17 | image: curlimages/curl:7.78.0
18 | command: ["/bin/sh"]
19 | args:
20 | - -c
21 | - |
22 | while true; do
23 | echo "Checking endpoint http://meme-service:5000...";
24 | curl -s http://meme-service:5000 || echo "Failed to reach http://meme-service:5000";
25 | sleep 30;
26 | done;
27 |
--------------------------------------------------------------------------------
/holmes-meme-generator/failure/deployment.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: apps/v1
2 | kind: Deployment
3 | metadata:
4 | name: meme-deployment
5 | labels:
6 | app: meme-app
7 | spec:
8 | replicas: 2
9 | selector:
10 | matchLabels:
11 | app: meme-app
12 | template:
13 | metadata:
14 | labels:
15 | app: meme-app
16 | spec:
17 | containers:
18 | - name: meme-app
19 | # meme-generator source code available here: https://github.com/robusta-dev/Hello-DevOps-Project/tree/demo/code/meme-generator
20 | image: pavangudiwada/meme-generator:latest
21 | ports:
22 | - containerPort: 5000
23 | protocol: TCP
24 | envFrom:
25 | - configMapRef:
26 | name: meme-generator-config
27 | ---
28 | apiVersion: v1
29 | kind: Service
30 | metadata:
31 | name: meme-service
32 | spec:
33 | type: ClusterIP
34 | ports:
35 | - port: 5000
36 | selector:
37 | app: meme-app
--------------------------------------------------------------------------------
/image_pull_backoff/no_such_image.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: apps/v1
2 | kind: Deployment
3 | metadata:
4 | name: customer-relations-webapp
5 | labels:
6 | app.kubernetes.io/name: customer-relations
7 | spec:
8 | replicas: 3
9 | selector:
10 | matchLabels:
11 | app: customer-relations
12 | template:
13 | metadata:
14 | labels:
15 | app: customer-relations
16 | visualize: "true"
17 | spec:
18 | containers:
19 | - name: crw-main-container
20 | image: yourcompany/crw:latest
21 | imagePullPolicy: Always
22 |
--------------------------------------------------------------------------------
/ingress_port_path_change/after_port_path_change.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: networking.k8s.io/v1
2 | kind: Ingress
3 | metadata:
4 | name: production-ingress
5 | annotations:
6 | nginx.ingress.kubernetes.io/rewrite-target: /v2
7 | nginx.ingress.kubernetes.io/ssl-redirect: "true"
8 | spec:
9 | tls:
10 | - hosts:
11 | - mywebsite.com
12 | secretName: mywebsite-tls-updated
13 | rules:
14 | - host: mywebsite.com
15 | http:
16 | paths:
17 | - path: /api/v2
18 | pathType: Prefix
19 | backend:
20 | service:
21 | name: backend-service
22 | port:
23 | number: 80
--------------------------------------------------------------------------------
/ingress_port_path_change/before_port_path_change.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: networking.k8s.io/v1
2 | kind: Ingress
3 | metadata:
4 | name: production-ingress
5 | annotations:
6 | nginx.ingress.kubernetes.io/rewrite-target: /
7 | nginx.ingress.kubernetes.io/ssl-redirect: "true"
8 | spec:
9 | tls:
10 | - hosts:
11 | - mywebsite.com
12 | secretName: mywebsite-tls
13 | rules:
14 | - host: mywebsite.com
15 | http:
16 | paths:
17 | - path: /api
18 | pathType: Prefix
19 | backend:
20 | service:
21 | name: backend-service
22 | port:
23 | number: 80
--------------------------------------------------------------------------------
/init_crashloop_backoff/create_init_crashloop_backoff.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: Pod
3 | metadata:
4 | name: logging-agent
5 | spec:
6 | containers:
7 | - name: exporter
8 | image: nginx
9 | ports:
10 | - containerPort: 80
11 | volumeMounts:
12 | - name: workdir
13 | mountPath: /usr/share/nginx/html
14 | # These containers are run during pod initialization
15 | initContainers:
16 | - name: downloader
17 | image: busybox:1.28
18 | command:
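# NOTE: "wge" is a deliberate typo; the missing binary makes the init container crash-loop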
19 | - wge
20 | - "-O"
21 | - "/work-dir/index.html"
22 | - https://home.robusta.dev
23 | volumeMounts:
24 | - name: workdir
25 | mountPath: "/work-dir"
26 | dnsPolicy: Default
27 | volumes:
28 | - name: workdir
29 | emptyDir: {}
30 |
--------------------------------------------------------------------------------
/job_failure/job_crash.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: batch/v1
2 | kind: Job
3 | metadata:
4 | name: java-api-checker
5 | spec:
6 | template:
7 | spec:
8 | containers:
9 | - name: java-beans
10 | image: busybox
11 | command: ["/bin/sh", "-c"]
12 | args: ["echo 'Java Network Exception: \nAll host(s) tried for db query failed (tried: prod-db:3333) - no available connection and the queue has reached its max size 256 \nAll host(s) tried for db query failed (tried: prod-db:3333) - no available connection and the queue has reached its max size 256 \nAll host(s) tried for db query failed (tried: prod-db:3333) - no available connection and the queue has reached its max size 256 \nAll host(s) tried for db query failed (tried: prod-db:3333) - no available connection and the queue has reached its max size 256'; sleep 60; exit 1"]
13 | restartPolicy: Never
14 | backoffLimit: 1
15 |
16 |
--------------------------------------------------------------------------------
/job_run_forever/job_run_forever.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: batch/v1
2 | kind: Job
3 | metadata:
4 | name: product-data-scraper
5 | spec:
6 | suspend: false
7 | parallelism: 1
8 | completions: 5
9 | template:
10 | spec:
11 | containers:
12 | - name: run-forever
13 | image: bash
14 | command: ["/bin/sh"]
15 | args: ["-c", "wget -O - https://gist.githubusercontent.com/odyssomay/1078370/raw/35c5981f8c139bc9dc02186f187ebee61f5b9eb9/gistfile1.txt
16 | 2>/dev/null; while true; do sleep 10; done"]
17 | restartPolicy: Never
18 | backoffLimit: 4
--------------------------------------------------------------------------------
/liveness_probe_fail/failing_liveness_probe.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: Pod
3 | metadata:
4 | name: order-processor
5 | spec:
6 | containers:
7 | - name: my-container
8 | image: busybox
9 | command: ["sh", "-c", "while true; do echo 'Running...'; sleep 5; done"]
10 | livenessProbe:
11 | exec:
12 | command:
13 | - sh
14 | - -c
15 | - "exit 1"
16 | initialDelaySeconds: 5
17 | periodSeconds: 5
18 | failureThreshold: 1000
19 |
--------------------------------------------------------------------------------
/memory_pressure_evictions/demanding-deploy.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: apps/v1
2 | kind: Deployment
3 | metadata:
4 | name: stress-test
5 | spec:
6 | replicas: 3
7 | selector:
8 | matchLabels:
9 | app: stress-test
10 | template:
11 | metadata:
12 | labels:
13 | app: stress-test
14 | spec:
15 | containers:
16 | - name: stress-ng
17 | image: polinux/stress-ng
18 | command: ["stress-ng"]
19 | # Adjust args to tweak memory stress levels; if pods are OOM killed immediately, consider lowering the value below 2G
20 | args: ["--vm-bytes", "2G", "--vm-keep", "--vm", "4"]
21 | nodeSelector:
22 | # Specify the node where you want to induce evictions
23 | kubernetes.io/hostname: your-host-name
24 | # IMPORTANT NOTE:
25 | # Ensure these files are deployed on nodes that do not have Robusta Runner, Robusta Forwarder, or Robusta Holmes running.
26 | # Otherwise, you will probably get OOMKilled events instead of eviction events.
27 |
--------------------------------------------------------------------------------
/memory_pressure_evictions/demo-deploy.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: apps/v1
2 | kind: Deployment
3 | metadata:
4 | name: nginx-deployment
5 | labels:
6 | app: nginx
7 | spec:
8 | replicas: 5
9 | selector:
10 | matchLabels:
11 | app: nginx
12 | template:
13 | metadata:
14 | labels:
15 | app: nginx
16 | spec:
17 | containers:
18 | - name: nginx
19 | image: nginx:1.14.2
20 | ports:
21 | - containerPort: 80
22 | nodeSelector:
23 | # Specify the node where you want to induce evictions
24 | kubernetes.io/hostname: your-host-name
--------------------------------------------------------------------------------
/minishop-telemetry/.gitignore:
--------------------------------------------------------------------------------
1 | node_modules
2 | dist
3 |
--------------------------------------------------------------------------------
/minishop-telemetry/Dockerfile:
--------------------------------------------------------------------------------
1 |
2 | FROM node:23-alpine AS builder
3 |
4 | WORKDIR /app
5 |
6 | COPY package*.json ./
7 | RUN npm ci
8 |
9 | COPY . .
10 | RUN npm run build
11 |
12 | FROM node:23-alpine
13 |
14 | WORKDIR /app
15 |
16 | COPY --from=builder /app/package*.json ./
17 | COPY --from=builder /app/dist ./dist
18 |
19 | RUN npm ci --only=production
20 |
21 | ENV NODE_ENV=production
22 |
23 | EXPOSE 3003
24 | EXPOSE 3004
25 | EXPOSE 3005
26 | EXPOSE 3006
27 | EXPOSE 9464
28 |
29 | CMD ["node", "--require", "dist/telemetry.js", "dist/dev.js"]
30 |
--------------------------------------------------------------------------------
/minishop-telemetry/README.md:
--------------------------------------------------------------------------------
1 | # Running locally
2 |
3 | ```bash
4 | npm install
5 | npm run dev
6 | ```
7 |
8 | # Deploying
9 |
10 | ```bash
11 | kubectl apply -f ./manifest.yaml
12 | ```
13 |
--------------------------------------------------------------------------------
/minishop-telemetry/build.sh:
--------------------------------------------------------------------------------
1 | docker buildx build --platform linux/amd64 . -t us-central1-docker.pkg.dev/genuine-flight-317411/devel/shop-app-demo:v1
2 |
--------------------------------------------------------------------------------
/minishop-telemetry/manifest.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: Namespace
3 | metadata:
4 | name: minishop
5 | ---
6 | # Backend Service Deployment
7 | apiVersion: apps/v1
8 | kind: Deployment
9 | metadata:
10 | name: backend-service
11 | namespace: minishop
12 | labels:
13 | app: minishop
14 | service: backend
15 | spec:
16 | replicas: 1
17 | selector:
18 | matchLabels:
19 | app: minishop
20 | service: backend
21 | template:
22 | metadata:
23 | labels:
24 | app: minishop
25 | service: backend
26 | spec:
27 | containers:
28 | - name: backend
29 | image: us-central1-docker.pkg.dev/genuine-flight-317411/devel/shop-app-demo:v1
30 | imagePullPolicy: Always
31 | command:
32 | [
33 | "node",
34 | "--require",
35 | "./dist/telemetry.js",
36 | "./dist/backend-service.js",
37 | ]
38 | ports:
39 | - containerPort: 3003
40 | name: http
41 | env:
42 | - name: TEMPO_URL
43 | value: "http://opentelemetry-collector-agent.tempo:4318/v1/traces"
44 | - name: SERVICE_NAME
45 | value: "backend-service"
46 | resources:
47 | limits:
48 | cpu: "500m"
49 | memory: "512Mi"
50 | requests:
51 | cpu: "200m"
52 | memory: "256Mi"
53 | readinessProbe:
54 | httpGet:
55 | path: /backend/health
56 | port: 3003
57 | initialDelaySeconds: 5
58 | periodSeconds: 10
59 | livenessProbe:
60 | httpGet:
61 | path: /backend/health
62 | port: 3003
63 | initialDelaySeconds: 15
64 | periodSeconds: 20
65 | - name: checkout-sidecar
66 | image: curlimages/curl:7.86.0
67 | command: ["/bin/sh", "-c"]
68 | args:
69 | - |
70 | while true; do
71 | curl -X POST \
72 | -H "Content-Type: application/json" \
73 | -d '{"name":"John Doe","email":"john.doe@example.com","address":"main street","cardNumber":"1234-5678-9101-1121"}' \
74 | http://localhost:3003/backend/api/checkout;
75 | sleep 5;
76 | done
77 | ---
78 | # Checkout Service Deployment
79 | apiVersion: apps/v1
80 | kind: Deployment
81 | metadata:
82 | name: checkout-service
83 | namespace: minishop
84 | labels:
85 | app: minishop
86 | service: checkout
87 | spec:
88 | replicas: 1
89 | selector:
90 | matchLabels:
91 | app: minishop
92 | service: checkout
93 | template:
94 | metadata:
95 | labels:
96 | app: minishop
97 | service: checkout
98 | spec:
99 | containers:
100 | - name: checkout
101 | image: us-central1-docker.pkg.dev/genuine-flight-317411/devel/shop-app-demo:v1
102 | imagePullPolicy: Always
103 | command:
104 | [
105 | "node",
106 | "--require",
107 | "./dist/telemetry.js",
108 | "./dist/checkout-service.js",
109 | ]
110 | ports:
111 | - containerPort: 3004
112 | name: http
113 | env:
114 | - name: TEMPO_URL
115 | value: "http://opentelemetry-collector-agent.tempo:4318/v1/traces"
116 | - name: SERVICE_NAME
117 | value: "checkout-service"
118 | livenessProbe:
119 | httpGet:
120 | path: /checkout/health
121 | port: 3004
122 | initialDelaySeconds: 15
123 | periodSeconds: 20
124 | ---
125 | # Fraud Service Deployment
126 | apiVersion: apps/v1
127 | kind: Deployment
128 | metadata:
129 | name: fraud-service
130 | namespace: minishop
131 | labels:
132 | app: minishop
133 | service: fraud
134 | spec:
135 | replicas: 1
136 | selector:
137 | matchLabels:
138 | app: minishop
139 | service: fraud
140 | template:
141 | metadata:
142 | labels:
143 | app: minishop
144 | service: fraud
145 | spec:
146 | containers:
147 | - name: fraud
148 | image: us-central1-docker.pkg.dev/genuine-flight-317411/devel/shop-app-demo:v1
149 | imagePullPolicy: Always
150 | command:
151 | [
152 | "node",
153 | "--require",
154 | "./dist/telemetry.js",
155 | "./dist/fraud-service.js",
156 | ]
157 | ports:
158 | - containerPort: 3005
159 | name: http
160 | env:
161 | - name: TEMPO_URL
162 | value: "http://opentelemetry-collector-agent.tempo:4318/v1/traces"
163 | - name: SERVICE_NAME
164 | value: "fraud-service"
165 | livenessProbe:
166 | httpGet:
167 | path: /fraud/health
168 | port: 3005
169 | initialDelaySeconds: 15
170 | periodSeconds: 20
171 | ---
172 | # Auth Service Deployment
173 | apiVersion: apps/v1
174 | kind: Deployment
175 | metadata:
176 | name: auth-service
177 | namespace: minishop
178 | labels:
179 | app: minishop
180 | service: auth
181 | spec:
182 | replicas: 1
183 | selector:
184 | matchLabels:
185 | app: minishop
186 | service: auth
187 | template:
188 | metadata:
189 | labels:
190 | app: minishop
191 | service: auth
192 | spec:
193 | containers:
194 | - name: auth
195 | image: us-central1-docker.pkg.dev/genuine-flight-317411/devel/shop-app-demo:v1
196 | imagePullPolicy: Always
197 | command:
198 | [
199 | "node",
200 | "--require",
201 | "./dist/telemetry.js",
202 | "./dist/auth-service.js",
203 | ]
204 | ports:
205 | - containerPort: 3006
206 | name: http
207 | env:
208 | - name: TEMPO_URL
209 | value: "http://opentelemetry-collector-agent.tempo:4318/v1/traces"
210 | - name: SERVICE_NAME
211 | value: "auth-service"
212 | livenessProbe:
213 | httpGet:
214 | path: /auth/health
215 | port: 3006
216 | initialDelaySeconds: 15
217 | periodSeconds: 20
218 | ---
219 | # Services
220 | apiVersion: v1
221 | kind: Service
222 | metadata:
223 | name: backend-service
224 | namespace: minishop
225 | labels:
226 | app: minishop
227 | service: backend
228 | spec:
229 | selector:
230 | app: minishop
231 | service: backend
232 | ports:
233 | - name: http
234 | port: 3003
235 | targetPort: 3003
236 | ---
237 | apiVersion: v1
238 | kind: Service
239 | metadata:
240 | name: checkout-service
241 | namespace: minishop
242 | labels:
243 | app: minishop
244 | service: checkout
245 | spec:
246 | selector:
247 | app: minishop
248 | service: checkout
249 | ports:
250 | - name: http
251 | port: 3004
252 | targetPort: 3004
253 | ---
254 | apiVersion: v1
255 | kind: Service
256 | metadata:
257 | name: fraud-service
258 | namespace: minishop
259 | labels:
260 | app: minishop
261 | service: fraud
262 | spec:
263 | selector:
264 | app: minishop
265 | service: fraud
266 | ports:
267 | - name: http
268 | port: 3005
269 | targetPort: 3005
270 | ---
271 | apiVersion: v1
272 | kind: Service
273 | metadata:
274 | name: auth-service
275 | namespace: minishop
276 | labels:
277 | app: minishop
278 | service: auth
279 | spec:
280 | selector:
281 | app: minishop
282 | service: auth
283 | ports:
284 | - name: http
285 | port: 3006
286 | targetPort: 3006
287 | ---
288 | apiVersion: monitoring.coreos.com/v1
289 | kind: ServiceMonitor
290 | metadata:
291 | name: minishop-services
292 | namespace: minishop
293 | labels:
294 | app: minishop
295 | release: robusta
296 | spec:
297 | selector:
298 | matchLabels:
299 | app: minishop
300 | namespaceSelector:
301 | matchNames:
302 | - minishop
303 | endpoints:
304 | - port: http
305 | interval: 15s
306 | path: /metrics
307 | ---
308 | # Prometheus Rule for High Latency
309 | apiVersion: monitoring.coreos.com/v1
310 | kind: PrometheusRule
311 | metadata:
312 | name: minishop-high-latency
313 | namespace: minishop
314 | labels:
315 | app: minishop
316 | release: robusta
317 | role: alert-rules
318 | spec:
319 | groups:
320 | - name: minishop.rules
321 | rules:
322 | - alert: MinishopHighLatency
323 | expr: histogram_quantile(0.9, sum(rate(http_request_duration_seconds_bucket[5m])) by (le)) > 4
324 | labels:
325 | severity: warning
326 | annotations:
327 | summary: "High latency detected in Minishop application"
328 |           description: "The service is experiencing high latency (90th percentile > 4s)"
329 |
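330 | # Note: the checkout-sidecar posts to /backend/api/checkout every 5 seconds, and the
331 | # fraud service's simulated banned-card query takes up to 7 seconds per call, so the
332 | # 4s p90 threshold above should be crossed shortly after deployment.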
--------------------------------------------------------------------------------
/minishop-telemetry/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "checkout-app",
3 | "version": "1.0.0",
4 | "description": "Checkout web application with OpenTelemetry and Prometheus metrics",
5 | "main": "dist/index.js",
6 | "type": "module",
7 | "scripts": {
8 | "build": "tsc && npm run copy-files",
9 | "copy-files": "copyfiles -u 1 \"src/templates/**/*\" \"src/public/**/*\" dist",
10 | "start:backend": "node --require ./dist/telemetry.js dist/backend-service.js",
11 | "start:fraud": "node --require ./dist/telemetry.js dist/fraud-service.js",
12 | "start:auth": "node --require ./dist/telemetry.js dist/auth-service.js",
13 | "start:checkout": "node --require ./dist/telemetry.js dist/checkout-service.js",
14 | "dev": "tsx src/dev.ts --require ./src/telemetry.ts",
15 | "start": "node dist/dev.js"
16 | },
17 | "dependencies": {
18 | "@autotelic/fastify-opentelemetry": "^0.22.1",
19 | "@fastify/otel": "^0.5.0",
20 | "@fastify/static": "^8.1.1",
21 | "@opentelemetry/api": "^1.9.0",
22 | "@opentelemetry/auto-instrumentations-node": "^0.56.1",
23 | "@opentelemetry/exporter-prometheus": "^0.57.2",
24 | "@opentelemetry/exporter-trace-otlp-http": "^0.57.2",
25 | "@opentelemetry/instrumentation": "^0.57.2",
26 | "@opentelemetry/instrumentation-fastify": "^0.44.2",
27 | "@opentelemetry/instrumentation-http": "^0.57.2",
28 | "@opentelemetry/resources": "^1.30.1",
29 | "@opentelemetry/sdk-metrics": "^1.30.1",
30 | "@opentelemetry/sdk-node": "^0.57.2",
31 | "@opentelemetry/sdk-trace-base": "^1.30.1",
32 | "@opentelemetry/sdk-trace-node": "^1.30.1",
33 | "@opentelemetry/semantic-conventions": "^1.30.0",
34 | "fastify": "^5.2.1",
35 | "fastify-metrics": "^12.1.0",
36 | "pino": "^9.6.0"
37 | },
38 | "devDependencies": {
39 | "@types/node": "^22.13.10",
40 | "copyfiles": "^2.4.1",
41 | "tsx": "^4.19.3",
42 | "typescript": "^5.8.2"
43 | },
44 | "engines": {
45 | "node": ">=18"
46 | }
47 | }
48 |
--------------------------------------------------------------------------------
/minishop-telemetry/src/auth-service.ts:
--------------------------------------------------------------------------------
1 | import { fastifyOtelInstrumentation } from "./telemetry.js";
2 | import Fastify, { FastifyInstance } from "fastify";
3 | import { trace } from "@opentelemetry/api";
4 | import { fileURLToPath, pathToFileURL } from "url";
5 | import { createTracedHandler } from "./util/trace-handler.js";
6 | import { executePostgresQuery } from "./util/db.js";
7 | import fastifyMetrics from "fastify-metrics";
8 |
9 | const tracer = trace.getTracer("auth-service");
10 |
11 | export const setup = async (fastify: FastifyInstance) => {
12 | fastify.get(
13 | "/auth/health",
14 | createTracedHandler("health", tracer, async (request, reply, span) => {
15 | return reply.send({
16 | ok: true,
17 | });
18 | }),
19 | );
20 | fastify.post(
21 | "/auth/api/auth",
22 | createTracedHandler(
23 | "authenticate",
24 | tracer,
25 | async (request, reply, span) => {
26 | await executePostgresQuery("SELECT * FROM users WHERE id=$1", 500);
27 | return {
28 | success: true,
29 | };
30 | },
31 | ),
32 | );
33 | };
34 |
35 | const isMainModule = () => {
36 | const mainModulePath = import.meta.url;
37 | const executedFilePath = pathToFileURL(process.argv[1]).href;
38 | return mainModulePath === executedFilePath;
39 | };
40 |
41 | if (isMainModule()) {
42 | const fastify = Fastify({
43 | logger: true,
44 | });
45 | await fastify.register(fastifyMetrics as any, { endpoint: "/metrics" });
46 | // await fastify.register(fastifyOtelInstrumentation.plugin());
47 | await setup(fastify);
48 | try {
49 | await fastify.listen({ port: 3006, host: "0.0.0.0" });
50 |     console.log("Auth server is running on http://localhost:3006");
51 | } catch (err) {
52 | fastify.log.error(err);
53 | process.exit(1);
54 | }
55 | }
56 |
--------------------------------------------------------------------------------
/minishop-telemetry/src/backend-service.ts:
--------------------------------------------------------------------------------
1 | import { fastifyOtelInstrumentation } from "./telemetry.js";
2 | import Fastify, { FastifyInstance } from "fastify";
3 | import fastifyStatic from "@fastify/static";
4 | import {
5 | context,
6 | propagation,
7 | SpanStatusCode,
8 | trace,
9 | } from "@opentelemetry/api";
10 | import fs from "fs";
11 | import { fileURLToPath, pathToFileURL } from "url";
12 | import path from "path";
13 | import { callout, getUrl } from "./util/callout.js";
14 | import { createTracedHandler } from "./util/trace-handler.js";
15 | import fastifyMetrics from "fastify-metrics";
16 |
17 | const __filename = fileURLToPath(import.meta.url);
18 | const __dirname = path.dirname(__filename);
19 |
20 | const tracer = trace.getTracer("backend-service");
21 |
22 | export const setup = async (fastify: FastifyInstance) => {
23 | fastify.register(fastifyStatic, {
24 | root: path.join(__dirname, "./public"),
25 | prefix: "/",
26 | });
27 |
28 | fastify.get("/", async (request, reply) => {
29 | return reply.redirect("/backend/checkout");
30 | });
31 | fastify.get(
32 | "/backend/health",
33 | createTracedHandler("health", tracer, async (request, reply, span) => {
34 | return reply.send({
35 | ok: true,
36 | });
37 | }),
38 | );
39 |
40 | fastify.get(
41 | "/backend/checkout",
42 | createTracedHandler(
43 | "serve_checkout_page",
44 | tracer,
45 | async (request, reply, span) => {
46 | return reply
47 | .type("text/html")
48 | .send(
49 | fs.readFileSync(
50 | path.join(__dirname, "./templates/checkout.html"),
51 | "utf8",
52 | ),
53 | );
54 | },
55 | ),
56 | );
57 |
58 | // Process checkout API
59 | fastify.post(
60 | "/backend/api/checkout",
61 | createTracedHandler(
62 | "/backend/api/checkout",
63 | tracer,
64 | async (request, reply, span) => {
65 | const checkoutData = request.body as any;
66 |
67 | const authUrl = getUrl("auth-service", 3006, "/auth/api/auth");
68 | await callout(authUrl, checkoutData, request.log);
69 |
70 | const checkoutUrl = getUrl(
71 | "checkout-service",
72 | 3004,
73 | "/checkout/api/checkout",
74 | );
75 | const data = await callout(checkoutUrl, checkoutData, request.log);
76 |
77 | return data;
78 | },
79 | ),
80 | );
81 | };
82 |
83 | const isMainModule = () => {
84 | const mainModulePath = import.meta.url;
85 | const executedFilePath = pathToFileURL(process.argv[1]).href;
86 | return mainModulePath === executedFilePath;
87 | };
88 |
89 | if (isMainModule()) {
90 | const fastify = Fastify({
91 | logger: true,
92 | });
93 | await fastify.register(fastifyMetrics as any, { endpoint: "/metrics" });
94 | // await fastify.register(fastifyOtelInstrumentation.plugin());
95 | await setup(fastify);
96 | try {
97 | await fastify.listen({ port: 3003, host: "0.0.0.0" });
98 | console.log("Backend server is running on http://localhost:3003");
99 | } catch (err) {
100 | fastify.log.error(err);
101 | process.exit(1);
102 | }
103 | }
104 |
--------------------------------------------------------------------------------
/minishop-telemetry/src/checkout-service.ts:
--------------------------------------------------------------------------------
1 | import { fastifyOtelInstrumentation } from "./telemetry.js";
2 | import Fastify, { FastifyInstance } from "fastify";
3 | import {
4 | context,
5 | propagation,
6 | SpanStatusCode,
7 | trace,
8 | } from "@opentelemetry/api";
9 | import fs from "fs";
10 | import { fileURLToPath, pathToFileURL } from "url";
11 | import path from "path";
12 | import { createTracedHandler } from "./util/trace-handler.js";
13 | import { callout, getUrl } from "./util/callout.js";
14 | import { executePostgresQuery } from "./util/db.js";
15 | import fastifyMetrics from "fastify-metrics";
16 |
17 | const __filename = fileURLToPath(import.meta.url);
18 | const __dirname = path.dirname(__filename);
19 |
20 | const tracer = trace.getTracer("checkout-service");
21 |
22 | export const setup = async (fastify: FastifyInstance) => {
23 | fastify.get(
24 | "/checkout/health",
25 | createTracedHandler("health", tracer, async (request, reply, span) => {
26 | return reply.send({
27 | ok: true,
28 | });
29 | }),
30 | );
31 | fastify.post(
32 | "/checkout/api/checkout",
33 | createTracedHandler(
34 | "process_checkout",
35 | tracer,
36 | async (request, reply, span) => {
37 | const checkoutData = request.body as any;
38 |
39 | span.addEvent("processing_payment", {
40 | email: checkoutData.email,
41 | });
42 | const url = getUrl("fraud-service", 3005, "/fraud/api/fraud");
43 | await callout(url, checkoutData, request.log);
44 | await executePostgresQuery(
45 | "SELECT * FROM products WHERE status='available'",
46 | 500,
47 | );
48 | await new Promise((resolve) => setTimeout(resolve, 200));
49 | span.addEvent("checkout_successful");
50 |
51 | return {
52 | success: true,
53 | message: "Order placed successfully!",
54 | orderId: `ORDER-${Math.floor(Math.random() * 10000)}`,
55 | };
56 | },
57 | ),
58 | );
59 | };
60 |
61 | const isMainModule = () => {
62 | const mainModulePath = import.meta.url;
63 | const executedFilePath = pathToFileURL(process.argv[1]).href;
64 | return mainModulePath === executedFilePath;
65 | };
66 |
67 | if (isMainModule()) {
68 | const fastify = Fastify({
69 | logger: true,
70 | });
71 | await fastify.register(fastifyMetrics as any, { endpoint: "/metrics" });
72 | // await fastify.register(fastifyOtelInstrumentation.plugin());
73 | await setup(fastify);
74 | try {
75 | await fastify.listen({ port: 3004, host: "0.0.0.0" });
76 |     console.log("Checkout server is running on http://localhost:3004");
77 | } catch (err) {
78 | fastify.log.error(err);
79 | process.exit(1);
80 | }
81 | }
82 |
--------------------------------------------------------------------------------
/minishop-telemetry/src/dev.ts:
--------------------------------------------------------------------------------
1 | // import { fastifyOtelInstrumentation } from "./telemetry.js";
2 | import Fastify from "fastify";
3 | import { setup as setupCheckout } from "./checkout-service.js";
4 | import { setup as setupBackend } from "./backend-service.js";
5 | import { setup as setupFraud } from "./fraud-service.js";
6 | import { setup as setupAuth } from "./auth-service.js";
7 | import fastifyMetrics from "fastify-metrics";
8 |
9 | async function start() {
10 | const fastify = Fastify({
11 | logger: true,
12 | });
13 | // await fastify.register(fastifyOtelInstrumentation.plugin());
14 | await fastify.register(fastifyMetrics as any, { endpoint: "/metrics" });
15 | await setupFraud(fastify);
16 | await setupCheckout(fastify);
17 | await setupAuth(fastify);
18 | await setupBackend(fastify);
19 | try {
20 | await fastify.listen({ port: 3003, host: "0.0.0.0" });
21 |     console.log("Dev server (all services) is running on http://localhost:3003");
22 | } catch (err) {
23 | fastify.log.error(err);
24 | process.exit(1);
25 | }
26 | }
27 |
28 | start();
29 |
--------------------------------------------------------------------------------
/minishop-telemetry/src/fraud-service.ts:
--------------------------------------------------------------------------------
1 | import { fastifyOtelInstrumentation } from "./telemetry.js";
2 | import Fastify, { FastifyInstance } from "fastify";
3 | import { trace } from "@opentelemetry/api";
4 | import { pathToFileURL } from "url";
5 | import { createTracedHandler } from "./util/trace-handler.js";
6 | import { executePostgresQuery } from "./util/db.js";
7 | import fastifyMetrics from "fastify-metrics";
8 |
9 | const tracer = trace.getTracer("fraud-service");
10 |
11 | export const setup = async (fastify: FastifyInstance) => {
12 | fastify.get(
13 | "/fraud/health",
14 | createTracedHandler("health", tracer, async (request, reply, span) => {
15 | return reply.send({
16 | ok: true,
17 | });
18 | }),
19 | );
20 | fastify.post(
21 | "/fraud/api/fraud",
22 | createTracedHandler(
23 | "check_for_fraud",
24 | tracer,
25 | async (request, reply, span) => {
26 | const data = request.body as any;
27 | let is_fraud = true;
28 | if (data.cardNumber && data.cardNumber.startsWith("1234")) {
29 | is_fraud = false;
30 | }
31 | await executePostgresQuery(
32 | "SELECT * FROM banned_card_numbers WHERE id=$1",
33 | 7000,
34 | );
35 | span.addEvent("validated_payment", {
36 | cardNumber: data.cardNumber,
37 | });
38 |
39 | await new Promise((resolve) => setTimeout(resolve, 200));
40 |
41 | span.addEvent("check_for_fraud_completed");
42 |
43 | return {
44 | is_fraud: is_fraud,
45 | };
46 | },
47 | ),
48 | );
49 | };
50 |
51 | const isMainModule = () => {
52 | const mainModulePath = import.meta.url;
53 | const executedFilePath = pathToFileURL(process.argv[1]).href;
54 | return mainModulePath === executedFilePath;
55 | };
56 |
57 | if (isMainModule()) {
58 | const fastify = Fastify({
59 | logger: true,
60 | });
61 | await fastify.register(fastifyMetrics as any, { endpoint: "/metrics" });
62 | // await fastify.register(fastifyOtelInstrumentation.plugin());
63 | await setup(fastify);
64 | try {
65 | await fastify.listen({ port: 3005, host: "0.0.0.0" });
66 |     console.log("Fraud server is running on http://localhost:3005");
67 | } catch (err) {
68 | fastify.log.error(err);
69 | process.exit(1);
70 | }
71 | }
72 |
--------------------------------------------------------------------------------
/minishop-telemetry/src/public/js/main.js:
--------------------------------------------------------------------------------
1 | // Create a unique trace ID for this user session
2 | function generateTraceId() {
3 | return "xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx".replace(/[xy]/g, function (c) {
4 | const r = (Math.random() * 16) | 0;
5 | const v = c === "x" ? r : (r & 0x3) | 0x8;
6 | return v.toString(16);
7 | });
8 | }
9 |
10 | // Set up tracing
11 | const traceId = generateTraceId();
12 | const spanId = generateTraceId().substring(0, 16);
13 |
14 | document.addEventListener("DOMContentLoaded", () => {
15 | // Fetch the page with the trace context
16 | console.log(`Frontend trace started with ID: ${traceId}`);
17 |
18 | const checkoutForm = document.getElementById("checkoutForm");
19 | const resultDiv = document.getElementById("result");
20 |
21 | checkoutForm.addEventListener("submit", async (e) => {
22 | e.preventDefault();
23 |
24 | const formData = new FormData(checkoutForm);
25 | const checkoutData = Object.fromEntries(formData.entries());
26 |
27 | try {
28 | // Include the trace context in the headers
29 | const response = await fetch("/backend/api/checkout", {
30 | method: "POST",
31 | headers: {
32 | "Content-Type": "application/json",
33 | // traceparent: `00-${traceId}-${spanId}-01`,
34 | },
35 | body: JSON.stringify(checkoutData),
36 | });
37 |
38 | const result = await response.json();
39 |
40 | resultDiv.textContent = `Checkout ${result.success ? "completed" : "failed"}: ${result.message}`;
41 | resultDiv.style.display = "block";
42 | resultDiv.style.backgroundColor = result.success ? "#d4edda" : "#f8d7da";
43 |
44 | if (result.success) {
45 | checkoutForm.reset();
46 | }
47 | } catch (error) {
48 | resultDiv.textContent = `Error: ${error.message}`;
49 | resultDiv.style.display = "block";
50 | resultDiv.style.backgroundColor = "#f8d7da";
51 | }
52 | });
53 | });
54 |
--------------------------------------------------------------------------------
/minishop-telemetry/src/telemetry.ts:
--------------------------------------------------------------------------------
1 | import { NodeSDK } from "@opentelemetry/sdk-node";
2 | import opentelemetry from "@opentelemetry/api";
3 | import {
4 | BatchSpanProcessor,
5 | ConsoleSpanExporter,
6 | NodeTracerProvider,
7 | } from "@opentelemetry/sdk-trace-node";
8 | import { getNodeAutoInstrumentations } from "@opentelemetry/auto-instrumentations-node";
9 | import {
10 | PeriodicExportingMetricReader,
11 | ConsoleMetricExporter,
12 | MeterProvider,
13 | } from "@opentelemetry/sdk-metrics";
14 | import pkg from "@fastify/otel";
15 | import {
16 | ATTR_SERVICE_NAME,
17 | ATTR_SERVICE_VERSION,
18 | } from "@opentelemetry/semantic-conventions";
19 | import { Resource } from "@opentelemetry/resources";
20 | import { PrometheusExporter } from "@opentelemetry/exporter-prometheus";
21 | import { OTLPTraceExporter } from "@opentelemetry/exporter-trace-otlp-http";
22 | const { FastifyOtelInstrumentation } = pkg;
23 |
24 | const service_name = process.env.SERVICE_NAME || "minishop";
25 |
26 | const resource = new Resource({
27 | [ATTR_SERVICE_NAME]: service_name,
28 | [ATTR_SERVICE_VERSION]: "1.0",
29 | });
30 |
31 | const metricReader = new PrometheusExporter({
32 |   port: parseInt(process.env.METRICS_PORT || "9463", 10),
33 | });
34 |
35 | const traceExporter = new OTLPTraceExporter({
36 | url: process.env.TEMPO_URL || "http://localhost:4318/v1/traces",
37 | keepAlive: true,
38 | concurrencyLimit: 100,
39 | timeoutMillis: 5000,
40 | });
41 |
42 | const spanProcessor = new BatchSpanProcessor(traceExporter, {
43 | maxExportBatchSize: 10,
44 | scheduledDelayMillis: 100,
45 | });
46 |
47 | const traceProvider = new NodeTracerProvider({
48 | resource: resource,
49 | spanProcessors: [spanProcessor],
50 | });
51 |
52 | traceProvider.register();
53 | const sdk = new NodeSDK({
54 | serviceName: service_name,
55 | resource: resource,
56 | traceExporter: traceExporter,
57 | spanProcessor: spanProcessor,
58 | metricReader: metricReader,
59 | });
60 |
61 | sdk.start();
62 |
63 | const fastifyOtelInstrumentation = new FastifyOtelInstrumentation({
64 | servername: service_name,
65 | });
66 |
67 | export { fastifyOtelInstrumentation };
68 |
--------------------------------------------------------------------------------
/minishop-telemetry/src/templates/checkout.html:
--------------------------------------------------------------------------------
1 | <!-- checkout.html: the original markup was not preserved in this dump. The page is a
2 |      "Checkout" form (name, email, address, card number) that posts to
3 |      /backend/api/checkout via /js/main.js and shows the response in a status div. -->
--------------------------------------------------------------------------------
/minishop-telemetry/src/util/callout.ts:
--------------------------------------------------------------------------------
1 | import {
2 | context,
3 | propagation,
4 | Span,
5 | SpanStatusCode,
6 | trace,
7 | } from "@opentelemetry/api";
8 |
9 | const tracer = trace.getTracer("callout");
10 |
11 | export function getUrl(host: string, port: number, path: string) {
12 | if (process.env.NODE_ENV !== "production") {
13 | return `http://localhost:3003${path}`;
14 | }
15 | return `http://${host}:${port}${path}`;
16 | }
17 |
18 | export async function callout(url: string, data: unknown, logger: { error: (msg: string) => void }) {
19 | return tracer.startActiveSpan("callout", async (span: Span) => {
20 | try {
21 |       const headers: Record<string, string> = {
22 | "Content-Type": "application/json",
23 | Accept: "application/json",
24 | };
25 |
26 | span.setAttribute("downstream_url", url);
27 |
28 | propagation.inject(context.active(), headers);
29 | const response = await fetch(url, {
30 | method: "POST",
31 | headers: headers,
32 | body: JSON.stringify(data),
33 | });
34 |
35 |       if (!response.ok) {
36 |         // Read the body once: a fetch Response body stream can only be consumed once
37 |         const body = await response.text();
38 |         logger.error(`Downstream response failed ${response.status} ${body}`);
39 |         const errorMsg = `HTTP error! Status: ${response.status} ${body}`;
40 |         span.recordException(new Error(errorMsg));
41 |         span.setStatus({ code: SpanStatusCode.ERROR });
42 |         throw new Error(errorMsg);
43 | }
44 | const result = await response.json();
45 |       // span.end() deliberately omitted here; the finally block ends the span exactly once
46 | return result;
47 | } catch (error) {
48 | span.recordException(error);
49 | span.setStatus({ code: SpanStatusCode.ERROR });
50 | throw error;
51 | } finally {
52 | span.end();
53 | }
54 | });
55 | }
56 |
--------------------------------------------------------------------------------
/minishop-telemetry/src/util/db.ts:
--------------------------------------------------------------------------------
1 | import * as opentelemetry from "@opentelemetry/api";
2 | import { setTimeout } from "timers/promises";
3 |
4 | /**
5 | * Simulates a PostgreSQL database query with OpenTelemetry instrumentation
6 | */
7 | export async function executePostgresQuery(
8 | query: string,
9 | max_delay_ms: number = 1000,
10 | ) {
11 | const tracer = opentelemetry.trace.getTracer("postgres-service");
12 |
13 | return tracer.startActiveSpan(
14 | "postgres.query",
15 | { attributes: { "db.system": "postgresql", "db.statement": query } },
16 | async (span) => {
17 | try {
18 | // Simulate DB query execution time
19 | const executionTimeMs = Math.floor(Math.random() * max_delay_ms) + 5;
20 | span.setAttribute("db.execution_time_ms", executionTimeMs);
21 |
22 | // Simulate network latency and query execution
23 | await setTimeout(executionTimeMs);
24 |
25 | // Simulate a query result
26 | const result = {
27 | rowCount: Math.floor(Math.random() * 10),
28 | rows: Array(Math.floor(Math.random() * 10))
29 | .fill(0)
30 | .map(() => ({})),
31 | };
32 |
33 | span.setAttribute("db.rows_affected", result.rowCount);
34 |
35 | span.setStatus({ code: opentelemetry.SpanStatusCode.OK });
36 | return result;
37 | } catch (error) {
38 | // Properly record errors in the span
39 | span.setStatus({
40 | code: opentelemetry.SpanStatusCode.ERROR,
41 | message: error instanceof Error ? error.message : "Unknown error",
42 | });
43 |
44 | if (error instanceof Error) {
45 | span.recordException(error);
46 | }
47 |
48 | throw error;
49 | } finally {
50 | span.end();
51 | }
52 | },
53 | );
54 | }
55 |
--------------------------------------------------------------------------------
/minishop-telemetry/src/util/trace-handler.ts:
--------------------------------------------------------------------------------
1 | import { FastifyRequest, FastifyReply } from "fastify";
2 | import {
3 | context,
4 | propagation,
5 | Span,
6 | SpanStatusCode,
7 | Tracer,
8 | trace,
9 | } from "@opentelemetry/api";
10 |
11 | /**
12 | * Creates a traced route handler with automatic context propagation
13 | *
14 | * @param spanName The name of the span to create
15 | * @param tracer The tracer to use for creating spans
16 | * @param handler The function to execute within the span context
17 | * @returns A Fastify route handler function
18 | */
19 | export function createTracedHandler(
20 | spanName: string,
21 | tracer: Tracer,
22 | handler: (
23 | request: FastifyRequest,
24 | reply: FastifyReply,
25 | span: Span,
26 |   ) => Promise<any>,
27 | ) {
28 | return async (request: FastifyRequest, reply: FastifyReply) => {
29 | // Extract propagated context from request headers
30 | const extractedContext = propagation.extract(
31 | context.active(),
32 | request.headers,
33 | );
34 |
35 | console.log(
36 | `** headers ${spanName} -> ${JSON.stringify(request.headers["traceparent"])}`,
37 | );
38 |
39 | // Run the handler within the extracted context
40 | return context.with(extractedContext, () => {
41 | return tracer.startActiveSpan(spanName, async (span) => {
42 | try {
43 | // Add useful attributes to the span
44 | span.setAttribute("http.method", request.method);
45 | span.setAttribute("http.url", request.url);
46 |
47 | // Execute the actual handler
48 | return await handler(request, reply, span);
49 | } catch (error) {
50 | // Record any errors
51 | span.recordException(error as Error);
52 | span.setStatus({ code: SpanStatusCode.ERROR });
53 | throw error;
54 | } finally {
55 | span.end();
56 | }
57 | });
58 | });
59 | };
60 | }
61 |
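62 | // Usage (mirrors the health-check routes in the services above):
63 | //
64 | //   fastify.get(
65 | //     "/health",
66 | //     createTracedHandler("health", tracer, async (request, reply, span) =>
67 | //       reply.send({ ok: true }),
68 | //     ),
69 | //   );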
--------------------------------------------------------------------------------
/minishop-telemetry/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 | "compilerOptions": {
3 | "target": "ES2022",
4 | "module": "NodeNext",
5 | "moduleResolution": "NodeNext",
6 | "lib": ["ES2022", "DOM"],
7 | "outDir": "./dist",
8 | "rootDir": "./src",
9 | "strict": false,
10 | "esModuleInterop": true,
11 | "allowSyntheticDefaultImports": true,
12 | "skipLibCheck": true,
13 | "forceConsistentCasingInFileNames": true,
14 | "resolveJsonModule": true
15 | },
16 | "include": ["src/**/*"],
17 | "exclude": ["node_modules"]
18 | }
19 |
--------------------------------------------------------------------------------
/namespace_drift/example.yaml:
--------------------------------------------------------------------------------
1 | kind: Namespace
2 | apiVersion: v1
3 | metadata:
4 | name: compare1
5 | labels:
6 | name: compare1
7 | ---
8 | apiVersion: apps/v1
9 | kind: Deployment
10 | metadata:
11 | name: product-catalog-app
12 | namespace: compare1
13 | labels:
14 | app: web-content
15 | function: delivery
16 | spec:
17 | selector:
18 | matchLabels:
19 | app: web-content
20 | replicas: 1
21 | template:
22 | metadata:
23 | labels:
24 | app: web-content
25 | spec:
26 | containers:
27 | - name: content-server
28 | image: nginx:1.14.2
29 | ports:
30 | - containerPort: 80
31 | ---
32 | kind: Namespace
33 | apiVersion: v1
34 | metadata:
35 | name: compare2
36 | labels:
37 | name: compare2
38 | ---
39 | apiVersion: apps/v1
40 | kind: Deployment
41 | metadata:
42 | name: product-catalog-app
43 | namespace: compare2
44 | labels:
45 | app: product-catalog
46 | function: listing
47 | spec:
48 | selector:
49 | matchLabels:
50 | app: product-catalog
51 | replicas: 1
52 | template:
53 | metadata:
54 | labels:
55 | app: product-catalog
56 | spec:
57 | containers:
58 | - name: catalog-server
59 | image: nginx:1.13.0
60 | ports:
61 | - containerPort: 80
62 |
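63 | # Both namespaces contain a Deployment named product-catalog-app, but the specs have
64 | # drifted: different labels (web-content/delivery vs. product-catalog/listing) and
65 | # different images (nginx:1.14.2 vs. nginx:1.13.0).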
--------------------------------------------------------------------------------
/oomkill/oomkill_deployment.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: apps/v1
2 | kind: Deployment
3 | metadata:
4 | name: analytics-exporter
5 | spec:
6 | replicas: 1
7 | selector:
8 | matchLabels:
9 | app: analytics-exporter
10 | template:
11 | metadata:
12 | labels:
13 | app: analytics-exporter
14 | spec:
15 | containers:
16 | - name: memory-eater
17 | image: us-central1-docker.pkg.dev/genuine-flight-317411/devel/memory-eater:1.0
18 | imagePullPolicy: Always
19 | args:
20 | - 40Mi
21 | - "0"
22 | - 80Mi
23 | - "400"
24 | - "2"
25 | resources:
26 | limits:
27 | memory: 100Mi
28 | requests:
29 | memory: 100Mi
30 | restartPolicy: Always
31 | nodeSelector:
32 | kubernetes.io/arch: amd64
33 |
--------------------------------------------------------------------------------
/oomkill/oomkill_job.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: batch/v1
2 | kind: Job
3 | metadata:
4 | name: analytics-exporter
5 | spec:
6 | ttlSecondsAfterFinished: 36000
7 | backoffLimit: 0
8 | template:
9 | spec:
10 | containers:
11 | - args:
12 | - 40Mi
13 | - "0"
14 | - 80Mi
15 | - "400"
16 | - "2"
17 | image: us-central1-docker.pkg.dev/genuine-flight-317411/devel/memory-eater:1.0
18 | imagePullPolicy: Always
19 | name: memory-eater
20 | resources:
21 | limits:
22 | memory: 100Mi
23 | requests:
24 | memory: 100Mi
25 | restartPolicy: Never
26 | nodeSelector:
27 | kubernetes.io/arch: amd64
28 |
--------------------------------------------------------------------------------
/pending_pods/pending_pod_node_selector.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: Pod
3 | metadata:
4 | name: user-profile-import
5 | spec:
6 | containers:
7 | - name: nginx
8 | image: nginx
9 | nodeSelector:
10 | label: someLabel
11 |
12 |
--------------------------------------------------------------------------------
/pending_pods/pending_pod_resources.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: apps/v1
2 | kind: Deployment
3 | metadata:
4 | name: user-profile-resources
5 | spec:
6 | replicas: 1
7 | selector:
8 | matchLabels:
9 | app: user-profile-resources
10 | template:
11 | metadata:
12 | labels:
13 | app: user-profile-resources
14 | spec:
15 | containers:
16 | - name: profile-getter
17 | image: nginx
18 | imagePullPolicy: IfNotPresent
19 | resources:
20 | requests:
21 | nvidia.com/gpu: 5
22 | cpu: 3
23 | memory: 5Gi
24 | limits:
25 | nvidia.com/gpu: 5
26 | cpu: 3
27 | memory: 5Gi
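28 | # Requests 5 GPUs (plus 3 CPUs and 5Gi of memory), so the pod stays Pending on any
29 | # cluster without schedulable nvidia.com/gpu capacity.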
--------------------------------------------------------------------------------
/process_data.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import time
3 |
4 | global_i = -1
5 |
6 | # Configure logging
7 | logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
8 |
9 | def start_server():
10 | logging.info("Starting server...")
11 | time.sleep(1) # Simulate delay for starting the server
12 |
13 | def load_modules():
14 | logging.info("Loading modules...")
15 | time.sleep(1) # Simulate delay for loading modules
16 |
17 | def process_data():
18 | global global_i
19 | global_i = global_i+1
20 | if global_i % 50 == 0:
21 | logging.info('Processing data...')
22 |     process_data()  # recurse with no base case; eventually raises RecursionError (simulated crash)
23 |
24 | def main():
25 | start_server()
26 | load_modules()
27 | process_data()
28 |
29 | if __name__ == "__main__":
30 | main()
31 |
--------------------------------------------------------------------------------
/prometheus_rule_failure/bad_prometheus_rule.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: monitoring.coreos.com/v1
2 | kind: PrometheusRule
3 | metadata:
4 | labels:
5 | prometheus: kube-prometheus-stack-prometheus
6 | role: alert-rules
7 | release: robusta # change for release name
8 | name: bad-rules
9 | namespace: default # change for namespace
10 | spec:
11 | groups:
12 | - name: bad-rules.rules
13 | partial_response_strategy: ""
14 | rules:
15 | - alert: BadAlert
16 | annotations:
17 | description: BadAlert
18 | summary: this should fail rule evaluation
19 | expr: |
20 | node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace="kube-system"} / on(namespace) group_left(node,pod,container) node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace="kube-system"}
21 | labels:
22 | severity: none
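23 |         # Both sides of the division carry multiple series per namespace, so the
24 |         # on(namespace) group_left match finds duplicate series on the right-hand
25 |         # side and rule evaluation fails, as the summary above intends.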
--------------------------------------------------------------------------------
/pvc-misconfiguration/pvc-fix.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: PersistentVolumeClaim
3 | metadata:
4 | name: redis-standard-pvc
5 | spec:
6 | accessModes:
7 | - ReadWriteOnce
8 | resources:
9 | requests:
10 | storage: 1Gi
11 | storageClassName: gp2
--------------------------------------------------------------------------------
/pvc-misconfiguration/redis-deployment.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: PersistentVolumeClaim
3 | metadata:
4 | name: redis-standard-pvc
5 | spec:
6 | accessModes:
7 | - ReadWriteOnce
8 | resources:
9 | requests:
10 | storage: 1Gi
11 | storageClassName: standard-v2
12 | ---
13 | apiVersion: apps/v1
14 | kind: Deployment
15 | metadata:
16 | name: redis
17 | spec:
18 | replicas: 1
19 | selector:
20 | matchLabels:
21 | app: redis
22 | template:
23 | metadata:
24 | labels:
25 | app: redis
26 | spec:
27 | containers:
28 | - name: redis
29 | image: redis:6.2.6
30 | command: ["redis-server", "--save", "60", "1", "--stop-writes-on-bgsave-error", "no", "--dir", "/data"]
31 | ports:
32 | - containerPort: 6379
33 | volumeMounts:
34 | - mountPath: /data
35 | name: redis-storage
36 | resources:
37 | requests:
38 | memory: "256Mi"
39 | cpu: "100m"
40 | limits:
41 | memory: "2Gi"
42 | cpu: "500m"
43 | volumes:
44 | - name: redis-storage
45 | persistentVolumeClaim:
46 | claimName: redis-standard-pvc
47 |
48 | ---
49 |
50 | apiVersion: v1
51 | kind: Service
52 | metadata:
53 | name: redis
54 | spec:
55 | ports:
56 | - port: 6379
57 | targetPort: 6379
58 | selector:
59 | app: redis
60 | type: ClusterIP
61 |
--------------------------------------------------------------------------------
/readiness_probe_fail/failing_readiness_probe.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: Pod
3 | metadata:
4 | name: search-engine-service
5 | spec:
6 | containers:
7 | - name: get-details
8 | image: busybox
9 | command: ["sh", "-c", "while true; do echo 'Running...'; sleep 5; done"]
10 | readinessProbe:
11 | exec:
12 | command:
13 | - sh
14 | - -c
15 | - "exit 1"
16 | initialDelaySeconds: 5
17 | periodSeconds: 5
18 | failureThreshold: 3
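19 | # The probe command always exits 1, so the container never passes its readiness
20 | # check and the pod stays unready (0/1).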
--------------------------------------------------------------------------------
/slow-rds-query/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM python:3.10-slim
2 |
3 | # Set working directory
4 | WORKDIR /app
5 |
6 | # Copy requirements.txt
7 | COPY requirements.txt .
8 |
9 | # Install dependencies
10 | RUN pip install --no-cache-dir -r requirements.txt
11 |
12 | # Copy the FastAPI app
13 | COPY . .
14 |
15 | # Expose the ports
16 | EXPOSE 8000 8001
17 |
18 | # Run the FastAPI app
19 | CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"]
20 |
21 |
--------------------------------------------------------------------------------
/slow-rds-query/app.py:
--------------------------------------------------------------------------------
1 | import os
2 | import logging
3 | import time
4 | from fastapi import FastAPI, Request
5 | from fastapi.responses import HTMLResponse
6 | from sqlalchemy import create_engine, text
7 | from prometheus_fastapi_instrumentator import Instrumentator
8 |
9 | app = FastAPI()
10 |
11 | # Configure logging
12 | logging.basicConfig(level=logging.INFO)
13 | logger = logging.getLogger(__name__)
14 |
15 | # Database connection settings
16 | DB_USERNAME = os.getenv("DB_USERNAME")
17 | DB_PASSWORD = os.getenv("DB_PASSWORD")
18 | DB_HOST = os.getenv("DB_HOST")
19 | DB_DATABASE = os.getenv("DB_DATABASE")
20 | DB_URL = f"mysql+pymysql://{DB_USERNAME}:{DB_PASSWORD}@{DB_HOST}/{DB_DATABASE}"
21 | STORED_PROCEDURE = "sp_CheckUserNotifications"
22 |
23 | # Initialize database connection
24 | engine = create_engine(DB_URL)
25 |
26 | # Add Prometheus middleware
27 | Instrumentator().instrument(app).expose(app)
28 |
29 | def check_promotional_notifications():
30 | logger.info("Connecting to promotions database to see if we should try to upsell user")
31 | try:
32 | with engine.connect() as connection:
33 | logger.info(f"Connecting to database at {DB_HOST}")
34 | start_time = time.time()
35 | logger.info(f"Fetching data using stored procedure: {STORED_PROCEDURE}")
36 | # Execute the stored procedure
37 | connection.execute(text(f"CALL {STORED_PROCEDURE}(@result);"))
38 | # Fetch the result
39 | result = connection.execute(text("SELECT @result AS UserNotifications;")).fetchall()
40 | end_time = time.time()
41 | logger.info(f"Database call completed in {end_time - start_time:.2f} seconds.")
42 | for row in result:
43 | notifications = row[0] # Access the first element of the tuple
44 | logger.info(f"Promotions result: {notifications}")
45 | return notifications
46 | except Exception as e:
47 | logger.error(f"Error checking for promotions: {e}")
48 | return False
49 |
50 | @app.get("/", response_class=HTMLResponse)
51 | def read_root():
52 | logger.info("Received request for checkout page.")
53 | start_time = time.time()
54 | has_promotions = check_promotional_notifications()
55 | end_time = time.time()
56 | logger.info(f"Page rendered in {end_time - start_time:.2f} seconds.")
57 |     return f"""
58 |     <html>
59 |     <head>
60 |     <title>Checkout Status</title>
61 |     </head>
62 |     <body>
63 |     <h1>Success!</h1>
64 |     <p>Promotions: {has_promotions}</p>
65 |     </body>
66 |     </html>
67 |     """
68 |
69 | if __name__ == "__main__":
70 |     import uvicorn
71 |     from prometheus_client import start_http_server
72 |     # Start the standalone Prometheus metrics server, then the FastAPI app
73 |     start_http_server(8001)
74 |     uvicorn.run(app, host="0.0.0.0", port=8000)
--------------------------------------------------------------------------------
/slow-rds-query/build.sh:
--------------------------------------------------------------------------------
1 | docker buildx build --platform linux/amd64 . -t us-central1-docker.pkg.dev/genuine-flight-317411/devel/rds-demo:v1
2 |
--------------------------------------------------------------------------------
/slow-rds-query/manifest.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: apps/v1
2 | kind: Deployment
3 | metadata:
4 | name: customer-orders
5 | spec:
6 | replicas: 1
7 | selector:
8 | matchLabels:
9 | app: customer-orders
10 | template:
11 | metadata:
12 | labels:
13 | app: customer-orders
14 | spec:
15 | containers:
16 | - name: fastapi-app
17 | image: us-central1-docker.pkg.dev/genuine-flight-317411/devel/rds-demo:v1
18 | ports:
19 | - containerPort: 8000
20 | - containerPort: 8001
21 | env:
22 | - name: DB_USERNAME
23 | valueFrom:
24 | secretKeyRef:
25 | name: db-secrets
26 | key: username
27 | - name: DB_PASSWORD
28 | valueFrom:
29 | secretKeyRef:
30 | name: db-secrets
31 | key: password
32 | - name: DB_HOST
33 | valueFrom:
34 | secretKeyRef:
35 | name: db-secrets
36 | key: host
37 | - name: DB_DATABASE
38 | valueFrom:
39 | secretKeyRef:
40 | name: db-secrets
41 | key: database
42 | - name: curl-sidecar
43 | image: curlimages/curl
44 | args:
45 | - /bin/sh
46 | - -c
47 | - while true; do curl -s http://localhost:8000; sleep 60; done
48 | ---
49 | apiVersion: v1
50 | kind: Service
51 | metadata:
52 | name: customer-orders-service
53 | labels:
54 | app: customer-orders
55 | spec:
56 | selector:
57 | app: customer-orders
58 | ports:
59 | - protocol: TCP
60 | port: 80
61 | targetPort: 8000
62 | name: http
63 | type: ClusterIP
64 | ---
65 | apiVersion: monitoring.coreos.com/v1
66 | kind: ServiceMonitor
67 | metadata:
68 | name: customer-orders-service-monitor
69 | labels:
70 | release: robusta
71 | spec:
72 | selector:
73 | matchLabels:
74 | app: customer-orders
75 | endpoints:
76 | - port: http
77 | path: /metrics
78 | interval: 15s
79 | namespaceSelector:
80 | matchNames:
81 | - default
82 | ---
83 | apiVersion: monitoring.coreos.com/v1
84 | kind: PrometheusRule
85 | metadata:
86 | name: customer-orders-alert-rules
87 | labels:
88 | release: robusta
89 | spec:
90 | groups:
91 | - name: customerorders.rules
92 | rules:
93 | - alert: HighLatencyForCustomerCheckout
94 | expr: rate(http_request_duration_seconds_sum[24h]) / (rate(http_request_duration_seconds_count[24h])) > 3
95 | for: 1m
96 | labels:
97 | severity: critical
98 | deployment: customer-orders
99 | namespace: default
100 | annotations:
101 | summary: "HTTP Requests to the '/checkout' endpoint in customer-orders-app are taking longer than 3 seconds"
102 | description: "HTTP Requests to the '/checkout' endpoint in customer-orders-app are taking longer than 3 seconds"
103 |
104 |
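105 | # Note: the curl-sidecar above requests the app every 60 seconds, and each request
106 | # runs the sp_CheckUserNotifications stored procedure; once the average request
107 | # latency over the window exceeds 3s, HighLatencyForCustomerCheckout fires.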
--------------------------------------------------------------------------------
/slow-rds-query/requirements.txt:
--------------------------------------------------------------------------------
1 | fastapi
2 | uvicorn[standard]
3 | sqlalchemy
4 | pymysql
5 | prometheus-fastapi-instrumentator
6 |
--------------------------------------------------------------------------------
/sock-shop/sock-shop.yaml:
--------------------------------------------------------------------------------
1 | # based on https://raw.githubusercontent.com/microservices-demo/microservices-demo/master/deploy/kubernetes/complete-demo.yaml
2 |
3 | ---
4 | apiVersion: v1
5 | kind: Namespace
6 | metadata:
7 | name: sock-shop
8 | ---
9 | apiVersion: apps/v1
10 | kind: Deployment
11 | metadata:
12 | name: carts
13 | labels:
14 | name: carts
15 | namespace: sock-shop
16 | spec:
17 | replicas: 1
18 | selector:
19 | matchLabels:
20 | name: carts
21 | template:
22 | metadata:
23 | labels:
24 | name: carts
25 | annotations:
26 | admission.datadoghq.com/dotnet-lib.version: "v2.53.2-musl"
27 | spec:
28 | containers:
29 | - name: carts
30 | image: weaveworksdemos/carts:0.4.8
31 | env:
32 | - name: JAVA_OPTS
33 | value: -Xms64m -Xmx128m -XX:+UseG1GC -Djava.security.egd=file:/dev/urandom -Dspring.zipkin.enabled=false
34 | resources:
35 | limits:
36 | cpu: 300m
37 | memory: 500Mi
38 | requests:
39 | cpu: 100m
40 | memory: 200Mi
41 | ports:
42 | - containerPort: 80
43 | securityContext:
44 | runAsNonRoot: true
45 | runAsUser: 10001
46 | capabilities:
47 | drop:
48 | - all
49 | add:
50 | - NET_BIND_SERVICE
51 | readOnlyRootFilesystem: true
52 | volumeMounts:
53 | - mountPath: /tmp
54 | name: tmp-volume
55 | volumes:
56 | - name: tmp-volume
57 | emptyDir:
58 | medium: Memory
59 | nodeSelector:
60 | beta.kubernetes.io/os: linux
61 | ---
62 | apiVersion: v1
63 | kind: Service
64 | metadata:
65 | name: carts
66 | annotations:
67 | prometheus.io/scrape: 'true'
68 | labels:
69 | name: carts
70 | namespace: sock-shop
71 | spec:
72 | ports:
73 | # the port that this service should serve on
74 | - port: 80
75 | targetPort: 80
76 | selector:
77 | name: carts
78 | ---
79 | apiVersion: apps/v1
80 | kind: Deployment
81 | metadata:
82 | name: carts-db
83 | labels:
84 | name: carts-db
85 | namespace: sock-shop
86 | spec:
87 | replicas: 1
88 | selector:
89 | matchLabels:
90 | name: carts-db
91 | template:
92 | metadata:
93 | labels:
94 | name: carts-db
95 | spec:
96 | containers:
97 | - name: carts-db
98 | image: mongo
99 | ports:
100 | - name: mongo
101 | containerPort: 27017
102 | securityContext:
103 | capabilities:
104 | drop:
105 | - all
106 | add:
107 | - CHOWN
108 | - SETGID
109 | - SETUID
110 | readOnlyRootFilesystem: true
111 | volumeMounts:
112 | - mountPath: /tmp
113 | name: tmp-volume
114 | volumes:
115 | - name: tmp-volume
116 | emptyDir:
117 | medium: Memory
118 | nodeSelector:
119 | beta.kubernetes.io/os: linux
120 | ---
121 | apiVersion: v1
122 | kind: Service
123 | metadata:
124 | name: carts-db
125 | labels:
126 | name: carts-db
127 | namespace: sock-shop
128 | spec:
129 | ports:
130 | # the port that this service should serve on
131 | - port: 27017
132 | targetPort: 27017
133 | selector:
134 | name: carts-db
135 | ---
136 | apiVersion: apps/v1
137 | kind: Deployment
138 | metadata:
139 | name: catalogue
140 | labels:
141 | name: catalogue
142 | namespace: sock-shop
143 | spec:
144 | replicas: 1
145 | selector:
146 | matchLabels:
147 | name: catalogue
148 | template:
149 | metadata:
150 | labels:
151 | name: catalogue
152 | spec:
153 | containers:
154 | - name: catalogue
155 | image: weaveworksdemos/catalogue:0.3.5
156 | command: ["/app"]
157 | args:
158 | - -port=80
159 | resources:
160 | limits:
161 | cpu: 200m
162 | memory: 200Mi
163 | requests:
164 | cpu: 100m
165 | memory: 100Mi
166 | ports:
167 | - containerPort: 80
168 | securityContext:
169 | runAsNonRoot: true
170 | runAsUser: 10001
171 | capabilities:
172 | drop:
173 | - all
174 | add:
175 | - NET_BIND_SERVICE
176 | readOnlyRootFilesystem: true
177 | livenessProbe:
178 | httpGet:
179 | path: /health
180 | port: 80
181 | initialDelaySeconds: 300
182 | periodSeconds: 3
183 | readinessProbe:
184 | httpGet:
185 | path: /health
186 | port: 80
187 | initialDelaySeconds: 180
188 | periodSeconds: 3
189 | nodeSelector:
190 | beta.kubernetes.io/os: linux
191 | ---
192 | apiVersion: v1
193 | kind: Service
194 | metadata:
195 | name: catalogue
196 | annotations:
197 | prometheus.io/scrape: 'true'
198 | labels:
199 | name: catalogue
200 | namespace: sock-shop
201 | spec:
202 | ports:
203 | # the port that this service should serve on
204 | - port: 80
205 | targetPort: 80
206 | selector:
207 | name: catalogue
208 | ---
209 | apiVersion: apps/v1
210 | kind: Deployment
211 | metadata:
212 | name: catalogue-db
213 | labels:
214 | name: catalogue-db
215 | namespace: sock-shop
216 | spec:
217 | replicas: 1
218 | selector:
219 | matchLabels:
220 | name: catalogue-db
221 | template:
222 | metadata:
223 | labels:
224 | name: catalogue-db
225 | spec:
226 | containers:
227 | - name: catalogue-db
228 | image: weaveworksdemos/catalogue-db:0.3.0
229 | env:
230 | - name: MYSQL_ROOT_PASSWORD
231 | value: fake_password
232 | - name: MYSQL_DATABASE
233 | value: socksdb
234 | ports:
235 | - name: mysql
236 | containerPort: 3306
237 | nodeSelector:
238 | beta.kubernetes.io/os: linux
239 | ---
240 | apiVersion: v1
241 | kind: Service
242 | metadata:
243 | name: catalogue-db
244 | labels:
245 | name: catalogue-db
246 | namespace: sock-shop
247 | spec:
248 | ports:
249 | # the port that this service should serve on
250 | - port: 3306
251 | targetPort: 3306
252 | selector:
253 | name: catalogue-db
254 | ---
255 | apiVersion: apps/v1
256 | kind: Deployment
257 | metadata:
258 | name: front-end
259 | namespace: sock-shop
260 | spec:
261 | replicas: 1
262 | selector:
263 | matchLabels:
264 | name: front-end
265 | template:
266 | metadata:
267 | labels:
268 | name: front-end
269 | annotations:
270 | admission.datadoghq.com/dotnet-lib.version: "v2.53.2-musl"
271 | spec:
272 | containers:
273 | - name: front-end
274 | image: weaveworksdemos/front-end:0.3.12
275 | resources:
276 | limits:
277 | cpu: 300m
278 | memory: 1000Mi
279 | requests:
280 | cpu: 100m
281 | memory: 300Mi
282 | ports:
283 | - containerPort: 8079
284 | env:
285 | - name: SESSION_REDIS
286 | value: "true"
287 | securityContext:
288 | runAsNonRoot: true
289 | runAsUser: 10001
290 | capabilities:
291 | drop:
292 | - all
293 | readOnlyRootFilesystem: true
294 | livenessProbe:
295 | httpGet:
296 | path: /
297 | port: 8079
298 | initialDelaySeconds: 300
299 | periodSeconds: 3
300 | readinessProbe:
301 | httpGet:
302 | path: /
303 | port: 8079
304 | initialDelaySeconds: 30
305 | periodSeconds: 3
306 | nodeSelector:
307 | beta.kubernetes.io/os: linux
308 | ---
309 | apiVersion: v1
310 | kind: Service
311 | metadata:
312 | name: front-end
313 | annotations:
314 | prometheus.io/scrape: 'true'
315 | labels:
316 | name: front-end
317 | namespace: sock-shop
318 | spec:
319 | type: NodePort
320 | ports:
321 | - port: 80
322 | targetPort: 8079
323 | nodePort: 30001
324 | selector:
325 | name: front-end
326 | ---
327 | apiVersion: apps/v1
328 | kind: Deployment
329 | metadata:
330 | name: orders
331 | labels:
332 | name: orders
333 | namespace: sock-shop
334 | spec:
335 | replicas: 1
336 | selector:
337 | matchLabels:
338 | name: orders
339 | template:
340 | metadata:
341 | labels:
342 | name: orders
343 | annotations:
344 | admission.datadoghq.com/dotnet-lib.version: "v2.53.2-musl"
345 | spec:
346 | containers:
347 | - name: orders
348 | image: weaveworksdemos/orders:0.4.7
349 | env:
350 | - name: JAVA_OPTS
351 | value: -Xms64m -Xmx128m -XX:+UseG1GC -Djava.security.egd=file:/dev/urandom -Dspring.zipkin.enabled=false
352 | resources:
353 | limits:
354 | cpu: 500m
355 | memory: 500Mi
356 | requests:
357 | cpu: 100m
358 | memory: 300Mi
359 | ports:
360 | - containerPort: 80
361 | securityContext:
362 | runAsNonRoot: true
363 | runAsUser: 10001
364 | capabilities:
365 | drop:
366 | - all
367 | add:
368 | - NET_BIND_SERVICE
369 | readOnlyRootFilesystem: true
370 | volumeMounts:
371 | - mountPath: /tmp
372 | name: tmp-volume
373 | volumes:
374 | - name: tmp-volume
375 | emptyDir:
376 | medium: Memory
377 | nodeSelector:
378 | beta.kubernetes.io/os: linux
379 | ---
380 | apiVersion: v1
381 | kind: Service
382 | metadata:
383 | name: orders
384 | annotations:
385 | prometheus.io/scrape: 'true'
386 | labels:
387 | name: orders
388 | namespace: sock-shop
389 | spec:
390 | ports:
391 | # the port that this service should serve on
392 | - port: 80
393 | targetPort: 80
394 | selector:
395 | name: orders
396 | ---
397 | apiVersion: apps/v1
398 | kind: Deployment
399 | metadata:
400 | name: orders-db
401 | labels:
402 | name: orders-db
403 | namespace: sock-shop
404 | spec:
405 | replicas: 1
406 | selector:
407 | matchLabels:
408 | name: orders-db
409 | template:
410 | metadata:
411 | labels:
412 | name: orders-db
413 | spec:
414 | containers:
415 | - name: orders-db
416 | image: mongo
417 | ports:
418 | - name: mongo
419 | containerPort: 27017
420 | securityContext:
421 | capabilities:
422 | drop:
423 | - all
424 | add:
425 | - CHOWN
426 | - SETGID
427 | - SETUID
428 | readOnlyRootFilesystem: true
429 | volumeMounts:
430 | - mountPath: /tmp
431 | name: tmp-volume
432 | volumes:
433 | - name: tmp-volume
434 | emptyDir:
435 | medium: Memory
436 | nodeSelector:
437 | beta.kubernetes.io/os: linux
438 | ---
439 | apiVersion: v1
440 | kind: Service
441 | metadata:
442 | name: orders-db
443 | labels:
444 | name: orders-db
445 | namespace: sock-shop
446 | spec:
447 | ports:
448 | # the port that this service should serve on
449 | - port: 27017
450 | targetPort: 27017
451 | selector:
452 | name: orders-db
453 | ---
454 | apiVersion: apps/v1
455 | kind: Deployment
456 | metadata:
457 | name: payment
458 | labels:
459 | name: payment
460 | namespace: sock-shop
461 | spec:
462 | replicas: 1
463 | selector:
464 | matchLabels:
465 | name: payment
466 | template:
467 | metadata:
468 | labels:
469 | name: payment
470 | spec:
471 | containers:
472 | - name: payment
473 | image: weaveworksdemos/payment:0.4.3
474 | resources:
475 | limits:
476 | cpu: 200m
477 | memory: 200Mi
478 | requests:
479 | cpu: 99m
480 | memory: 100Mi
481 | ports:
482 | - containerPort: 80
483 | securityContext:
484 | runAsNonRoot: true
485 | runAsUser: 10001
486 | capabilities:
487 | drop:
488 | - all
489 | add:
490 | - NET_BIND_SERVICE
491 | readOnlyRootFilesystem: true
492 | livenessProbe:
493 | httpGet:
494 | path: /health
495 | port: 80
496 | initialDelaySeconds: 300
497 | periodSeconds: 3
498 | readinessProbe:
499 | httpGet:
500 | path: /health
501 | port: 80
502 | initialDelaySeconds: 180
503 | periodSeconds: 3
504 | nodeSelector:
505 | beta.kubernetes.io/os: linux
506 | ---
507 | apiVersion: v1
508 | kind: Service
509 | metadata:
510 | name: payment
511 | annotations:
512 | prometheus.io/scrape: 'true'
513 | labels:
514 | name: payment
515 | namespace: sock-shop
516 | spec:
517 | ports:
518 | # the port that this service should serve on
519 | - port: 80
520 | targetPort: 80
521 | selector:
522 | name: payment
523 | ---
524 | apiVersion: apps/v1
525 | kind: Deployment
526 | metadata:
527 | name: queue-master
528 | labels:
529 | name: queue-master
530 | namespace: sock-shop
531 | spec:
532 | replicas: 1
533 | selector:
534 | matchLabels:
535 | name: queue-master
536 | template:
537 | metadata:
538 | labels:
539 | name: queue-master
540 | annotations:
541 | admission.datadoghq.com/dotnet-lib.version: "v2.53.2-musl"
542 | spec:
543 | containers:
544 | - name: queue-master
545 | image: weaveworksdemos/queue-master:0.3.1
546 | env:
547 | - name: JAVA_OPTS
548 | value: -Xms64m -Xmx128m -XX:+UseG1GC -Djava.security.egd=file:/dev/urandom -Dspring.zipkin.enabled=false
549 | resources:
550 | limits:
551 | cpu: 300m
552 | memory: 500Mi
553 | requests:
554 | cpu: 100m
555 | memory: 300Mi
556 | ports:
557 | - containerPort: 80
558 | nodeSelector:
559 | beta.kubernetes.io/os: linux
560 | ---
561 | apiVersion: v1
562 | kind: Service
563 | metadata:
564 | name: queue-master
565 | annotations:
566 | prometheus.io/scrape: 'true'
567 | labels:
568 | name: queue-master
569 | namespace: sock-shop
570 | spec:
571 | ports:
572 | # the port that this service should serve on
573 | - port: 80
574 | targetPort: 80
575 | selector:
576 | name: queue-master
577 | ---
578 | apiVersion: apps/v1
579 | kind: Deployment
580 | metadata:
581 | name: rabbitmq
582 | labels:
583 | name: rabbitmq
584 | namespace: sock-shop
585 | spec:
586 | replicas: 1
587 | selector:
588 | matchLabels:
589 | name: rabbitmq
590 | template:
591 | metadata:
592 | labels:
593 | name: rabbitmq
594 | annotations:
595 | prometheus.io/scrape: "false"
596 | spec:
597 | containers:
598 | - name: rabbitmq
599 | image: rabbitmq:3.6.8-management
600 | ports:
601 | - containerPort: 15672
602 | name: management
603 | - containerPort: 5672
604 | name: rabbitmq
605 | securityContext:
606 | capabilities:
607 | drop:
608 | - ALL
609 | add:
610 | - CHOWN
611 | - SETGID
612 | - SETUID
613 | - DAC_OVERRIDE
614 | readOnlyRootFilesystem: true
615 | - name: rabbitmq-exporter
616 | image: kbudde/rabbitmq-exporter
617 | ports:
618 | - containerPort: 9090
619 | name: exporter
620 | nodeSelector:
621 | kubernetes.io/os: linux
622 | ---
623 | apiVersion: v1
624 | kind: Service
625 | metadata:
626 | name: rabbitmq
627 | annotations:
628 | prometheus.io/scrape: 'true'
629 | prometheus.io/port: '9090'
630 | labels:
631 | name: rabbitmq
632 | namespace: sock-shop
633 | spec:
634 | ports:
635 | # the port that this service should serve on
636 | - port: 5672
637 | name: rabbitmq
638 | targetPort: 5672
639 | - port: 9090
640 | name: exporter
641 | targetPort: exporter
642 | protocol: TCP
643 | selector:
644 | name: rabbitmq
645 | ---
646 | apiVersion: apps/v1
647 | kind: Deployment
648 | metadata:
649 | name: session-db
650 | labels:
651 | name: session-db
652 | namespace: sock-shop
653 | spec:
654 | replicas: 1
655 | selector:
656 | matchLabels:
657 | name: session-db
658 | template:
659 | metadata:
660 | labels:
661 | name: session-db
662 | annotations:
663 | prometheus.io/scrape: "false"
664 | admission.datadoghq.com/dotnet-lib.version: "v2.53.2-musl"
665 | spec:
666 | containers:
667 | - name: session-db
668 | image: redis:alpine
669 | ports:
670 | - name: redis
671 | containerPort: 6379
672 | securityContext:
673 | capabilities:
674 | drop:
675 | - ALL
676 | add:
677 | - CHOWN
678 | - SETGID
679 | - SETUID
680 | readOnlyRootFilesystem: true
681 | nodeSelector:
682 | kubernetes.io/os: linux
683 | ---
684 | apiVersion: v1
685 | kind: Service
686 | metadata:
687 | name: session-db
688 | labels:
689 | name: session-db
690 | namespace: sock-shop
691 | spec:
692 | ports:
693 | # the port that this service should serve on
694 | - port: 6379
695 | targetPort: 6379
696 | selector:
697 | name: session-db
698 | ---
699 | apiVersion: apps/v1
700 | kind: Deployment
701 | metadata:
702 | name: shipping
703 | labels:
704 | name: shipping
705 | namespace: sock-shop
706 | spec:
707 | replicas: 1
708 | selector:
709 | matchLabels:
710 | name: shipping
711 | template:
712 | metadata:
713 | labels:
714 | name: shipping
715 | annotations:
716 | admission.datadoghq.com/dotnet-lib.version: "v2.53.2-musl"
717 | spec:
718 | containers:
719 | - name: shipping
720 | image: weaveworksdemos/shipping:0.4.8
721 | env:
722 | - name: ZIPKIN
723 | value: zipkin.jaeger.svc.cluster.local
724 | - name: JAVA_OPTS
725 | value: -Xms64m -Xmx128m -XX:+UseG1GC -Djava.security.egd=file:/dev/urandom -Dspring.zipkin.enabled=false
726 | resources:
727 | limits:
728 | cpu: 300m
729 | memory: 500Mi
730 | requests:
731 | cpu: 100m
732 | memory: 300Mi
733 | ports:
734 | - containerPort: 80
735 | securityContext:
736 | runAsNonRoot: true
737 | runAsUser: 10001
738 | capabilities:
739 | drop:
740 | - ALL
741 | add:
742 | - NET_BIND_SERVICE
743 | readOnlyRootFilesystem: true
744 | volumeMounts:
745 | - mountPath: /tmp
746 | name: tmp-volume
747 | volumes:
748 | - name: tmp-volume
749 | emptyDir:
750 | medium: Memory
751 | nodeSelector:
752 | kubernetes.io/os: linux
753 | ---
754 | apiVersion: v1
755 | kind: Service
756 | metadata:
757 | name: shipping
758 | annotations:
759 | prometheus.io/scrape: 'true'
760 | labels:
761 | name: shipping
762 | namespace: sock-shop
763 | spec:
764 | ports:
765 | # the port that this service should serve on
766 | - port: 80
767 | targetPort: 80
768 | selector:
769 | name: shipping
770 |
771 | ---
772 | apiVersion: apps/v1
773 | kind: Deployment
774 | metadata:
775 | name: user
776 | labels:
777 | name: user
778 | namespace: sock-shop
779 | spec:
780 | replicas: 1
781 | selector:
782 | matchLabels:
783 | name: user
784 | template:
785 | metadata:
786 | labels:
787 | name: user
788 | spec:
789 | containers:
790 | - name: user
791 | image: weaveworksdemos/user:0.4.7
792 | resources:
793 | limits:
794 | cpu: 300m
795 | memory: 200Mi
796 | requests:
797 | cpu: 100m
798 | memory: 100Mi
799 | ports:
800 | - containerPort: 80
801 | env:
802 | - name: mongo
803 | value: user-db:27017
804 | securityContext:
805 | runAsNonRoot: true
806 | runAsUser: 10001
807 | capabilities:
808 | drop:
809 | - ALL
810 | add:
811 | - NET_BIND_SERVICE
812 | readOnlyRootFilesystem: true
813 | livenessProbe:
814 | httpGet:
815 | path: /health
816 | port: 80
817 | initialDelaySeconds: 300
818 | periodSeconds: 3
819 | readinessProbe:
820 | httpGet:
821 | path: /health
822 | port: 80
823 | initialDelaySeconds: 180
824 | periodSeconds: 3
825 | nodeSelector:
826 | kubernetes.io/os: linux
827 | ---
828 | apiVersion: v1
829 | kind: Service
830 | metadata:
831 | name: user
832 | annotations:
833 | prometheus.io/scrape: 'true'
834 | labels:
835 | name: user
836 | namespace: sock-shop
837 | spec:
838 | ports:
839 | # the port that this service should serve on
840 | - port: 80
841 | targetPort: 80
842 | selector:
843 | name: user
844 |
845 | ---
846 | apiVersion: apps/v1
847 | kind: Deployment
848 | metadata:
849 | name: user-db
850 | labels:
851 | name: user-db
852 | namespace: sock-shop
853 | spec:
854 | replicas: 1
855 | selector:
856 | matchLabels:
857 | name: user-db
858 | template:
859 | metadata:
860 | labels:
861 | name: user-db
862 | spec:
863 | containers:
864 | - name: user-db
865 | image: weaveworksdemos/user-db:0.3.0
866 |
867 | ports:
868 | - name: mongo
869 | containerPort: 27017
870 | securityContext:
871 | capabilities:
872 | drop:
873 | - ALL
874 | add:
875 | - CHOWN
876 | - SETGID
877 | - SETUID
878 | readOnlyRootFilesystem: true
879 | volumeMounts:
880 | - mountPath: /tmp
881 | name: tmp-volume
882 | volumes:
883 | - name: tmp-volume
884 | emptyDir:
885 | medium: Memory
886 | nodeSelector:
887 | kubernetes.io/os: linux
888 | ---
889 | apiVersion: v1
890 | kind: Service
891 | metadata:
892 | name: user-db
893 | labels:
894 | name: user-db
895 | namespace: sock-shop
896 | spec:
897 | ports:
898 | # the port that this service should serve on
899 | - port: 27017
900 | targetPort: 27017
901 | selector:
902 | name: user-db
903 |
904 |
--------------------------------------------------------------------------------
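
A minimal rollout sketch for the manifest above (assuming the earlier, unshown part of the file creates the sock-shop namespace that every object here references; if it does not, create the namespace first):

    kubectl apply -f sock-shop/sock-shop.yaml
    # Block until every Deployment in the namespace reports Available.
    kubectl -n sock-shop wait --for=condition=available deployment --all --timeout=300s
    kubectl -n sock-shop get pods
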
/sock-shop/trigger-carts-issue.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: batch/v1
2 | kind: Job
3 | metadata:
4 | name: curl-job
5 | namespace: sock-shop
6 | spec:
7 | template:
8 | spec:
9 | containers:
10 | - name: curl-container
11 | image: curlimages/curl:latest
12 | command: ["/bin/sh"]
13 | args:
14 | - -c
15 | - |
16 | # This works, but not if triggered too frequently; likely because the Service then has no ready endpoints, so requests to it fail.
17 | SERVICE_URL="http://front-end.sock-shop.svc.cluster.local:80"
18 | echo "Logging in"
19 | curl ${SERVICE_URL}/login -u Eve_Berger:eve -c cookie.txt
20 | echo "Logged in"
21 | curl ${SERVICE_URL}/orders -b cookie.txt || true
22 | echo "Done triggering bug"
23 | restartPolicy: Never
24 | backoffLimit: 4
25 |
--------------------------------------------------------------------------------
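
As the script's own comment notes, the Job can fail if it runs while the front-end Service has no ready endpoints. A hedged way to run it is therefore to gate Job creation on the front-end Deployment becoming available, then read the Job's logs afterwards (a sketch; front-end and curl-job are the names used in the manifests above):

    kubectl -n sock-shop wait --for=condition=available deploy/front-end --timeout=300s
    kubectl apply -f sock-shop/trigger-carts-issue.yaml
    # backoffLimit 4 allows retries; wait for completion, then inspect the output.
    kubectl -n sock-shop wait --for=condition=complete job/curl-job --timeout=180s
    kubectl -n sock-shop logs job/curl-job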