├── .github └── workflows │ └── build_and_push_images.yml ├── .gitignore ├── 01-welcome-setup.md ├── 02-tracing-introduction.md ├── 03-auto-instrumentation.md ├── 04-manual-instrumentation.md ├── 05-sampling.md ├── 06-RED-metrics.md ├── 07-ottl.md ├── 08-k8s-tracing.md ├── LICENSE ├── README.md ├── app ├── README.md ├── api-server │ └── tracing-config.yaml ├── backend1 │ ├── Dockerfile │ ├── app.py │ ├── requirements.txt │ └── run.sh ├── backend2 │ ├── .gitignore │ ├── Dockerfile │ ├── build.gradle │ ├── build │ │ └── libs │ │ │ └── dice-0.0.1-SNAPSHOT.jar │ ├── gradle │ │ └── wrapper │ │ │ ├── gradle-wrapper.jar │ │ │ └── gradle-wrapper.properties │ ├── gradlew │ ├── gradlew.bat │ ├── run.sh │ ├── settings.gradle │ └── src │ │ └── main │ │ ├── java │ │ └── io │ │ │ └── opentelemetry │ │ │ └── dice │ │ │ ├── DiceApplication.java │ │ │ └── RollController.java │ │ └── resources │ │ └── application.properties ├── backend3 │ ├── Dockerfile │ ├── Program.cs │ ├── Properties │ │ └── launchSettings.json │ ├── appsettings.Development.json │ ├── appsettings.json │ └── backend3.csproj ├── backend4-no-instrumentation │ ├── Dockerfile │ ├── go.mod │ ├── go.sum │ └── main.go ├── backend4 │ ├── Dockerfile │ ├── go.mod │ ├── go.sum │ └── main.go ├── collector-docker.yaml ├── frontend │ ├── .gitignore │ ├── Dockerfile │ ├── index.js │ ├── instrument.js │ ├── package-lock.json │ ├── package.json │ └── run.sh ├── instrumentation-head-sampling.yaml ├── instrumentation-java-custom-config.yaml ├── instrumentation-replace-backend2.yaml ├── instrumentation.yaml ├── k8s.yaml ├── loadgen │ ├── Dockerfile │ └── run.sh ├── otel-daemonset.yaml ├── otel-env └── package-lock.json ├── backend ├── 01-backend.yaml ├── 03-collector.yaml ├── 05-collector-1.yaml ├── 05-collector-2.yaml ├── 06-backend.yaml ├── 06-collector.yaml └── 07-collector.yaml ├── images ├── api-server.png ├── jaeger-capture-custom-headers.jpg ├── jaeger-spm.png ├── jaeger-tail-sampling.jpg ├── jaeger-trace-detail.jpg ├── 
jaeger-trace-search.jpg ├── jaeger-with-span.jpg ├── otel-collector.png ├── prometheus_javaagent_metrics_list.jpg ├── prometheus_javaagent_red_metrics.jpg ├── prometheus_spanmetrics.png ├── rolldice-delay.png ├── rolldice-error.png ├── sampling-comparision.jpg ├── sampling-venn.svg ├── scaling-otel-collector.jpg ├── terminated.png ├── terminating.png └── tracing-setup.png ├── intro-slides.pdf ├── kind-1.29.yaml └── tracing-theory.pdf /.github/workflows/build_and_push_images.yml: -------------------------------------------------------------------------------- 1 | name: "Build and Push Images" 2 | 3 | on: 4 | push: 5 | paths: 6 | - "app/**" 7 | 8 | jobs: 9 | build-and-push-image: 10 | runs-on: ubuntu-latest 11 | permissions: 12 | contents: read 13 | packages: write 14 | 15 | strategy: 16 | matrix: 17 | app: 18 | - frontend 19 | - backend1 20 | - backend2 21 | - backend4 22 | - loadgen 23 | 24 | steps: 25 | - name: Checkout repository 26 | uses: actions/checkout@v4 27 | 28 | - name: Log in to the Container registry 29 | uses: docker/login-action@v3 30 | with: 31 | registry: ghcr.io 32 | username: ${{ github.actor }} 33 | password: ${{ secrets.GITHUB_TOKEN }} 34 | 35 | - name: Set up QEMU 36 | uses: docker/setup-qemu-action@v3 37 | 38 | - name: Set up Docker Buildx 39 | uses: docker/setup-buildx-action@v3 40 | with: 41 | config-inline: | 42 | [worker.oci] 43 | max-parallelism = 2 44 | 45 | - name: Build and push images 46 | uses: docker/build-push-action@v5 47 | with: 48 | context: ./app/${{ matrix.app }} 49 | file: ./app/${{ matrix.app }}/Dockerfile 50 | platforms: linux/amd64,linux/arm64 51 | push: true 52 | tags: ghcr.io/${{ github.repository }}-${{ matrix.app }} 53 | cache-from: type=gha 54 | cache-to: type=gha 55 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # If you prefer the allow list template instead of the deny list, see 
community template: 2 | # https://github.com/github/gitignore/blob/main/community/Golang/Go.AllowList.gitignore 3 | # 4 | # Binaries for programs and plugins 5 | *.exe 6 | *.exe~ 7 | *.dll 8 | *.so 9 | *.dylib 10 | 11 | # Test binary, built with `go test -c` 12 | *.test 13 | 14 | # Output of the go coverage tool, specifically when used with LiteIDE 15 | *.out 16 | 17 | # Dependency directories (remove the comment below to include it) 18 | # vendor/ 19 | 20 | # Go workspace file 21 | go.work 22 | -------------------------------------------------------------------------------- /01-welcome-setup.md: -------------------------------------------------------------------------------- 1 | # OpenTelemetry distributed tracing on Kubernetes tutorial 2 | 3 | Welcome to the OpenTelemetry distributed tracing on Kubernetes tutorial! 4 | This tutorial is continuation of: 5 | * [KubeCon NA 2023 OpenTelemetry metrics on Kubernetes tutorial](https://github.com/pavolloffay/kubecon-na-2023-opentelemetry-kubernetes-metrics-tutorial). 6 | * [KubeCon EU 2023 OpenTelemetry on Kubernetes tutorial](https://github.com/pavolloffay/kubecon-eu-2023-opentelemetry-kubernetes-tutorial). 7 | 8 | Today we will focus on distributed tracing. The tutorial will cover using OpenTelemetry instrumentation, API/SDK, collector 9 | and deploying the stack on Kubernetes. The readmes cover also more advanced topics (collecting traces from Kubernetes, tracing with service meshes) that can be done offline. 10 | 11 | See [the agenda](./README.md#agenda) 12 | 13 | ## Setup infrastructure 14 | 15 | ### Kubectl 16 | 17 | Almost all the following steps in this tutorial require kubectl. Your used version should not differ more than +-1 from the used cluster version. Please follow [this](https://kubernetes.io/docs/tasks/tools/install-kubectl-linux/#install-kubectl-binary-with-curl-on-linux) installation guide. 18 | 19 | ### Kind 20 | 21 | [Kind Quickstart](https://kind.sigs.k8s.io/docs/user/quick-start/). 
22 | 23 | If [go](https://go.dev/) is installed on your machine, `kind` can be easily installed as follows: 24 | 25 | ```bash 26 | go install sigs.k8s.io/kind@v0.22.0 27 | ``` 28 | 29 | If this is not the case, simply download the [kind-v0.22.0](https://github.com/kubernetes-sigs/kind/releases/tag/v0.22.0) binary from the release page. (Other versions will probably work too. :cowboy_hat_face:) 30 | 31 | ### Create a workshop cluster 32 | 33 | After a successful installation, a cluster can be created as follows: 34 | 35 | ```bash 36 | kind create cluster --name=workshop --config=kind-1.29.yaml 37 | ``` 38 | 39 | Kind automatically sets the kube context to the created workshop cluster. We can easily check this by getting information about our nodes. 40 | 41 | ```bash 42 | kubectl get nodes 43 | ``` 44 | Expected is the following: 45 | 46 | ```bash 47 | NAME STATUS ROLES AGE VERSION 48 | workshop-control-plane Ready control-plane 75s v1.29.1 49 | ``` 50 | 51 | ### Cleanup 52 | ```bash 53 | kind delete cluster --name=workshop 54 | ``` 55 | 56 | ## Deploy initial services 57 | 58 | ### Deploy cert-manager 59 | 60 | [cert-manager](https://cert-manager.io/docs/) is used by OpenTelemetry operator to provision TLS certificates for admission webhooks. 61 | 62 | ```bash 63 | kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v1.11.0/cert-manager.yaml 64 | kubectl get pods -n cert-manager -w 65 | ``` 66 | 67 | ### Deploy OpenTelemetry operator 68 | 69 | ```bash 70 | kubectl apply -f https://github.com/open-telemetry/opentelemetry-operator/releases/download/v0.94.0/opentelemetry-operator.yaml 71 | kubectl get pods -n opentelemetry-operator-system -w 72 | ``` 73 | 74 | ### Deploy observability backend 75 | 76 | This course is all about Observability, so a backend is needed. 
If you don't have one, you can install Prometheus for metrics and Jaeger for traces as follows: 77 | 78 | ```bash 79 | kubectl apply -f https://raw.githubusercontent.com/pavolloffay/kubecon-eu-2024-opentelemetry-kubernetes-tracing-tutorial/main/backend/01-backend.yaml 80 | kubectl get pods -n observability-backend -w 81 | ``` 82 | 83 | Afterwards, the backend can be found in the namespace `observability-backend`. 84 | 85 | ```bash 86 | kubectl port-forward -n observability-backend service/jaeger-query 16686:16686 87 | ``` 88 | 89 | Open it in the browser [localhost:16686](http://localhost:16686/) 90 | 91 | --- 92 | 93 | [Next steps](./02-tracing-introduction.md) 94 | -------------------------------------------------------------------------------- /02-tracing-introduction.md: -------------------------------------------------------------------------------- 1 | # Introduction to distributed tracing 2 | 3 | [Presentation](./tracing-theory.pdf) 4 | 5 | --- 6 | 7 | [Next steps](./03-auto-instrumentation.md) 8 | -------------------------------------------------------------------------------- /03-auto-instrumentation.md: -------------------------------------------------------------------------------- 1 | # Auto-instrumentation 2 | 3 | 4 | 5 | ## Application Description 6 | 7 | The sample application is a simple _"dice game"_, where two players roll a 8 | dice, and the player with the highest number wins. 9 | 10 | There are 3 microservices within this application: 11 | 12 | - Service `frontend` in Node.JS, that has an API endpoint `/` which takes two 13 | player names as query parameters (player1 and player2). The service calls 2 14 | downstream services (backend1, backend2), each returning a random number 15 | between 1-6. The winner is computed and returned. 16 | - Service `backend1` in python, that has an API endpoint `/rolldice` which takes 17 | a player name as query parameter. The service returns a random number between 18 | 1 and 6. 
19 | - Service `backend2` in Java, that also has an API endpoint `/rolldice` which 20 | takes a player name as query parameter. The service returns a random number 21 | between 1 and 6. 22 | 23 | Additionally there is a `loadgen` service, which utilizes `curl` to periodically 24 | call the frontend service. 25 | 26 | Let's assume player `alice` and `bob` use our service, here's a potential 27 | sequence diagram: 28 | 29 | ```mermaid 30 | sequenceDiagram 31 | loadgen->>frontend: /?player1=bob&player2=alice 32 | frontend->>backend1: /rolldice?player=bob 33 | frontend->>backend2: /rolldice?player=alice 34 | backend1-->>frontend: 3 35 | frontend-->>loadgen: bob rolls: 3 36 | backend2-->>frontend: 6 37 | frontend-->>loadgen: alice rolls: 6 38 | frontend-->>loadgen: alice wins 39 | ``` 40 | 41 | ### Deploy the app into Kubernetes 42 | 43 | Deploy the application into the kubernetes cluster. The app will be deployed into `tutorial-application` namespace. 44 | 45 | ```bash 46 | kubectl apply -f https://raw.githubusercontent.com/pavolloffay/kubecon-eu-2024-opentelemetry-kubernetes-tracing-tutorial/main/app/k8s.yaml 47 | kubectl get pods -n tutorial-application -w 48 | ... 49 | NAME READY STATUS RESTARTS AGE 50 | backend1-deployment-577cf945b4-tz5kv 1/1 Running 0 62s 51 | backend2-deployment-59d4b47774-xbq84 1/1 Running 0 62s 52 | frontend-deployment-678795956d-zwg4q 1/1 Running 0 62s 53 | loadgen-deployment-5c7d6896f8-2fz6h 1/1 Running 0 62s 54 | ``` 55 | 56 | Now port-forward the frontend app: 57 | 58 | ```bash 59 | kubectl port-forward service/frontend-service -n tutorial-application 4000:4000 60 | ``` 61 | 62 | Open browser at [http://localhost:4000/](http://localhost:4000/). 63 | 64 | ## Manual or Automatic Instrumentation? 
65 | 66 | To make your application emit traces, metrics & logs you can either instrument 67 | your application _manually_ or _automatically_: 68 | 69 | - Manual instrumentation means that you modify your code yourself: you initialize and 70 | configure the SDK, you load instrumentation libraries, you create your own spans, 71 | metrics using the API. 72 | Developers can use this approach to tune the observability of their application to 73 | their needs, but it requires a lot of initial time investment, expertise in how (RPC) frameworks and clients work, and maintenance over time. 74 | - Automatic instrumentation means that you don't have to touch your code to get your 75 | application to emit telemetry data. 76 | Automatic instrumentation is great to get you started with OpenTelemetry, and it is 77 | also valuable for Application Operators, who have no access or insights about the 78 | source code. 79 | 80 | In this chapter we will cover using OpenTelemetry auto-instrumentation. 81 | 82 | ## Instrument the demo application 83 | 84 | In this section we will deploy the app into Kubernetes and instrument it with OpenTelemetry auto-instrumentation 85 | using the [Instrumentation CRD](https://github.com/open-telemetry/opentelemetry-operator?tab=readme-ov-file#opentelemetry-auto-instrumentation-injection) provided by the OpenTelemetry operator. 86 | Then we will modify the app to create custom spans and collect additional attributes. 87 | 88 | ### Deploy OpenTelemetry collector 89 | 90 | ![OpenTelemetry Collector](images/otel-collector.png) 91 | 92 | Deploy OpenTelemetry collector that will receive data from the instrumented workloads. 93 | 94 | See the [OpenTelemetryCollector CR](./backend/03-collector.yaml). 
95 | 96 | ```bash 97 | kubectl apply -f https://raw.githubusercontent.com/pavolloffay/kubecon-eu-2024-opentelemetry-kubernetes-tracing-tutorial/main/backend/03-collector.yaml 98 | kubectl get pods -n observability-backend -w 99 | ``` 100 | 101 | ### Create instrumentation CR and see traces in the Jaeger console 102 | 103 | Now let's instrument the app with the `Instrumentation` CR and see traces in the Jaeger console. 104 | 105 | First the Instrumentation CR needs to be created in the `tutorial-application` namespace: 106 | 107 | See the [Instrumentation CR](./app/instrumentation.yaml). 108 | 109 | ```bash 110 | kubectl apply -f https://raw.githubusercontent.com/pavolloffay/kubecon-eu-2024-opentelemetry-kubernetes-tracing-tutorial/main/app/instrumentation.yaml 111 | kubectl get pods -n tutorial-application -w 112 | ... 113 | NAME READY STATUS RESTARTS AGE 114 | backend1-deployment-577cf945b4-tz5kv 1/1 Running 0 8m59s 115 | backend2-deployment-59d4b47774-xbq84 1/1 Running 0 8m59s 116 | frontend-deployment-678795956d-zwg4q 1/1 Running 0 8m59s 117 | loadgen-deployment-5c7d6896f8-2fz6h 1/1 Running 0 8m59s 118 | ``` 119 | 120 | The `Instrumentation` CR does not instrument the workloads. 
The instrumentation needs to be enabled by annotating a pod: 121 | 122 | ```bash 123 | kubectl patch deployment frontend-deployment -n tutorial-application -p '{"spec": {"template":{"metadata":{"annotations":{"instrumentation.opentelemetry.io/inject-sdk":"true"}}}} }' 124 | kubectl patch deployment backend1-deployment -n tutorial-application -p '{"spec": {"template":{"metadata":{"annotations":{"instrumentation.opentelemetry.io/inject-python":"true"}}}} }' 125 | kubectl patch deployment backend2-deployment -n tutorial-application -p '{"spec": {"template":{"metadata":{"annotations":{"instrumentation.opentelemetry.io/inject-java":"true"}}}} }' 126 | kubectl get pods -n tutorial-application -w 127 | # Port forward again -> kubectl port-forward service/frontend-service -n tutorial-application 4000:4000 128 | ... 129 | NAME READY STATUS RESTARTS AGE 130 | backend1-deployment-559946d88-c6zq7 0/1 Init:0/1 0 1s 131 | backend2-deployment-5658ddfd6d-gz6ql 0/1 Init:0/1 0 1s 132 | frontend-deployment-79b9c46d76-n74gr 0/1 ContainerCreating 0 1s 133 | ``` 134 | 135 | See the `backend2` pod spec: 136 | 137 | ```bash 138 | kubectl describe pod backend2-deployment-5658ddfd6d-gz6ql -n tutorial-application 139 | ... 
140 | Init Containers: 141 | opentelemetry-auto-instrumentation-java: 142 | Image: ghcr.io/open-telemetry/opentelemetry-operator/autoinstrumentation-java:1.32.1 143 | Command: 144 | cp 145 | /javaagent.jar 146 | /otel-auto-instrumentation-java/javaagent.jar 147 | Mounts: 148 | /otel-auto-instrumentation-java from opentelemetry-auto-instrumentation-java (rw) 149 | /var/run/secrets/kubernetes.io/serviceaccount from kube-api-access-48z6x (ro) 150 | Containers: 151 | backend2: 152 | Image: ghcr.io/pavolloffay/kubecon-eu-2024-opentelemetry-kubernetes-tracing-tutorial-backend2:latest 153 | Environment: 154 | OTEL_LOGS_EXPORTER: otlp 155 | JAVA_TOOL_OPTIONS: -javaagent:/otel-auto-instrumentation-java/javaagent.jar 156 | OTEL_SERVICE_NAME: backend2-deployment 157 | OTEL_EXPORTER_OTLP_ENDPOINT: http://otel-collector.observability-backend.svc.cluster.local:4317 158 | OTEL_RESOURCE_ATTRIBUTES_POD_NAME: backend2-deployment-5658ddfd6d-gz6ql (v1:metadata.name) 159 | OTEL_RESOURCE_ATTRIBUTES_NODE_NAME: (v1:spec.nodeName) 160 | OTEL_PROPAGATORS: tracecontext,baggage,b3 161 | OTEL_TRACES_SAMPLER: parentbased_traceidratio 162 | OTEL_TRACES_SAMPLER_ARG: 1 163 | OTEL_RESOURCE_ATTRIBUTES: k8s.container.name=backend2,k8s.deployment.name=backend2-deployment,k8s.namespace.name=tutorial-application,k8s.node.name=$(OTEL_RESOURCE_ATTRIBUTES_NODE_NAME),k8s.pod.name=$(OTEL_RESOURCE_ATTRIBUTES_POD_NAME),k8s.replicaset.name=backend2-deployment-5658ddfd6d,service.version=latest 164 | Mounts: 165 | /otel-auto-instrumentation-java from opentelemetry-auto-instrumentation-java (rw) 166 | ``` 167 | 168 | Now let's execute some requests on the app [http://localhost:4000/](http://localhost:4000/) and see traces in the Jaeger console [http://localhost:16686/](http://localhost:16686/). 169 | 170 | ![Trace search](./images/jaeger-trace-search.jpg) 171 | ![Trace detail](./images/jaeger-trace-detail.jpg) 172 | 173 | In addition to traces in the Java auto-instrumentation also emits **logs** and **metrics**. 
174 | The logs in our case are printed into the collector stdout via `debug` exporter and metrics are sent via OTLP HTTP into Prometheus. 175 | The OpenTelemetry spec defines that the following metrics should be collected: [HTTP metrics](https://opentelemetry.io/docs/specs/semconv/http/http-metrics/). 176 | 177 | ```bash 178 | kubectl logs deployment.apps/otel-collector -n observability-backend 179 | ... 180 | 2024-02-28T10:08:21.807Z info LogsExporter {"kind": "exporter", "data_type": "logs", "name": "debug", "resource logs": 1, "log records": 7} 181 | 2024-02-28T10:08:21.807Z info ResourceLog #0 182 | Resource SchemaURL: https://opentelemetry.io/schemas/1.21.0 183 | Resource attributes: 184 | -> container.id: Str(462d8e356c9b801d76edab5886730965f7f37b3d8b47d5eadfaea134141a35c1) 185 | -> host.arch: Str(amd64) 186 | -> host.name: Str(backend2-deployment-c7c8dc78c-wvhnk) 187 | -> k8s.container.name: Str(backend2) 188 | -> k8s.deployment.name: Str(backend2-deployment) 189 | -> k8s.namespace.name: Str(tutorial-application) 190 | -> k8s.node.name: Str(minikube) 191 | -> k8s.pod.name: Str(backend2-deployment-c7c8dc78c-wvhnk) 192 | -> k8s.replicaset.name: Str(backend2-deployment-c7c8dc78c) 193 | -> os.description: Str(Linux 6.5.12-100.fc37.x86_64) 194 | -> os.type: Str(linux) 195 | -> process.command_args: Slice(["/opt/java/openjdk/bin/java","-jar","./build/libs/dice-0.0.1-SNAPSHOT.jar"]) 196 | -> process.executable.path: Str(/opt/java/openjdk/bin/java) 197 | -> process.pid: Int(7) 198 | -> process.runtime.description: Str(Eclipse Adoptium OpenJDK 64-Bit Server VM 21.0.2+13-LTS) 199 | -> process.runtime.name: Str(OpenJDK Runtime Environment) 200 | -> process.runtime.version: Str(21.0.2+13-LTS) 201 | -> service.name: Str(backend2-deployment) 202 | -> service.version: Str(withspan) 203 | -> telemetry.auto.version: Str(1.32.1) 204 | -> telemetry.sdk.language: Str(java) 205 | -> telemetry.sdk.name: Str(opentelemetry) 206 | -> telemetry.sdk.version: Str(1.34.1) 207 | 
ScopeLogs #0 208 | ScopeLogs SchemaURL: 209 | InstrumentationScope org.apache.catalina.core.ContainerBase.[Tomcat].[localhost].[/] 210 | LogRecord #0 211 | ObservedTimestamp: 2024-02-28 10:08:21.178481174 +0000 UTC 212 | Timestamp: 2024-02-28 10:08:21.178 +0000 UTC 213 | SeverityText: INFO 214 | SeverityNumber: Info(9) 215 | Body: Str(Initializing Spring embedded WebApplicationContext) 216 | Trace ID: 3bde5d3ee82303571bba6e1136781fe4 217 | Span ID: 45de5d3ee82303571bba6e1136781fe4 218 | Flags: 0 219 | ScopeLogs #1 220 | ScopeLogs SchemaURL: 221 | InstrumentationScope io.opentelemetry.dice.DiceApplication 222 | LogRecord #0 223 | ObservedTimestamp: 2024-02-28 10:08:21.638118261 +0000 UTC 224 | Timestamp: 2024-02-28 10:08:21.638 +0000 UTC 225 | SeverityText: INFO 226 | SeverityNumber: Info(9) 227 | Body: Str(Started DiceApplication in 3.459 seconds (process running for 6.305)) 228 | Trace ID: 3bde5d3ee82303571bba6e1136781fe4 229 | Span ID: 46de5d3ee82303571bba6e1136781fe4 230 | Flags: 0 231 | 232 | 233 | kubectl logs -n tutorial-application deployment.apps/backend2-deployment 234 | ... 235 | Defaulted container "backend2" out of: backend2, opentelemetry-auto-instrumentation-java (init) 236 | Picked up JAVA_TOOL_OPTIONS: -javaagent:/otel-auto-instrumentation-java/javaagent.jar 237 | OpenJDK 64-Bit Server VM warning: Sharing is only supported for boot loader classes because bootstrap classpath has been appended 238 | [otel.javaagent 2024-03-12 17:35:52:181 +0000] [main] INFO io.opentelemetry.javaagent.tooling.VersionLogger - opentelemetry-javaagent - version: 1.32.1 239 | 240 | . 
____ _ __ _ _ 241 | /\\ / ___'_ __ _ _(_)_ __ __ _ \ \ \ \ 242 | ( ( )\___ | '_ | '_| | '_ \/ _` | \ \ \ \ 243 | \\/ ___)| |_)| | | | | || (_| | ) ) ) ) 244 | ' |____| .__|_| |_|_| |_\__, | / / / / 245 | =========|_|==============|___/=/_/_/_/ 246 | :: Spring Boot :: (v3.0.5) 247 | 248 | 2024-03-12T17:35:55.712Z INFO 7 --- [ main] io.opentelemetry.dice.DiceApplication : Starting DiceApplication v0.0.1-SNAPSHOT using Java 21.0.2 with PID 7 (/usr/src/app/build/libs/dice-0.0.1-SNAPSHOT.jar started by root in /usr/src/app) 249 | 2024-03-12T17:35:55.749Z INFO 7 --- [ main] io.opentelemetry.dice.DiceApplication : No active profile set, falling back to 1 default profile: "default" 250 | 2024-03-12T17:35:57.556Z INFO 7 --- [ main] o.s.b.w.embedded.tomcat.TomcatWebServer : Tomcat initialized with port(s): 5165 (http) 251 | 2024-03-12T17:35:57.588Z INFO 7 --- [ main] o.apache.catalina.core.StandardService : Starting service [Tomcat] 252 | 2024-03-12T17:35:57.589Z INFO 7 --- [ main] o.apache.catalina.core.StandardEngine : Starting Servlet engine: [Apache Tomcat/10.1.7] 253 | 2024-03-12T17:35:57.667Z INFO 7 --- [ main] o.a.c.c.C.[Tomcat].[localhost].[/] : Initializing Spring embedded WebApplicationContext 254 | 2024-03-12T17:35:57.669Z INFO 7 --- [ main] w.s.c.ServletWebServerApplicationContext : Root WebApplicationContext: initialization completed in 1800 ms 255 | 2024-03-12T17:35:58.293Z INFO 7 --- [ main] o.s.b.w.embedded.tomcat.TomcatWebServer : Tomcat started on port(s): 5165 (http) with context path '' 256 | 2024-03-12T17:35:58.308Z INFO 7 --- [ main] io.opentelemetry.dice.DiceApplication : Started DiceApplication in 3.459 seconds (process running for 6.305) 257 | 2024-03-12T17:37:04.363Z INFO 7 --- [nio-5165-exec-1] o.a.c.c.C.[Tomcat].[localhost].[/] : Initializing Spring DispatcherServlet 'dispatcherServlet' 258 | 2024-03-12T17:37:04.364Z INFO 7 --- [nio-5165-exec-1] o.s.web.servlet.DispatcherServlet : Initializing Servlet 'dispatcherServlet' 259 | 
2024-03-12T17:37:04.365Z INFO 7 --- [nio-5165-exec-1] o.s.web.servlet.DispatcherServlet : Completed initialization in 1 ms 260 | 2024-03-12T17:37:04.435Z INFO 7 --- [nio-5165-exec-1] io.opentelemetry.dice.RollController : Player 2 is rolling the dice: 2 261 | 2024-03-12T17:37:04.736Z WARN 7 --- [nio-5165-exec-3] io.opentelemetry.dice.RollController : Illegal number rolled, setting result to '1' 262 | 2024-03-12T17:37:04.737Z INFO 7 --- [nio-5165-exec-3] io.opentelemetry.dice.RollController : Player 2 is rolling the dice: 1 263 | ``` 264 | 265 | ```bash 266 | kubectl port-forward -n observability-backend service/prometheus 8080:80 267 | ``` 268 | Open Prometheus in the browser [localhost:8080](http://localhost:8080/graph?g0.expr=group%20(%7Bjob%3D%22backend2-deployment%22%7D)%20by%20(__name__)%0A&g0.tab=0&g0.stacked=0&g0.show_exemplars=0&g0.range_input=1h) 269 | 270 | ![Metrics from Java agent from backend2-deployment](./images/prometheus_javaagent_metrics_list.jpg) 271 | 272 | Open Prometheus in the browser [localhost:8080](http://localhost:8080/graph?g0.expr=http_server_duration_milliseconds_bucket%7Bhttp_status_code%3D%22200%22%7D%0A&g0.tab=0&g0.stacked=0&g0.show_exemplars=0&g0.range_input=1h) 273 | 274 | ![Server RED Metrics from Java agent](./images/prometheus_javaagent_red_metrics.jpg) 275 | 276 | ### Customize Java auto-instrumentation with config (capture more data) 277 | 278 | In this section we will configure the Java auto-instrumentation by modifying `Instrumentation` CR to: 279 | * create custom spans - for the main method of the application 280 | * capture server response HTTP headers 281 | 282 | See the [Java agent docs](https://opentelemetry.io/docs/languages/java/automatic/configuration/) with all the configuration options. 283 | 284 | See the [Instrumentation CR](./app/instrumentation-java-custom-config.yaml). 
285 | 286 | ```bash 287 | kubectl apply -f https://raw.githubusercontent.com/pavolloffay/kubecon-eu-2024-opentelemetry-kubernetes-tracing-tutorial/main/app/instrumentation-java-custom-config.yaml 288 | kubectl rollout restart deployment.apps/backend2-deployment -n tutorial-application 289 | kubectl get pods -w -n tutorial-application 290 | ``` 291 | 292 | ![Span from backend2-deployment](./images/jaeger-capture-custom-headers.jpg) 293 | 294 | ### Customize Java auto-instrumentation with code (capture more data) 295 | 296 | > [!NOTE] 297 | > This is an optional more advanced section. 298 | 299 | In this section we will modify [Java backend2](./app/backend2) service to: 300 | * create a new span to observe execution of a business method 301 | * attach attributes to span 302 | 303 | The OpenTelemetry Java auto-instrumentation supports `@WithSpan`, `@SpanAttribute` and `@AddingSpanAttributes` see the [documentation](https://opentelemetry.io/docs/languages/java/automatic/annotations/) and [javadoc](https://javadoc.io/doc/io.opentelemetry.instrumentation/opentelemetry-instrumentation-annotations/latest/io/opentelemetry/instrumentation/annotations/package-summary.html). 
304 | 305 | Open the [RollController.java](./app/backend2/src/main/java/io/opentelemetry/dice/RollController.java) and use the annotations: 306 | 307 | ```java 308 | # app/backend2/build.gradle 309 | # implementation 'io.opentelemetry.instrumentation:opentelemetry-instrumentation-annotations:2.1.0' 310 | # implementation 'io.opentelemetry:opentelemetry-api:1.35.0' 311 | 312 | import io.opentelemetry.api.trace.Span; 313 | import io.opentelemetry.instrumentation.annotations.WithSpan; 314 | import io.opentelemetry.instrumentation.annotations.SpanAttribute; 315 | import io.opentelemetry.instrumentation.annotations.AddingSpanAttributes; 316 | 317 | @AddingSpanAttributes 318 | @GetMapping("/rolldice") 319 | public String index(@SpanAttribute("player") @RequestParam("player") Optional player) { 320 | 321 | @WithSpan 322 | public int getRandomNumber(@SpanAttribute("min") int min, @SpanAttribute("max") int max) { 323 | int result = (int) ((Math.random() * (max - min)) + min); 324 | Span span = Span.current(); 325 | span.setAttribute("result", result); 326 | return result; 327 | } 328 | ``` 329 | 330 | Compile it and deploy: 331 | ```bash 332 | cd app/backend2 333 | 334 | # Use minikube's docker registry 335 | # eval $(minikube -p minikube docker-env) 336 | docker build -t ghcr.io/pavolloffay/kubecon-eu-2024-opentelemetry-kubernetes-tracing-tutorial-backend2:withspan . 
337 | # docker push ghcr.io/pavolloffay/kubecon-eu-2024-opentelemetry-kubernetes-tracing-tutorial-backend2:withspan 338 | 339 | kubectl set image deployment.apps/backend2-deployment backend2=ghcr.io/pavolloffay/kubecon-eu-2024-opentelemetry-kubernetes-tracing-tutorial-backend2:withspan -n tutorial-application 340 | kubectl get pods -w -n tutorial-application 341 | ``` 342 | 343 | ![Span from backend2-deployment](./images/jaeger-with-span.jpg) 344 | 345 | --- 346 | [Next steps](./04-manual-instrumentation.md) 347 | -------------------------------------------------------------------------------- /04-manual-instrumentation.md: -------------------------------------------------------------------------------- 1 | # Manual instrumentation using the OpenTelemetry SDK 2 | 3 | This tutorial section covers the manual instrumentation of a go application with the opentelemetry-sdk. 4 | 5 | As a basis for the instrumentation we use [backend4-uninstrumented](./app/backend4-no-instrumentation/main.go) ([backend4-instrumented](./app/backend4/main.go)). To compile the application you need [go 1.22 or newer](https://go.dev/doc/install). 6 | 7 | # Initialize OpenTelemetry-go-sdk 8 | 9 | Before we start instrumenting our application, we should create a standard tracer and register it globally. This ensures that the same exporter is used throughout the application and that the same process steps are performed. 10 | 11 | ```diff 12 | func main() { 13 | + otelExporter, err := otlptracegrpc.New(context.Background()) 14 | + if err != nil { 15 | + fmt.Printf("failed to create trace exporter: %s\n", err) 16 | + os.Exit(1) 17 | + } 18 | + tp := sdktrace.NewTracerProvider(sdktrace.WithBatcher(otelExporter)) 19 | + otel.SetTracerProvider(tp) 20 | ... 21 | ``` 22 | 23 | 24 | ### Create and register a global trace provider 25 | 26 | First we have to create a tracer that identifies our specific service. 
27 | 28 | ```diff 29 | +var tracer = otel.GetTracerProvider().Tracer("github.com/kubecon-eu-2024/backend") 30 | ``` 31 | 32 | ## Identifying critical path and operations for instrumentation 33 | 34 | As we begin to instrument our application, we should remember that no data is free. By focusing on the most critical endpoint and its associated functions, we can ensure that our instrumentation efforts are focused on capturing the most valuable telemetry. Let's explore how we can instrument these key components to improve the observability and reliability of our application. 35 | 36 | In our example backend, the entry point `/rolldice` starts our critical operation. 37 | 38 | Using the previously defined tracer, we can create new spans. The method `Start` creates a span and a `context.Context` containing the newly-created span. If the context.Context provided in `ctx` contains a Span then the newly-created Span will be a child of that span, otherwise it will be a root span. 39 | 40 | Keep in mind that any Span that is created `MUST` also be ended. This is the responsibility of the user. Implementations of this API may leak memory or other resources if Spans are not ended. [Documentation](https://pkg.go.dev/go.opentelemetry.io/otel/trace#Tracer). 41 | 42 | When defining a span name, it's important to choose descriptive and meaningful names that accurately reflect the operation being performed. 43 | 44 | ```diff 45 | mux := http.NewServeMux() 46 | 47 | + registerHandleFunc := func(pattern string, h http.HandlerFunc) { 48 | + route := strings.Split(pattern, " ") 49 | + mux.Handle(pattern, otelhttp.NewHandler(otelhttp.WithRouteTag(route[len(route)-1], h), pattern)) 50 | + } 51 | ``` 52 | 53 | To simulate a more complex behaviour, we find a `causeError` function in the `/rolldice` handler source code of the backend4 application. Since there is a defined probability that errors will occur, it makes sense to take this part into account as well. 
54 | 55 | Therefore we have to make sure that the context, which was previously created with the rootspan, is passed to this function. 56 | 57 | ### RecordError and set span status 58 | 59 | RecordError will record err as an exception span event for this span. An additional call to SetStatus is required if the Status of the Span should be set to Error, as this method does not change the Span status. If this span is not being recorded or err is nil then this method does nothing. 60 | 61 | ```diff 62 | func causeError(ctx context.Context, rate int) error { 63 | + _, span := tracer.Start(ctx, "causeError") 64 | + defer span.End() 65 | 66 | randomNumber := rand.Intn(100) 67 | + span.AddEvent("roll", trace.WithAttributes(attribute.Int("number", randomNumber))) 68 | if randomNumber < rate { 69 | err := fmt.Errorf("number(%d)) < rate(%d)", randomNumber, rate) 70 | + span.RecordError(err) 71 | + span.SetStatus(codes.Error, "some error occured") 72 | return err 73 | } 74 | return nil 75 | } 76 | ``` 77 | 78 | In the same execution path we also find a function that ensures high delays of our `/rolldice` endpoint with a fixed probability. 79 | 80 | ![OpenTelemetry-SDK Error](images/rolldice-error.png) 81 | 82 | ### Add a custom Event 83 | 84 | AddEvent adds an event with the provided name and options. 85 | 86 | ```diff 87 | func causeDelay(ctx context.Context, rate int) { 88 | + _, span := tracer.Start(ctx, "causeDelay") 89 | + defer span.End() 90 | randomNumber := rand.Intn(100) 91 | + span.AddEvent("roll", trace.WithAttributes(attribute.Int("number", randomNumber))) 92 | if randomNumber < rate { 93 | time.Sleep(time.Duration(2+rand.Intn(3)) * time.Second) 94 | } 95 | } 96 | ``` 97 | 98 | ![OpenTelemetry-SDK Event](images/rolldice-delay.png) 99 | 100 | Once the code has been instrumented, we can use `go mod tidy` to update the existing `go.mod` file and start testing our application. 
101 | 102 | ## Apply Backend4 to the Kubernetes test cluster 103 | 104 | Now that we have instrumentalised `backend4`, we can use it as a drop-in replacement for `backend2`. 105 | 106 | For this we need to build and provide a new container image or use the prepared `backend4:with-instr` version. 107 | 108 | ```bash 109 | kubectl apply -f https://raw.githubusercontent.com/pavolloffay/kubecon-eu-2024-opentelemetry-kubernetes-tracing-tutorial/main/app/instrumentation-replace-backend2.yaml 110 | ``` 111 | 112 | When using `kubectl diff` we should see something similar to this. 113 | 114 | ```diff 115 | apiVersion: apps/v1 116 | kind: Deployment 117 | metadata: 118 | name: backend2-deployment 119 | namespace: tutorial-application 120 | labels: 121 | app: backend2 122 | spec: 123 | template: 124 | metadata: 125 | labels: 126 | app: backend2 127 | annotations: 128 | prometheus.io/scrape: "true" 129 | + instrumentation.opentelemetry.io/inject-sdk: "true" 130 | template: 131 | spec: 132 | containers: 133 | - name: backend2 134 | - image: ghcr.io/pavolloffay/kubecon-eu-2024-opentelemetry-kubernetes-tracing-tutorial-backend2:latest 135 | + image: ghcr.io/pavolloffay/kubecon-eu-2024-opentelemetry-kubernetes-tracing-tutorial-backend4:latest 136 | env: 137 | + - name: RATE_ERROR 138 | + value: 20 139 | + - name: RATE_HIGH_DELAY 140 | + value: 20 141 | ``` 142 | 143 | > [!NOTE] 144 | > This is an optional section. 145 | 146 |
147 | 148 | Run and test backend 4 locally (shorter development cycle) 149 | 150 | ## Configuring an OTLP exporter and setting the endpoint 151 | 152 | To get quick feedback, we can run a Jaeger instance locally and point our application at it. Jaeger all-in-one will make this easy. 153 | 154 | ```bash 155 | docker run --rm -it -p 127.0.0.1:4317:4317 -p 127.0.0.1:16686:16686 -e COLLECTOR_OTLP_ENABLED=true -e LOG_LEVEL=debug jaegertracing/all-in-one:latest 156 | ``` 157 | 158 | Now we can configure our application with a specific `RATE_ERROR` and `RATE_DELAY` in `%`. This indicates how many traces should be delayed and/or cause an error. 159 | 160 | Finally we need to configure the OpenTelemetry-SDK, by default we can use common environment variables. [Documentation](https://opentelemetry.io/docs/languages/sdk-configuration/) 161 | 162 | ```bash 163 | OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4317 OTEL_SERVICE_NAME=go-backend RATE_ERROR=20 RATE_HIGH_DELAY=20 go run main.go 164 | ``` 165 | 166 |
167 | 168 | --- 169 | 170 | By instrumenting our applications, whether manually or automatically, we get more telemetry data to help us understand our system. However, since a large amount of telemetry data also generates costs, in the next chapter we will discuss how we can utilise this amount in a meaningful way. 171 | 172 | --- 173 | 174 | [Next steps](./05-sampling.md) 175 | -------------------------------------------------------------------------------- /05-sampling.md: -------------------------------------------------------------------------------- 1 | # Sampling 2 | 3 | This tutorial step covers the basic usage of the OpenTelemetry Collector on Kubernetes and how to reduce costs using sampling techniques. 4 | 5 | ## Overview 6 | 7 | In chapter 3 we saw the [schematic structure of the dice game application](https://github.com/pavolloffay/kubecon-eu-2024-opentelemetry-kubernetes-tracing-tutorial/blob/main/03-auto-instrumentation.md#application-description). The following diagram illustrates how the telemetry data collected there is exported and stored. [excalidraw](https://excalidraw.com/#json=15BrdSOMEkc9RA5cxeqwz,urTmfk01mbx7V-PpQI7KgA) 8 | 9 | ![tracing setup](images/tracing-setup.png) 10 | 11 | ## Sampling, what does it mean and why is it important? 12 | 13 | Sampling refers to the practice of selectively capturing and recording traces of requests flowing through a distributed system, rather than capturing every single request. It is crucial in distributed tracing systems because modern distributed applications often generate a massive volume of requests and transactions, which can overwhelm the tracing infrastructure or lead to excessive storage costs if every request is traced in detail. 14 | 15 | For example, a medium sized setup producing ~1M traces per minute can result in a cost of approximately $250,000 per month. (Note that this depends on your infrastructure costs, the SaaS provider you choose, the amount of metadata, etc.) 
You may want to check some service costs to get a better idea. 16 | 17 | Pricing: 18 | - AWS Xray ([calculator](https://aws.amazon.com/xray/pricing/)) 19 | - GCP Cloud Trace ([pricing](https://cloud.google.com/stackdriver/pricing#trace-costs)) 20 | 21 | ``` 22 | GCP 23 | 24 | Feature Price Free allotment per month Effective date 25 | Trace ingestion $0.20/million spans First 2.5 million spans November 1, 2018 26 | --- 27 | 28 | X-Ray Tracing 29 | 30 | Traces recorded cost $5.00 per 1 million traces recorded ($0.000005 per trace). 31 | 32 | Traces retrieved cost $0.50 per 1 million traces retrieved ($0.0000005 per trace). 33 | 34 | Traces scanned cost $0.50 per 1 million traces scanned ($0.0000005 per trace). 35 | 36 | X-Ray Insights traces stored costs $1.00 per million traces recorded ($0.000001 per trace). 37 | ``` 38 | 39 | For more details, check the [offical documentation](https://opentelemetry.io/docs/concepts/sampling/). 40 | 41 | ### How can we now reduce the number of traces? 42 | 43 | ![OpenTelemetry Sampling](images/sampling-venn.svg) 44 | 45 | ### Comparing Sampling Approaches 46 | 47 | ![OpenTelemetry Sampling](images/sampling-comparision.jpg) 48 | 49 | ### How to implement head sampling with OpenTelemetry 50 | 51 | Head sampling is a sampling technique used to make a sampling decision as early as possible. A decision to sample or drop a span or trace is not made by inspecting the trace as a whole. 52 | 53 | For the list of all available samplers, check the [offical documentation](https://opentelemetry.io/docs/languages/sdk-configuration/general/#otel_traces_sampler) 54 | 55 | #### Auto Instrumentation 56 | 57 | Update the sampling % in the Auto Instrumentation CR and restart the deployment for the configurations to take effect. 
58 | 59 | https://github.com/pavolloffay/kubecon-eu-2024-opentelemetry-kubernetes-tracing-tutorial/blob/d4b917c1cc4a411f59ae5dd770b22de1de9f6020/app/instrumentation-head-sampling.yaml#L13-L15 60 | 61 | ```yaml 62 | kubectl apply -f https://raw.githubusercontent.com/pavolloffay/kubecon-eu-2024-opentelemetry-kubernetes-tracing-tutorial/main/app/instrumentation-head-sampling.yaml 63 | kubectl rollout restart deployment.apps/backend1-deployment -n tutorial-application 64 | kubectl get pods -w -n tutorial-application 65 | ``` 66 | 67 | Describe the pod spec for the backend1 deployment to see the updated sampling rate. 68 | 69 | ```bash 70 | kubectl describe pod backend1-deployment-64ddcc76fd-w85zh -n tutorial-application 71 | ``` 72 | 73 | ```diff 74 | Environment: 75 | OTEL_TRACES_SAMPLER: parentbased_traceidratio 76 | - OTEL_TRACES_SAMPLER_ARG: 1 77 | + OTEL_TRACES_SAMPLER_ARG: 0.5 78 | ``` 79 | 80 | This tells the SDK to sample spans such that only 50% of traces get created. 81 | 82 | #### Manual Instrumentation 83 | 84 | You can also configure the ParentBasedTraceIdRatioSampler in code.A [`Sampler`](https://pkg.go.dev/go.opentelemetry.io/otel/sdk/trace#Sampler) can be set on the tracer provider using the [`WithSampler`](https://pkg.go.dev/go.opentelemetry.io/otel/sdk/trace#WithSampler) 85 | option, as follows: 86 | 87 | ```go 88 | provider := trace.NewTracerProvider( 89 | trace.WithSampler(trace.NewParentBasedTraceIdRatioSampler(0.5)), 90 | ) 91 | ``` 92 | 93 | ### How to implement tail sampling in the OpenTelemetry Collector 94 | 95 | Tail sampling is where the decision to sample a trace takes place by considering all or most of the spans within the trace. Tail Sampling gives you the option to sample your traces based on specific criteria derived from different parts of a trace, which isn’t an option with Head Sampling. 96 | 97 | Update the ENV variables below in the backend2 deployment, which generates random spans with errors and high latencies. 
98 | 99 | ```shell 100 | kubectl set env deployment backend2-deployment RATE_ERROR=50 RATE_HIGH_DELAY=50 -n tutorial-application 101 | kubectl get pods -n tutorial-application -w 102 | ``` 103 | 104 | Deploy the opentelemetry collector with `tail_sampling` enabled. 105 | 106 | ```shell 107 | kubectl apply -f https://raw.githubusercontent.com/pavolloffay/kubecon-eu-2024-opentelemetry-kubernetes-tracing-tutorial/main/backend/05-collector-1.yaml 108 | kubectl get pods -n observability-backend -w 109 | ``` 110 | 111 | Now, let’s walk-through the tail-sampling processor configuration, placed in the `processors` section of the collector configuration file: 112 | 113 | ```yaml 114 | # 1. Sample 100% of traces with ERROR-ing spans 115 | # 2. Sample 100% of trace which have a duration longer than 500ms 116 | # 3. Randomized sampling of 10% of traces without errors and latencies. 117 | processors: 118 | tail_sampling: 119 | decision_wait: 10s # time to wait before making a sampling decision 120 | num_traces: 100 # number of traces to be kept in memory 121 | expected_new_traces_per_sec: 10 # expected rate of new traces per second 122 | policies: 123 | [ 124 | { 125 | name: keep-errors, 126 | type: status_code, 127 | status_code: {status_codes: [ERROR]} 128 | }, 129 | { 130 | name: keep-slow-traces, 131 | type: latency, 132 | latency: {threshold_ms: 500} 133 | }, 134 | { 135 | name: randomized-policy, 136 | type: probabilistic, 137 | probabilistic: {sampling_percentage: 10} 138 | } 139 | ] 140 | ``` 141 | 142 | Now let's execute some requests on the app [http://localhost:4000/](http://localhost:4000/) and see traces in the Jaeger console [http://localhost:16686/](http://localhost:16686/). 143 | 144 | The image next is an example of what you might see in your backend with this sample configuration. With this configuration, you’ll get all traces with errors and latencies exceeding 500ms, as well as a random sample of other traces based on the rate we’ve configured. 
145 | 146 | ![OpenTelemetry Sampling](images/jaeger-tail-sampling.jpg) 147 | 148 | You also have the flexibility to add other policies. For the list of all policies, check the [offical documentation](https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/main/processor/tailsamplingprocessor/README.md) 149 | 150 | Here are a few examples: 151 | 152 | - `always_sample`: Sample all traces. 153 | - `string_attribute`: Sample based on string attribute values, both exact and 154 | regular expression value matches are supported. For example, you could sample 155 | based on specific custom attribute values. 156 | 157 | ----- 158 | ### Advanced Topic: Tail Sampling at scale with OpenTelemetry 159 | > [!NOTE] 160 | > This is an optional more advanced section. 161 | 162 | All spans of a trace must be processed by the same collector for tail sampling to function properly, posing scalability challenges. Initially, a single collector may suffice, but as the system grows, a two-layer setup becomes necessary. It requires two deployments of the collector, with the first layer routing all spans of a trace to the same collector in the downstream deployment (using a [load-balancing exporter](https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/main/exporter/loadbalancingexporter/README.md)), and the second layer performing the tail sampling. 
163 | 164 | ![OpenTelemetry Sampling](images/scaling-otel-collector.jpg) 165 | 166 | [excalidraw](https://excalidraw.com/#room=6a15d65ba4615c535a40,xcZD6DG977owHRoxpYY4Ag) 167 | 168 | Apply the YAML below to deploy a layer of Collectors containing the load-balancing exporter in front of collectors performing tail-sampling: 169 | 170 | ```shell 171 | kubectl apply -f https://raw.githubusercontent.com/pavolloffay/kubecon-eu-2024-opentelemetry-kubernetes-tracing-tutorial/main/backend/05-collector-2.yaml 172 | kubectl get pods -n observability-backend -w 173 | ``` 174 | 175 | ```bash 176 | jaeger-bc5f49d78-627ct 1/1 Running 0 100m 177 | otel-collector-b48b5d66d-k5dsc 1/1 Running 0 4m42s 178 | otel-gateway-collector-0 1/1 Running 0 3m38s 179 | otel-gateway-collector-1 1/1 Running 0 3m38s 180 | prometheus-77f88ccf7f-dfwh2 1/1 Running 0 100m 181 | 182 | ``` 183 | 184 | Now, let’s walk-through the load-balancing exporter configuration, placed in the `exporters` section of the collector (layer 1) configuration file: 185 | 186 | ```yaml 187 | exporters: 188 | debug: 189 | # routing_key property is used to route spans to exporters based on traceID/service name 190 | loadbalancing: 191 | routing_key: "traceID" 192 | protocol: 193 | otlp: 194 | timeout: 1s 195 | tls: 196 | insecure: true 197 | resolver: 198 | k8s: 199 | service: otel-gateway.observability-backend 200 | ports: 201 | - 4317 202 | ``` 203 | 204 | ### Advanced Topic: Jaeger's Remote Sampling extension 205 | > [!NOTE] 206 | > This is an optional more advanced section. 207 | 208 | This extension allows serving sampling strategies following the Jaeger's remote sampling API. This extension can be configured to proxy requests to a backing remote sampling server, which could potentially be a Jaeger Collector down the pipeline, or a static JSON file from the local file system. 
209 | 210 | #### Example Configuration 211 | 212 | ```yaml 213 | extensions: 214 | jaegerremotesampling: 215 | source: 216 | reload_interval: 30s 217 | remote: 218 | endpoint: jaeger-collector:14250 219 | jaegerremotesampling/1: 220 | source: 221 | reload_interval: 1s 222 | file: /etc/otelcol/sampling_strategies.json 223 | jaegerremotesampling/2: 224 | source: 225 | reload_interval: 1s 226 | file: http://jaeger.example.com/sampling_strategies.json 227 | ``` 228 | 229 | For more details, check the [offical documentation](https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/main/extension/jaegerremotesampling/README.md) 230 | 231 | 232 | [Next steps](./06-RED-metrics.md) 233 | -------------------------------------------------------------------------------- /06-RED-metrics.md: -------------------------------------------------------------------------------- 1 | # RED Metrics 2 | 3 | Now that we have introduced sampling we have reduced our visibility into the true shape and size of our traffic. 4 | We can recover some of that visibility by sending our spans to the [`spanmetrics` connector](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/connector/spanmetricsconnector) in a separate collector pipeline. 5 | This connector will derive Rate, Error, and Duration (RED) metrics from the spans it processes. 6 | 7 | ## Collector Configuration 8 | 9 | The `spanmetrics` connector provides a reasonable starting point as its default configuration, so we will simply add it to our collector configuration and feed it trace data from the `otlp` receiver. 
10 | 11 | ```diff 12 | @@ -53,14 +54,20 @@ 13 | debug: 14 | verbosity: detailed 15 | 16 | + connectors: 17 | + spanmetrics: 18 | + 19 | service: 20 | pipelines: 21 | traces: 22 | receivers: [otlp] 23 | processors: [tail_sampling] 24 | exporters: [otlp/traces] 25 | - metrics: 26 | + traces/spanmetrics: 27 | receivers: [otlp] 28 | + exporters: [spanmetrics] 29 | + metrics: 30 | + receivers: [otlp,spanmetrics] 31 | exporters: [otlphttp/metrics] 32 | logs: 33 | receivers: [otlp] 34 | ``` 35 | 36 | With this change to our collector configuration the `spanmetrics` connector will consume spans from the `otlp` receiver, generate metrics from those spans, and produce those metrics on our `metrics` pipeline to the `otlphttp/metrics` exporter to Prometheus. 37 | 38 | Let's apply this change to our collector deployment: 39 | 40 | ```bash 41 | kubectl apply -f https://raw.githubusercontent.com/pavolloffay/kubecon-eu-2024-opentelemetry-kubernetes-tracing-tutorial/main/backend/06-collector.yaml 42 | ``` 43 | 44 | Once this change is applied we can query Prometheus for the `calls_total` metrics and observe the metrics generated from our trace data: 45 | 46 | ![Prometheus Span Metrics](images/prometheus_spanmetrics.png) 47 | [Link](http://localhost:8080/graph?g0.expr=calls_total&g0.tab=0&g0.stacked=0&g0.show_exemplars=0&g0.range_input=1h) 48 | 49 | ## Jaeger Configuration 50 | 51 | To tie together our new RED metrics with our trace data we can configure Jaeger 52 | to display service performance metrics from Prometheus. We can do this by adding 53 | environment variables that tell Jaeger where to find metrics and how to interpret them. 
54 | 55 | ```yaml 56 | - name: METRICS_STORAGE_TYPE 57 | value: "prometheus" 58 | - name: PROMETHEUS_SERVER_URL 59 | value: "http://prometheus.observability-backend" 60 | - name: PROMETHEUS_QUERY_SUPPORT_SPANMETRICS_CONNECTOR 61 | value: "true" 62 | - name: PROMETHEUS_QUERY_NORMALIZE_CALLS 63 | value: "true" 64 | - name: PROMETHEUS_QUERY_NORMALIZE_DURATION 65 | value: "true" 66 | ``` 67 | 68 | These new variables tell Jaeger that we are producing metrics into Prometheus using the 69 | `spanmetrics` connector and that it should expect the `calls` and `duration` metric names 70 | to be normalized for Prometheus as `calls_total` and `duration_milliseconds_*`. With this 71 | information Jaeger is able to query Prometheus and present a dashboard for each service 72 | showing the RED metrics for all of its traced operations. 73 | 74 | ![Jaeger Service Performance Monitoring](/images/jaeger-spm.png) 75 | [Link](http://localhost:16686/monitor) 76 | 77 | [Next steps](./07-ottl.md) 78 | -------------------------------------------------------------------------------- /07-ottl.md: -------------------------------------------------------------------------------- 1 | # OpenTelemetry Transformation Language and Spans (OTTL) 2 | 3 | ## Overview 4 | 5 | The OpenTelemetry Transformation Language (OTTL) is a powerful language that allows you to transform telemetry data flowing through the Collector. The transformation of data is executed based on OTTL statements, which define how the telemetry should be transformed. It is a stand-alone part of the Collector codebase that is (re)used in several components, such as `filterprocessor`, `transformprocessor` or `routingprocessor`. 6 | 7 | Statements follow the OTTL grammar and are defined in the configuration of the particular component. Statements always relate to a particular **context** and invoke specific **functions** in the context. 
Besides that, as with any programming language, you can use operators for comparing values, converters or literals. Combined all together, an example statement could look like this: 8 | 9 | ```yaml 10 | set(attributes["client_error"], true) where attributes["http.status"] == 400 or attributes["http.status"] == 404 11 | ``` 12 | 13 | In this statement, we're using the `set` function to set the `client_error` attribute to `true`. We're conditioning this by using the `where` qualified, to apply only if HTTP status code of the requests is `400` OR `404`. 14 | 15 | ### Contexts 16 | 17 | Context determines which part of telemetry data should the statement be applied to. This can be universal for all signals, such as `Resource` and `Instrumentation Scope`, or they differ depending on the type of the signal such `Span`, `Datapoint` or `Log`. In the statement, a particular part of the context can be accessed via [paths](https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/main/pkg/ottl/LANGUAGE.md#paths), which support the familiar `.` notation and accessing particular keys with `[]` (see the example above - `attributes["client_error"]` - accessing a particular attribute). Full list of all contexts can be found [here](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/pkg/ottl#getting-started). 18 | 19 | ### Functions 20 | 21 | OTTL provides a list of predefined functions that come in two flavors - **editors** and **converters**. Editors work directly on and transform telemetry itself. Editors functions include functions such as a `set`, `delete_key`, `replace_match` or `limit`. Conversely, converters are used to transform input within a statement and they do **not** modify the telemetry themselves. These can be used e.g. to get input length (`Len`), manipulate strings (`Concat`) or assert types (`IsInt`, `IsMap`, `IsString`...). 
Full list of both types of functions can be found [here](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/pkg/ottl/ottlfuncs#ottl-functions). 22 | 23 | ### Other language features (grammar) 24 | 25 | As mentioned, OTTL also supports other language features such as literals, operators, and comments. Full list of these can be found [here](https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/main/pkg/ottl/LANGUAGE.md#paths), but most of these are common to many programming/scripting languages and are fairly intuitive. 26 | 27 | ## OTTL in action 28 | 29 | Let's take a look at how OTTL can look in action with tracing. OTTL can be leveraged to transform spans in a useful way, whether you want to enrich your spans with extra data, remove sensitive information or limit the amount of metadata included with your spans. To do this, we will use the `transformprocessor`. 30 | 31 | Our application supports recording the names of the players who are rolling the dice, by passing the names of the players as parameters in the URL, e.g. `?player1=John&player2=Jane`. Due to privacy concerns, we might not want to include these names as attributes on our spans and we would rather anonymize them. To do this, we will always pick only the first letter of the player's name and include it as the attribute. 32 | 33 | Let's fire a couple of frontend requests with player names. 
In case you stopped your port forwarding, start it again for both the frontend and our Jaeger instance: 34 | ```bash 35 | kubectl port-forward service/frontend-service -n tutorial-application 4000:4000 & 36 | kubectl port-forward -n observability-backend service/jaeger-query 16686:16686 & 37 | ``` 38 | Now let's make couple of requests with player names: 39 | - http://localhost:4000/?player1=John_Doe&player2=Jane_Doe 40 | - http://localhost:4000/?player1=Neo&player2=Trinity 41 | - http://localhost:4000/?player1=Barbie&player2=Ken 42 | 43 | First, take a look at the [Jaeger UI](http://localhost:16686/) and see that our spans have the `app.player1` attribute. Choose the `frontend-deployment` service and observe that the root span has attribute `app.player1` with the full name of the player. 44 | 45 | Second, inspect the configuration for our `transformprocessor` below: 46 | 47 | ```yaml 48 | processors: 49 | transform: 50 | error_mode: ignore 51 | trace_statements: 52 | - context: span 53 | statements: 54 | - set(attributes["app.player1"], Substring(attributes["app.player1"], 0, 1)) where attributes["app.player1"] != "" 55 | - set(attributes["app.player2"], Substring(attributes["app.player2"], 0, 1)) where attributes["app.player2"] != "" 56 | ``` 57 | 58 | We're using the `span` context to change the desired attributes of our spans. We're going to look for the `app.player1` and `app.player2` attributes and set them to the first letter of the name. We're using the `Substring` editor to do this, which takes the string, the start position and the length of the substring. We're also using the `where` qualifier to only apply this transformation if the attribute is not empty. 59 | 60 | But that is not everything. Do you see the `http.url` and `http.url` attributes? These attributes still include the names of our players as URL parameters! We need to get rid of them here as well. 
To achieve this, we add one more statement to replace the player name with `{playerName}` placeholder. 61 | 62 | ```yaml 63 | processors: 64 | transform: 65 | error_mode: ignore 66 | trace_statements: 67 | - context: span 68 | statements: 69 | - set(attributes["app.player1"], Substring(attributes["app.player1"], 0, 1)) where attributes["app.player1"] != "" 70 | - set(attributes["app.player2"], Substring(attributes["app.player2"], 0, 1)) where attributes["app.player2"] != "" 71 | - replace_all_patterns(attributes, "value", "player1=[a-zA-Z]*", "player1={playerName}") 72 | - replace_all_patterns(attributes, "value", "player2=[a-zA-Z]*", "player2={playerName}") 73 | 74 | ``` 75 | 76 | Apply the changes to our collector, with the transform processor now enabled in our tracing pipeline: 77 | 78 | ```bash 79 | kubectl replace -f https://raw.githubusercontent.com/pavolloffay/kubecon-eu-2024-opentelemetry-kubernetes-tracing-tutorial/main/backend/07-collector.yaml 80 | kubectl get pods -n observability-backend -w 81 | ``` 82 | 83 | After the collector with the new configuration rolls out, run a couple of requests with player names set: 84 | - http://localhost:4000/?player1=John_Doe&player2=Jane_Doe 85 | - http://localhost:4000/?player1=Neo&player2=Trinity 86 | - http://localhost:4000/?player1=Barbie&player2=Ken 87 | 88 | Now open your [Jaeger UI](http://localhost:16686/) again and observe the spans. You should see that the `app.player1` attributes is now anonymized and the player names are now replaced with `{playerName}` in attributes that contain the URL. You have successfully transformed your spans with OTTL! -------------------------------------------------------------------------------- /08-k8s-tracing.md: -------------------------------------------------------------------------------- 1 | # Trace your k8s environment with OpenTelemetry 2 | 3 | This part of the tutorial focuses on tracing etcd, kube-api-server and kubelet requests. 
It is assumed that the tracing backend from the previous sections has been successfully installed. If you do not use a kind cluster with the workshop setup you may want to check the blog post [Alpha in Kubernetes v1.22: API Server Tracing](https://kubernetes.io/blog/2021/09/03/api-server-tracing/) from David Ashpole. 4 | 5 |
6 | 7 | Quick install guide... 8 | 9 | 10 | The following installation steps are dependent on each other and should therefore be executed multiple times until they succeed or with some delay. 11 | 12 | ```bash 13 | kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v1.11.0/cert-manager.yaml 14 | kubectl apply -f https://github.com/open-telemetry/opentelemetry-operator/releases/download/v0.94.0/opentelemetry-operator.yaml 15 | kubectl apply -f https://raw.githubusercontent.com/pavolloffay/kubecon-eu-2024-opentelemetry-kubernetes-tracing-tutorial/main/backend/01-backend.yaml 16 | kubectl apply -f https://raw.githubusercontent.com/pavolloffay/kubecon-eu-2024-opentelemetry-kubernetes-tracing-tutorial/main/backend/03-collector.yaml 17 | ``` 18 | 19 |
20 | 21 | The previously created workshop cluster already configured `etcd`, the `api-server` and `kubelet` to report all trace information to `localhost`. Using a [collector deployed as daemonset](./app/otel-daemonset.yaml) with hostnetwork access, we can capture this data and transmit it into the workshop backend. 22 | 23 | 24 | Deploy OpenTelemetry Collector as Daemonset with hostnetowrk access: 25 | ```bash 26 | kubectl apply -f https://raw.githubusercontent.com/pavolloffay/kubecon-eu-2024-opentelemetry-kubernetes-tracing-tutorial/main/app/otel-daemonset.yaml 27 | ``` 28 | 29 | ## ETCD 30 | 31 | https://github.com/pavolloffay/kubecon-eu-2024-opentelemetry-kubernetes-tracing-tutorial/blob/f5bb1c22164d04d1fa16658ed20a7a15bfcd7d2f/kind-1.29.yaml#L23-L36 32 | 33 | - [kubeadm Configuration (v1beta3)](https://kubernetes.io/docs/reference/config-api/kubeadm-config.v1beta3/#kubeadm-k8s-io-v1beta3-LocalEtcd) 34 | - [PR #12919](https://github.com/etcd-io/etcd/pull/12919) 35 | 36 | We can check the applied etcd config using kubectl: 37 | ```bash 38 | kubectl get pods -n kube-system etcd-kind-control-plane -o yaml 39 | ``` 40 | 41 | This should be the effect: 42 | ```diff 43 | spec: 44 | containers: 45 | - command: 46 | - etcd 47 | - --data-dir=/var/lib/etcd 48 | + - --experimental-distributed-tracing-address=127.0.0.1:4317 49 | + - --experimental-distributed-tracing-instance-id=caf201fd-8d5b-467b-a70f-09ad3beb5a21 50 | + - --experimental-distributed-tracing-sampling-rate=1000000 51 | + - --experimental-distributed-tracing-service-name=etcd 52 | + - --experimental-enable-distributed-tracing=true 53 | - --experimental-initial-corrupt-check=true 54 | image: quay.io/coreos/etcd:v3.5.11 55 | hostNetwork: true 56 | ``` 57 | 58 | ## API-Server 59 | 60 | To configure the tracing export of the API-Server we have to provide a [TracingConfiguration](https://kubernetes.io/docs/reference/config-api/apiserver-config.v1beta1/#apiserver-k8s-io-v1beta1-TracingConfiguration) CR. 61 | 62 |
63 | Kind configuration 64 | 65 | https://github.com/pavolloffay/kubecon-eu-2024-opentelemetry-kubernetes-tracing-tutorial/blob/f5bb1c22164d04d1fa16658ed20a7a15bfcd7d2f/kind-1.29.yaml#L37-L45 66 |
67 | 68 | https://github.com/pavolloffay/kubecon-eu-2024-opentelemetry-kubernetes-tracing-tutorial/blob/f5bb1c22164d04d1fa16658ed20a7a15bfcd7d2f/app/api-server/tracing-config.yaml#L1-L4 69 | 70 | We can check the applied API-Server config using kubectl: 71 | ```bash 72 | kubectl get pods -n kube-system kube-apiserver-kind-control-plane -o yaml 73 | ``` 74 | 75 | This should be the effect: 76 | ```diff 77 | spec: 78 | containers: 79 | - command: 80 | - kube-apiserver 81 | - --authorization-mode=Node,RBAC 82 | - - --feature-gates=KubeletInUserNamespace=true 83 | + - --feature-gates=APIServerTracing=true,KubeletInUserNamespace=true 84 | + - --tracing-config-file=/api-server/tracing-config.yaml 85 | image: registry.k8s.io/kube-apiserver:v1.29.1 86 | hostNetwork: true 87 | nodeName: kind-control-plane 88 | priorityClassName: system-node-critical 89 | ``` 90 | 91 | Once the API-Server and ETCD are reporting telemetry data we can make some noise by creating and deleting an nginx instance: 92 | ```bash 93 | $ kubectl create deployment nginx-project --image=nginx 94 | deployment.apps/nginx-project created 95 | --- 96 | $ kubectl get deployments.apps 97 | NAME READY UP-TO-DATE AVAILABLE AGE 98 | nginx-project 1/1 1 1 1m 99 | --- 100 | $ kubectl delete deployments.apps nginx-project 101 | deployment.apps "nginx-project" deleted 102 | ``` 103 | 104 | ![api-server](images/api-server.png) 105 | 106 | ## Kubelet 107 | 108 | https://github.com/pavolloffay/kubecon-eu-2024-opentelemetry-kubernetes-tracing-tutorial/blob/f5bb1c22164d04d1fa16658ed20a7a15bfcd7d2f/kind-1.29.yaml#L16-L21 109 | 110 | We can check the applied kubelet config using kubectl: 111 | ```bash 112 | kubectl get configmaps -n kube-system kubelet-config -o yaml 113 | ``` 114 | 115 | This should be the effect: 116 | ```diff 117 | apiVersion: kubelet.config.k8s.io/v1beta1 118 | featureGates: 119 | KubeletInUserNamespace: true 120 | + KubeletTracing: true 121 | ... 
122 | syncFrequency: 0s 123 | + tracing: 124 | + endpoint: otel-collector.observability-backend.svc.cluster.local:4317 125 | + samplingRatePerMillion: 1000000 126 | volumeStatsAggPeriod: 0s 127 | ``` 128 | 129 | 130 | ![terminating](images/terminating.png) 131 | ![terminated](images/terminated.png) 132 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 
34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Exploring the Power of Distributed Tracing with OpenTelemetry on Kubernetes 2 | 3 | This repository hosts content for tutorial for Kubecon EU 2024 Paris. 4 | 5 | Previous tutorials: 6 | * [Exploring the Power of OpenTelemetry on Kubernetes - Pavol Loffay, Benedikt Bongartz & Yuri Oliveira Sa, Red Hat; Severin Neumann, Cisco; Kristina Pathak, LightStep](https://github.com/pavolloffay/kubecon-eu-2023-opentelemetry-kubernetes-tutorial) 7 | * [Tutorial: Exploring the Power of Metrics Collection with OpenTelemetry on Kubernetes - Pavol Loffay & Benedikt Bongartz, Red Hat; Anthony Mirabella, AWS; Matej Gera, Coralogix; Anusha Reddy Narapureddy, Apple](https://github.com/pavolloffay/kubecon-na-2023-opentelemetry-kubernetes-metrics-tutorial) 8 | 9 | __Abstract__: Rolling out an observability solution is not a straightforward problem. 
There are many solutions, and the chosen architecture can impact the effectiveness, robustness, and long-term maintainability of the observability stack.
[K8S-Tracing](08-k8s-tracing.md) (Bene, optional) 32 | -------------------------------------------------------------------------------- /app/README.md: -------------------------------------------------------------------------------- 1 | # Sample Application 2 | 3 | This is the source code of the sample application used in the tutorial step [Deploy the application](../03-app-instrumentation.md). 4 | 5 | ## Usage 6 | 7 | ### Docker Compose 8 | 9 | The quickest way to try out the sample application is by using docker compose. 10 | In the [app/](.) folder run: 11 | 12 | ```console 13 | docker compose up 14 | ``` 15 | 16 | This will build the images for all services + the load generator and then run 17 | them for you. You can test the frontend service yourself by calling it with 18 | your browser or curl at 19 | 20 | If you'd like to try out all services being instrumented with OpenTelemetry run 21 | 22 | ```console 23 | docker compose --env-file=./otel-env up 24 | ``` 25 | 26 | ### Kubernetes 27 | 28 | To run the the sample application on your kubernetes cluster, run 29 | 30 | ```console 31 | kubectl apply -f ./k8s.yaml 32 | ``` 33 | 34 | Note that this will pull images of the applications from ghcr.io. 35 | 36 | If you'd like to access the frontend service, open a new terminal and run 37 | 38 | ```console 39 | kubectl port-forward svc/frontend-service 4000:4000 40 | ``` 41 | 42 | ### Development 43 | 44 | If you'd like to change the code of any of the applications, 45 | you need to install Node.JS, Java and python3 with flask first. 
46 | 47 | Then you can run them all standalone: 48 | 49 | - frontend (in folder [./frontend](./frontend)): 50 | 51 | ```console 52 | node index.js 53 | ``` 54 | 55 | - backend1 (in folder [./backend1](./backend1)): 56 | 57 | ```console 58 | flask run 59 | ``` 60 | 61 | - backend2 (in folder [./backend2](./backend)): 62 | 63 | ```console 64 | ./gradlew bootRun 65 | ``` 66 | -------------------------------------------------------------------------------- /app/api-server/tracing-config.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apiserver.config.k8s.io/v1alpha1 2 | kind: TracingConfiguration 3 | endpoint: 127.0.0.1:4317 4 | samplingRatePerMillion: 1000000 5 | 6 | -------------------------------------------------------------------------------- /app/backend1/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.11-slim-buster 2 | 3 | WORKDIR /app 4 | 5 | COPY requirements.txt requirements.txt 6 | RUN pip3 install -r requirements.txt 7 | 8 | COPY app.py . 9 | COPY run.sh . 
10 | 11 | CMD [ "./run.sh" ] 12 | -------------------------------------------------------------------------------- /app/backend1/app.py: -------------------------------------------------------------------------------- 1 | from prometheus_client import generate_latest, Counter 2 | from hashlib import sha256 3 | from time import sleep 4 | from random import randint 5 | from flask import Flask, Response, request 6 | from logging.config import dictConfig 7 | 8 | app = Flask(__name__) 9 | 10 | ROLL_COUNTER = Counter( 11 | 'dice_roll_count', 'How often the dice was rolled' 12 | ) 13 | 14 | NUMBERS_COUNTER = Counter( 15 | 'dice_numbers_count', 'How often each number of the dice was rolled', 16 | ['number'] 17 | ) 18 | 19 | @app.route("/rolldice") 20 | def roll_dice(): 21 | player = request.args.get('player', default="Anonymous player") 22 | max = 8 if sha256(bytes(player, 'utf-8')).hexdigest() == 'f4b7c19317c929d2a34297d6229defe5262fa556ef654b600fc98f02c6d87fdc' else 6 23 | result = str(do_roll(max)) 24 | app.logger.info("%s is rolling the dice: %s", player, result); 25 | ROLL_COUNTER.inc() 26 | NUMBERS_COUNTER.labels(result).inc() 27 | return result 28 | 29 | def do_roll(max): 30 | result = randint(1, max) 31 | if result > 6: 32 | sleep(0.1 * result) 33 | return result 34 | 35 | @app.route("/metrics/") 36 | def metrics(): 37 | return Response(generate_latest(), mimetype=str('text/plain; version=0.0.4; charset=utf-8')) 38 | -------------------------------------------------------------------------------- /app/backend1/requirements.txt: -------------------------------------------------------------------------------- 1 | Flask==2.0.2 2 | opentelemetry-exporter-otlp==1.17.0 3 | opentelemetry-exporter-otlp-proto-http==1.17.0 4 | opentelemetry-instrumentation-flask==0.38b0 5 | opentelemetry-instrumentation-logging==0.38b0 6 | opentelemetry-instrumentation-wsgi==0.38b0 7 | opentelemetry-api==1.17.0 8 | opentelemetry-distro==0.38b0 9 | opentelemetry-instrumentation==0.38b0 10 | 
opentelemetry-sdk==1.17.0 11 | opentelemetry-semantic-conventions==0.38b0 12 | prometheus-client==0.16.0 13 | Werkzeug==2.2.2 14 | -------------------------------------------------------------------------------- /app/backend1/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | PORT=${PORT:-5000} 4 | HOST=${HOST:-"0.0.0.0"} 5 | 6 | if [[ "${OTEL_INSTRUMENTATION_ENABLED}" == "true" ]] ; then 7 | echo 'Run with instrumentation' 8 | env OTEL_SERVICE_NAME=${OTEL_SERVICE_NAME:-backend1} \ 9 | OTEL_TRACES_EXPORTER=${OTEL_TRACES_EXPORTER:-console} \ 10 | OTEL_METRICS_EXPORTER=${OTEL_METRICS_EXPORTER:-console} \ 11 | OTEL_LOGS_EXPORTER=${OTEL_LOGS_EXPORTER:-console} \ 12 | OTEL_PYTHON_LOG_CORRELATION=${OTEL_PYTHON_LOG_CORRELATION:-"true"} \ 13 | opentelemetry-instrument python3 -m flask run --host="${HOST}" --port="${PORT}" 14 | else 15 | python3 -m flask run --host="${HOST}" --port "${PORT}" 16 | fi -------------------------------------------------------------------------------- /app/backend2/.gitignore: -------------------------------------------------------------------------------- 1 | HELP.md 2 | .gradle 3 | build/* 4 | !build/libs 5 | build/libs/* 6 | !build/libs/dice-0.0.1-SNAPSHOT.jar 7 | !gradle/wrapper/gradle-wrapper.jar 8 | !**/src/main/**/build/ 9 | !**/src/test/**/build/ 10 | 11 | ### STS ### 12 | .apt_generated 13 | .classpath 14 | .factorypath 15 | .project 16 | .settings 17 | .springBeans 18 | .sts4-cache 19 | bin/ 20 | !**/src/main/**/bin/ 21 | !**/src/test/**/bin/ 22 | 23 | ### IntelliJ IDEA ### 24 | .idea 25 | *.iws 26 | *.iml 27 | *.ipr 28 | out/ 29 | !**/src/main/**/out/ 30 | !**/src/test/**/out/ 31 | 32 | ### NetBeans ### 33 | /nbproject/private/ 34 | /nbbuild/ 35 | /dist/ 36 | /nbdist/ 37 | /.nb-gradle/ 38 | 39 | ### VS Code ### 40 | .vscode/ 41 | -------------------------------------------------------------------------------- /app/backend2/Dockerfile: 
-------------------------------------------------------------------------------- 1 | FROM gradle:8.4-jdk21 AS builder 2 | 3 | WORKDIR /usr/src/app 4 | 5 | COPY . . 6 | RUN gradle bootJar 7 | 8 | FROM eclipse-temurin:21-jre-alpine 9 | WORKDIR /usr/src/app 10 | 11 | COPY --from=builder /usr/src/app/ . 12 | 13 | ADD https://github.com/open-telemetry/opentelemetry-java-instrumentation/releases/latest/download/opentelemetry-javaagent.jar javaagent.jar 14 | 15 | COPY run.sh . 16 | 17 | ENTRYPOINT ["./run.sh"] 18 | 19 | # For debug purpose: 20 | # Comment the CMD above and uncomment the lines below to add the OTel java agent 21 | # 22 | # ENV OTEL_TRACES_EXPORTER=logging 23 | # ENV OTEL_METRICS_EXPORTER=logging 24 | # ENV OTEL_LOGS_EXPORTER=logging 25 | # CMD ["java", "-javaagent:/javaagent.jar", "-jar", "./build/libs/dice-0.0.1-SNAPSHOT.jar"] 26 | -------------------------------------------------------------------------------- /app/backend2/build.gradle: -------------------------------------------------------------------------------- 1 | plugins { 2 | id 'java' 3 | id 'org.springframework.boot' version '3.0.5' 4 | id 'io.spring.dependency-management' version '1.1.0' 5 | } 6 | 7 | group = 'io.opentelemetry' 8 | version = '0.0.1-SNAPSHOT' 9 | sourceCompatibility = '17' 10 | 11 | repositories { 12 | mavenCentral() 13 | } 14 | 15 | dependencies { 16 | implementation 'org.springframework.boot:spring-boot-starter-web' 17 | implementation 'org.apache.logging.log4j:log4j-core:2.17.2' 18 | implementation 'io.opentelemetry.instrumentation:opentelemetry-instrumentation-annotations:2.1.0' 19 | implementation 'io.opentelemetry:opentelemetry-api:1.35.0' 20 | } 21 | 22 | tasks.named('test') { 23 | useJUnitPlatform() 24 | } 25 | -------------------------------------------------------------------------------- /app/backend2/build/libs/dice-0.0.1-SNAPSHOT.jar: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/pavolloffay/kubecon-eu-2024-opentelemetry-kubernetes-tracing-tutorial/e0d57e9fdb0dfd3b0d95eaa19358b2a959862c08/app/backend2/build/libs/dice-0.0.1-SNAPSHOT.jar -------------------------------------------------------------------------------- /app/backend2/gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pavolloffay/kubecon-eu-2024-opentelemetry-kubernetes-tracing-tutorial/e0d57e9fdb0dfd3b0d95eaa19358b2a959862c08/app/backend2/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /app/backend2/gradle/wrapper/gradle-wrapper.properties: -------------------------------------------------------------------------------- 1 | distributionBase=GRADLE_USER_HOME 2 | distributionPath=wrapper/dists 3 | distributionUrl=https\://services.gradle.org/distributions/gradle-7.6.1-bin.zip 4 | zipStoreBase=GRADLE_USER_HOME 5 | zipStorePath=wrapper/dists 6 | -------------------------------------------------------------------------------- /app/backend2/gradlew: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # 4 | # Copyright © 2015-2021 the original authors. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # https://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 
17 | # 18 | 19 | ############################################################################## 20 | # 21 | # Gradle start up script for POSIX generated by Gradle. 22 | # 23 | # Important for running: 24 | # 25 | # (1) You need a POSIX-compliant shell to run this script. If your /bin/sh is 26 | # noncompliant, but you have some other compliant shell such as ksh or 27 | # bash, then to run this script, type that shell name before the whole 28 | # command line, like: 29 | # 30 | # ksh Gradle 31 | # 32 | # Busybox and similar reduced shells will NOT work, because this script 33 | # requires all of these POSIX shell features: 34 | # * functions; 35 | # * expansions «$var», «${var}», «${var:-default}», «${var+SET}», 36 | # «${var#prefix}», «${var%suffix}», and «$( cmd )»; 37 | # * compound commands having a testable exit status, especially «case»; 38 | # * various built-in commands including «command», «set», and «ulimit». 39 | # 40 | # Important for patching: 41 | # 42 | # (2) This script targets any POSIX shell, so it avoids extensions provided 43 | # by Bash, Ksh, etc; in particular arrays are avoided. 44 | # 45 | # The "traditional" practice of packing multiple parameters into a 46 | # space-separated string is a well documented source of bugs and security 47 | # problems, so this is (mostly) avoided, by progressively accumulating 48 | # options in "$@", and eventually passing that to Java. 49 | # 50 | # Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS, 51 | # and GRADLE_OPTS) rely on word-splitting, this is performed explicitly; 52 | # see the in-line comments for details. 53 | # 54 | # There are tweaks for specific operating systems such as AIX, CygWin, 55 | # Darwin, MinGW, and NonStop. 56 | # 57 | # (3) This script is generated from the Groovy template 58 | # https://github.com/gradle/gradle/blob/master/subprojects/plugins/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt 59 | # within the Gradle project. 
60 | # 61 | # You can find Gradle at https://github.com/gradle/gradle/. 62 | # 63 | ############################################################################## 64 | 65 | # Attempt to set APP_HOME 66 | 67 | # Resolve links: $0 may be a link 68 | app_path=$0 69 | 70 | # Need this for daisy-chained symlinks. 71 | while 72 | APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path 73 | [ -h "$app_path" ] 74 | do 75 | ls=$( ls -ld "$app_path" ) 76 | link=${ls#*' -> '} 77 | case $link in #( 78 | /*) app_path=$link ;; #( 79 | *) app_path=$APP_HOME$link ;; 80 | esac 81 | done 82 | 83 | APP_HOME=$( cd "${APP_HOME:-./}" && pwd -P ) || exit 84 | 85 | APP_NAME="Gradle" 86 | APP_BASE_NAME=${0##*/} 87 | 88 | # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 89 | DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' 90 | 91 | # Use the maximum available, or set MAX_FD != -1 to use that value. 92 | MAX_FD=maximum 93 | 94 | warn () { 95 | echo "$*" 96 | } >&2 97 | 98 | die () { 99 | echo 100 | echo "$*" 101 | echo 102 | exit 1 103 | } >&2 104 | 105 | # OS specific support (must be 'true' or 'false'). 106 | cygwin=false 107 | msys=false 108 | darwin=false 109 | nonstop=false 110 | case "$( uname )" in #( 111 | CYGWIN* ) cygwin=true ;; #( 112 | Darwin* ) darwin=true ;; #( 113 | MSYS* | MINGW* ) msys=true ;; #( 114 | NONSTOP* ) nonstop=true ;; 115 | esac 116 | 117 | CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar 118 | 119 | 120 | # Determine the Java command to use to start the JVM. 121 | if [ -n "$JAVA_HOME" ] ; then 122 | if [ -x "$JAVA_HOME/jre/sh/java" ] ; then 123 | # IBM's JDK on AIX uses strange locations for the executables 124 | JAVACMD=$JAVA_HOME/jre/sh/java 125 | else 126 | JAVACMD=$JAVA_HOME/bin/java 127 | fi 128 | if [ ! 
-x "$JAVACMD" ] ; then 129 | die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME 130 | 131 | Please set the JAVA_HOME variable in your environment to match the 132 | location of your Java installation." 133 | fi 134 | else 135 | JAVACMD=java 136 | which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 137 | 138 | Please set the JAVA_HOME variable in your environment to match the 139 | location of your Java installation." 140 | fi 141 | 142 | # Increase the maximum file descriptors if we can. 143 | if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then 144 | case $MAX_FD in #( 145 | max*) 146 | MAX_FD=$( ulimit -H -n ) || 147 | warn "Could not query maximum file descriptor limit" 148 | esac 149 | case $MAX_FD in #( 150 | '' | soft) :;; #( 151 | *) 152 | ulimit -n "$MAX_FD" || 153 | warn "Could not set maximum file descriptor limit to $MAX_FD" 154 | esac 155 | fi 156 | 157 | # Collect all arguments for the java command, stacking in reverse order: 158 | # * args from the command line 159 | # * the main class name 160 | # * -classpath 161 | # * -D...appname settings 162 | # * --module-path (only if needed) 163 | # * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables. 
164 | 165 | # For Cygwin or MSYS, switch paths to Windows format before running java 166 | if "$cygwin" || "$msys" ; then 167 | APP_HOME=$( cygpath --path --mixed "$APP_HOME" ) 168 | CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" ) 169 | 170 | JAVACMD=$( cygpath --unix "$JAVACMD" ) 171 | 172 | # Now convert the arguments - kludge to limit ourselves to /bin/sh 173 | for arg do 174 | if 175 | case $arg in #( 176 | -*) false ;; # don't mess with options #( 177 | /?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath 178 | [ -e "$t" ] ;; #( 179 | *) false ;; 180 | esac 181 | then 182 | arg=$( cygpath --path --ignore --mixed "$arg" ) 183 | fi 184 | # Roll the args list around exactly as many times as the number of 185 | # args, so each arg winds up back in the position where it started, but 186 | # possibly modified. 187 | # 188 | # NB: a `for` loop captures its iteration list before it begins, so 189 | # changing the positional parameters here affects neither the number of 190 | # iterations, nor the values presented in `arg`. 191 | shift # remove old arg 192 | set -- "$@" "$arg" # push replacement arg 193 | done 194 | fi 195 | 196 | # Collect all arguments for the java command; 197 | # * $DEFAULT_JVM_OPTS, $JAVA_OPTS, and $GRADLE_OPTS can contain fragments of 198 | # shell script including quotes and variable substitutions, so put them in 199 | # double quotes to make sure that they get re-expanded; and 200 | # * put everything else in single quotes, so that it's not re-expanded. 201 | 202 | set -- \ 203 | "-Dorg.gradle.appname=$APP_BASE_NAME" \ 204 | -classpath "$CLASSPATH" \ 205 | org.gradle.wrapper.GradleWrapperMain \ 206 | "$@" 207 | 208 | # Stop when "xargs" is not available. 209 | if ! command -v xargs >/dev/null 2>&1 210 | then 211 | die "xargs is not available" 212 | fi 213 | 214 | # Use "xargs" to parse quoted args. 215 | # 216 | # With -n1 it outputs one arg per line, with the quotes and backslashes removed. 
217 | # 218 | # In Bash we could simply go: 219 | # 220 | # readarray ARGS < <( xargs -n1 <<<"$var" ) && 221 | # set -- "${ARGS[@]}" "$@" 222 | # 223 | # but POSIX shell has neither arrays nor command substitution, so instead we 224 | # post-process each arg (as a line of input to sed) to backslash-escape any 225 | # character that might be a shell metacharacter, then use eval to reverse 226 | # that process (while maintaining the separation between arguments), and wrap 227 | # the whole thing up as a single "set" statement. 228 | # 229 | # This will of course break if any of these variables contains a newline or 230 | # an unmatched quote. 231 | # 232 | 233 | eval "set -- $( 234 | printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" | 235 | xargs -n1 | 236 | sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' | 237 | tr '\n' ' ' 238 | )" '"$@"' 239 | 240 | exec "$JAVACMD" "$@" 241 | -------------------------------------------------------------------------------- /app/backend2/gradlew.bat: -------------------------------------------------------------------------------- 1 | @rem 2 | @rem Copyright 2015 the original author or authors. 3 | @rem 4 | @rem Licensed under the Apache License, Version 2.0 (the "License"); 5 | @rem you may not use this file except in compliance with the License. 6 | @rem You may obtain a copy of the License at 7 | @rem 8 | @rem https://www.apache.org/licenses/LICENSE-2.0 9 | @rem 10 | @rem Unless required by applicable law or agreed to in writing, software 11 | @rem distributed under the License is distributed on an "AS IS" BASIS, 12 | @rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | @rem See the License for the specific language governing permissions and 14 | @rem limitations under the License. 
15 | @rem 16 | 17 | @if "%DEBUG%"=="" @echo off 18 | @rem ########################################################################## 19 | @rem 20 | @rem Gradle startup script for Windows 21 | @rem 22 | @rem ########################################################################## 23 | 24 | @rem Set local scope for the variables with windows NT shell 25 | if "%OS%"=="Windows_NT" setlocal 26 | 27 | set DIRNAME=%~dp0 28 | if "%DIRNAME%"=="" set DIRNAME=. 29 | set APP_BASE_NAME=%~n0 30 | set APP_HOME=%DIRNAME% 31 | 32 | @rem Resolve any "." and ".." in APP_HOME to make it shorter. 33 | for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi 34 | 35 | @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 36 | set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m" 37 | 38 | @rem Find java.exe 39 | if defined JAVA_HOME goto findJavaFromJavaHome 40 | 41 | set JAVA_EXE=java.exe 42 | %JAVA_EXE% -version >NUL 2>&1 43 | if %ERRORLEVEL% equ 0 goto execute 44 | 45 | echo. 46 | echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 47 | echo. 48 | echo Please set the JAVA_HOME variable in your environment to match the 49 | echo location of your Java installation. 50 | 51 | goto fail 52 | 53 | :findJavaFromJavaHome 54 | set JAVA_HOME=%JAVA_HOME:"=% 55 | set JAVA_EXE=%JAVA_HOME%/bin/java.exe 56 | 57 | if exist "%JAVA_EXE%" goto execute 58 | 59 | echo. 60 | echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 61 | echo. 62 | echo Please set the JAVA_HOME variable in your environment to match the 63 | echo location of your Java installation. 
64 | 65 | goto fail 66 | 67 | :execute 68 | @rem Setup the command line 69 | 70 | set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar 71 | 72 | 73 | @rem Execute Gradle 74 | "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %* 75 | 76 | :end 77 | @rem End local scope for the variables with windows NT shell 78 | if %ERRORLEVEL% equ 0 goto mainEnd 79 | 80 | :fail 81 | rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of 82 | rem the _cmd.exe /c_ return code! 83 | set EXIT_CODE=%ERRORLEVEL% 84 | if %EXIT_CODE% equ 0 set EXIT_CODE=1 85 | if not ""=="%GRADLE_EXIT_CONSOLE%" exit %EXIT_CODE% 86 | exit /b %EXIT_CODE% 87 | 88 | :mainEnd 89 | if "%OS%"=="Windows_NT" endlocal 90 | 91 | :omega 92 | -------------------------------------------------------------------------------- /app/backend2/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | if [[ "${OTEL_INSTRUMENTATION_ENABLED}" == "true" ]] ; then 4 | echo 'Run with instrumentation' 5 | env OTEL_SERVICE_NAME=${OTEL_SERVICE_NAME:-backend2} \ 6 | OTEL_TRACES_EXPORTER=${OTEL_TRACES_EXPORTER:-logging} \ 7 | OTEL_METRICS_EXPORTER=${OTEL_METRICS_EXPORTER:-logging} \ 8 | OTEL_LOGS_EXPORTER=${OTEL_LOGS_EXPORTER:-logging} \ 9 | java -javaagent:./javaagent.jar -jar ./build/libs/dice-0.0.1-SNAPSHOT.jar 10 | else 11 | java -jar ./build/libs/dice-0.0.1-SNAPSHOT.jar 12 | fi 13 | -------------------------------------------------------------------------------- /app/backend2/settings.gradle: -------------------------------------------------------------------------------- 1 | rootProject.name = 'dice' 2 | -------------------------------------------------------------------------------- /app/backend2/src/main/java/io/opentelemetry/dice/DiceApplication.java: -------------------------------------------------------------------------------- 1 
| package io.opentelemetry.dice; 2 | 3 | import org.springframework.boot.SpringApplication; 4 | import org.springframework.boot.autoconfigure.SpringBootApplication; 5 | 6 | @SpringBootApplication 7 | public class DiceApplication { 8 | 9 | public static void main(String[] args) { 10 | SpringApplication.run(DiceApplication.class, args); 11 | } 12 | 13 | } 14 | -------------------------------------------------------------------------------- /app/backend2/src/main/java/io/opentelemetry/dice/RollController.java: -------------------------------------------------------------------------------- 1 | package io.opentelemetry.dice; 2 | 3 | import org.springframework.web.bind.annotation.GetMapping; 4 | import org.springframework.web.bind.annotation.RequestParam; 5 | import org.springframework.web.bind.annotation.RestController; 6 | 7 | import org.apache.logging.log4j.LogManager; 8 | import org.apache.logging.log4j.Logger; 9 | 10 | import java.util.Optional; 11 | 12 | import io.opentelemetry.api.trace.Span; 13 | import io.opentelemetry.instrumentation.annotations.WithSpan; 14 | import io.opentelemetry.instrumentation.annotations.SpanAttribute; 15 | import io.opentelemetry.instrumentation.annotations.AddingSpanAttributes; 16 | 17 | @RestController 18 | public class RollController { 19 | 20 | private static final Logger logger = LogManager.getLogger(RollController.class); 21 | 22 | 23 | @GetMapping("/rolldice") 24 | public String index(@RequestParam("player") Optional player) { 25 | int result = this.getRandomNumber(-2, 6); 26 | if(result < 1) { 27 | logger.warn("Illegal number rolled, setting result to '1'"); 28 | result = 1; 29 | } 30 | if(player.isPresent()) { 31 | logger.info(player.get() + " is rolling the dice: " + result); 32 | } else { 33 | logger.info("Anonymous player is rolling the dice: " + result); 34 | } 35 | Span span = Span.current(); 36 | span.setAttribute("result", Integer.toString(result)); 37 | span.setAttribute("player", player.orElse("Anonymous")); 38 | 
return Integer.toString(result); 39 | } 40 | 41 | public int getRandomNumber(int min, int max) { 42 | int result = (int) ((Math.random() * (max - min)) + min); 43 | return result; 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /app/backend2/src/main/resources/application.properties: -------------------------------------------------------------------------------- 1 | 2 | server.port=5165 -------------------------------------------------------------------------------- /app/backend3/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM mcr.microsoft.com/dotnet/sdk:7.0 AS build-env 2 | WORKDIR /App 3 | 4 | # Copy everything 5 | COPY . ./ 6 | # Restore as distinct layers 7 | RUN dotnet restore 8 | # Build and publish a release 9 | RUN dotnet publish -c Release -o out 10 | 11 | # Build runtime image 12 | FROM mcr.microsoft.com/dotnet/aspnet:7.0 13 | WORKDIR /App 14 | COPY --from=build-env /App/out . 
15 | 16 | # https://stackoverflow.com/a/67111195 17 | ENV DOTNET_HOSTBUILDER__RELOADCONFIGONCHANGE=false 18 | ENV ASPNETCORE_URLS="http://*:5165" 19 | 20 | ENTRYPOINT ["dotnet", "backend3.dll"] 21 | -------------------------------------------------------------------------------- /app/backend3/Program.cs: -------------------------------------------------------------------------------- 1 | var builder = WebApplication.CreateBuilder(args); 2 | var app = builder.Build(); 3 | 4 | app.MapGet("/rolldice", () => { 5 | return new Random().Next( 1, 6 ); 6 | }); 7 | 8 | app.Run(); 9 | -------------------------------------------------------------------------------- /app/backend3/Properties/launchSettings.json: -------------------------------------------------------------------------------- 1 | { 2 | "iisSettings": { 3 | "windowsAuthentication": false, 4 | "anonymousAuthentication": true, 5 | "iisExpress": { 6 | "applicationUrl": "http://localhost:33014", 7 | "sslPort": 0 8 | } 9 | }, 10 | "profiles": { 11 | "http": { 12 | "commandName": "Project", 13 | "dotnetRunMessages": true, 14 | "launchBrowser": true, 15 | "applicationUrl": "http://localhost:5165", 16 | "environmentVariables": { 17 | "ASPNETCORE_ENVIRONMENT": "Development" 18 | } 19 | }, 20 | "IIS Express": { 21 | "commandName": "IISExpress", 22 | "launchBrowser": true, 23 | "environmentVariables": { 24 | "ASPNETCORE_ENVIRONMENT": "Development" 25 | } 26 | } 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /app/backend3/appsettings.Development.json: -------------------------------------------------------------------------------- 1 | { 2 | "Logging": { 3 | "LogLevel": { 4 | "Default": "Information", 5 | "Microsoft.AspNetCore": "Warning" 6 | } 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /app/backend3/appsettings.json: -------------------------------------------------------------------------------- 1 | { 2 | "Logging": { 3 
| "LogLevel": { 4 | "Default": "Information", 5 | "Microsoft.AspNetCore": "Warning" 6 | } 7 | }, 8 | "AllowedHosts": "*" 9 | } 10 | -------------------------------------------------------------------------------- /app/backend3/backend3.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | net7.0 5 | enable 6 | enable 7 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /app/backend4-no-instrumentation/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM golang:1.22 as builder 2 | 3 | WORKDIR /src 4 | 5 | COPY . /src 6 | 7 | RUN CGO_ENABLED=0 go build -v -o /app 8 | 9 | FROM scratch 10 | 11 | COPY --from=0 /app /app 12 | 13 | EXPOSE 8080 14 | 15 | CMD ["/app"] 16 | -------------------------------------------------------------------------------- /app/backend4-no-instrumentation/go.mod: -------------------------------------------------------------------------------- 1 | module github.com/kubecon-eu-2024/backend 2 | 3 | go 1.22.0 4 | 5 | require github.com/prometheus/client_golang v1.19.0 6 | 7 | require ( 8 | github.com/beorn7/perks v1.0.1 // indirect 9 | github.com/cespare/xxhash/v2 v2.2.0 // indirect 10 | github.com/prometheus/client_model v0.5.0 // indirect 11 | github.com/prometheus/common v0.48.0 // indirect 12 | github.com/prometheus/procfs v0.12.0 // indirect 13 | golang.org/x/sys v0.17.0 // indirect 14 | google.golang.org/protobuf v1.32.0 // indirect 15 | ) 16 | -------------------------------------------------------------------------------- /app/backend4-no-instrumentation/go.sum: -------------------------------------------------------------------------------- 1 | github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= 2 | github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= 3 | github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= 
4 | github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= 5 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 6 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 7 | github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= 8 | github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= 9 | github.com/prometheus/client_golang v1.19.0 h1:ygXvpU1AoN1MhdzckN+PyD9QJOSD4x7kmXYlnfbA6JU= 10 | github.com/prometheus/client_golang v1.19.0/go.mod h1:ZRM9uEAypZakd+q/x7+gmsvXdURP+DABIEIjnmDdp+k= 11 | github.com/prometheus/client_model v0.5.0 h1:VQw1hfvPvk3Uv6Qf29VrPF32JB6rtbgI6cYPYQjL0Qw= 12 | github.com/prometheus/client_model v0.5.0/go.mod h1:dTiFglRmd66nLR9Pv9f0mZi7B7fk5Pm3gvsjB5tr+kI= 13 | github.com/prometheus/common v0.48.0 h1:QO8U2CdOzSn1BBsmXJXduaaW+dY/5QLjfB8svtSzKKE= 14 | github.com/prometheus/common v0.48.0/go.mod h1:0/KsvlIEfPQCQ5I2iNSAWKPZziNCvRs5EC6ILDTlAPc= 15 | github.com/prometheus/procfs v0.12.0 h1:jluTpSng7V9hY0O2R9DzzJHYb2xULk9VTR1V1R/k6Bo= 16 | github.com/prometheus/procfs v0.12.0/go.mod h1:pcuDEFsWDnvcgNzo4EEweacyhjeA9Zk3cnaOZAZEfOo= 17 | golang.org/x/sys v0.17.0 h1:25cE3gD+tdBA7lp7QfhuV+rJiE9YXTcS3VG1SqssI/Y= 18 | golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= 19 | google.golang.org/protobuf v1.32.0 h1:pPC6BG5ex8PDFnkbrGU3EixyhKcQ2aDuBS36lqK/C7I= 20 | google.golang.org/protobuf v1.32.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= 21 | -------------------------------------------------------------------------------- /app/backend4-no-instrumentation/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "crypto/sha256" 6 | "fmt" 7 | "math/rand" 8 | "net/http" 9 | "os" 10 | "strconv" 11 | "time" 12 | 13 | "github.com/prometheus/client_golang/prometheus" 14 | 
"github.com/prometheus/client_golang/prometheus/promhttp" 15 | ) 16 | 17 | var ( 18 | rollCounter = prometheus.NewCounter( 19 | prometheus.CounterOpts{ 20 | Name: "dice_roll_count", 21 | Help: "How often the dice was rolled", 22 | }, 23 | ) 24 | 25 | numbersCounter = prometheus.NewCounterVec( 26 | prometheus.CounterOpts{ 27 | Name: "dice_numbers_count", 28 | Help: "How often each number of the dice was rolled", 29 | }, 30 | []string{"number"}, 31 | ) 32 | ) 33 | 34 | func init() { 35 | prometheus.MustRegister(rollCounter) 36 | prometheus.MustRegister(numbersCounter) 37 | } 38 | 39 | func main() { 40 | v, ok := os.LookupEnv("RATE_ERROR") 41 | if !ok { 42 | v = "0" 43 | } 44 | rateError, err := strconv.Atoi(v) 45 | if err != nil { 46 | panic(err) 47 | } 48 | 49 | v, ok = os.LookupEnv("RATE_HIGH_DELAY") 50 | if !ok { 51 | v = "0" 52 | } 53 | rateDelay, err := strconv.Atoi(v) 54 | if err != nil { 55 | panic(err) 56 | } 57 | 58 | mux := http.NewServeMux() 59 | 60 | mux.HandleFunc("GET /rolldice", func(w http.ResponseWriter, r *http.Request) { 61 | player := "Anonymous player" 62 | if p := r.URL.Query().Get("player"); p != "" { 63 | player = p 64 | } 65 | 66 | max := 8 67 | if fmt.Sprintf("%x", sha256.Sum256([]byte(player))) == "f4b7c19317c929d2a34297d6229defe5262fa556ef654b600fc98f02c6d87fdc" { 68 | max = 8 69 | } else { 70 | max = 6 71 | } 72 | result := doRoll(r.Context(), max) 73 | causeDelay(r.Context(), rateDelay) 74 | if err := causeError(r.Context(), rateError); err != nil { 75 | w.WriteHeader(http.StatusInternalServerError) 76 | return 77 | } 78 | resStr := strconv.Itoa(result) 79 | rollCounter.Inc() 80 | numbersCounter.WithLabelValues(resStr).Inc() 81 | if _, err := w.Write([]byte(resStr)); err != nil { 82 | w.WriteHeader(http.StatusInternalServerError) 83 | } 84 | 85 | }) 86 | 87 | mux.HandleFunc("GET /metrics", promhttp.Handler().ServeHTTP) 88 | srv := &http.Server{ 89 | Addr: "0.0.0.0:5165", 90 | Handler: mux, 91 | } 92 | 93 | if err := srv.ListenAndServe(); 
err != nil { 94 | panic(err) 95 | } 96 | } 97 | 98 | func causeError(ctx context.Context, rate int) error { 99 | randomNumber := rand.Intn(100) 100 | if randomNumber < rate { 101 | err := fmt.Errorf("number(%d)) < rate(%d)", randomNumber, rate) 102 | return err 103 | } 104 | return nil 105 | } 106 | 107 | func causeDelay(ctx context.Context, rate int) { 108 | randomNumber := rand.Intn(100) 109 | if randomNumber < rate { 110 | time.Sleep(time.Duration(2+rand.Intn(3)) * time.Second) 111 | } 112 | } 113 | 114 | func doRoll(_ context.Context, max int) int { 115 | return rand.Intn(max) + 1 116 | } 117 | -------------------------------------------------------------------------------- /app/backend4/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM golang:1.22 as builder 2 | 3 | WORKDIR /src 4 | 5 | COPY . /src 6 | 7 | RUN CGO_ENABLED=0 go build -v -o /app 8 | 9 | FROM scratch 10 | 11 | COPY --from=0 /app /app 12 | 13 | EXPOSE 8080 14 | 15 | CMD ["/app"] 16 | -------------------------------------------------------------------------------- /app/backend4/go.mod: -------------------------------------------------------------------------------- 1 | module github.com/kubecon-eu-2024/backend 2 | 3 | go 1.22.0 4 | 5 | require ( 6 | github.com/prometheus/client_golang v1.19.0 7 | go.opentelemetry.io/otel v1.24.0 8 | go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.24.0 9 | go.opentelemetry.io/otel/sdk v1.24.0 10 | go.opentelemetry.io/otel/trace v1.24.0 11 | ) 12 | 13 | require ( 14 | github.com/beorn7/perks v1.0.1 // indirect 15 | github.com/cenkalti/backoff/v4 v4.2.1 // indirect 16 | github.com/cespare/xxhash/v2 v2.2.0 // indirect 17 | github.com/felixge/httpsnoop v1.0.4 // indirect 18 | github.com/go-logr/logr v1.4.1 // indirect 19 | github.com/go-logr/stdr v1.2.2 // indirect 20 | github.com/golang/protobuf v1.5.3 // indirect 21 | github.com/grpc-ecosystem/grpc-gateway/v2 v2.19.0 // indirect 22 | 
github.com/prometheus/client_model v0.5.0 // indirect 23 | github.com/prometheus/common v0.48.0 // indirect 24 | github.com/prometheus/procfs v0.12.0 // indirect 25 | go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.49.0 // indirect 26 | go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.24.0 // indirect 27 | go.opentelemetry.io/otel/metric v1.24.0 // indirect 28 | go.opentelemetry.io/proto/otlp v1.1.0 // indirect 29 | golang.org/x/net v0.20.0 // indirect 30 | golang.org/x/sys v0.17.0 // indirect 31 | golang.org/x/text v0.14.0 // indirect 32 | google.golang.org/genproto/googleapis/api v0.0.0-20240123012728-ef4313101c80 // indirect 33 | google.golang.org/genproto/googleapis/rpc v0.0.0-20240123012728-ef4313101c80 // indirect 34 | google.golang.org/grpc v1.62.1 // indirect 35 | google.golang.org/protobuf v1.32.0 // indirect 36 | ) 37 | -------------------------------------------------------------------------------- /app/backend4/go.sum: -------------------------------------------------------------------------------- 1 | github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= 2 | github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= 3 | github.com/cenkalti/backoff/v4 v4.2.1 h1:y4OZtCnogmCPw98Zjyt5a6+QwPLGkiQsYW5oUqylYbM= 4 | github.com/cenkalti/backoff/v4 v4.2.1/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= 5 | github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= 6 | github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= 7 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 8 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 9 | github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= 10 | github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= 11 | github.com/go-logr/logr v1.2.2/go.mod 
h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= 12 | github.com/go-logr/logr v1.4.1 h1:pKouT5E8xu9zeFC39JXRDukb6JFQPXM5p5I91188VAQ= 13 | github.com/go-logr/logr v1.4.1/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= 14 | github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= 15 | github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= 16 | github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= 17 | github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg= 18 | github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= 19 | github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= 20 | github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= 21 | github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= 22 | github.com/grpc-ecosystem/grpc-gateway/v2 v2.19.0 h1:Wqo399gCIufwto+VfwCSvsnfGpF/w5E9CNxSwbpD6No= 23 | github.com/grpc-ecosystem/grpc-gateway/v2 v2.19.0/go.mod h1:qmOFXW2epJhM0qSnUUYpldc7gVz2KMQwJ/QYCDIa7XU= 24 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 25 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 26 | github.com/prometheus/client_golang v1.19.0 h1:ygXvpU1AoN1MhdzckN+PyD9QJOSD4x7kmXYlnfbA6JU= 27 | github.com/prometheus/client_golang v1.19.0/go.mod h1:ZRM9uEAypZakd+q/x7+gmsvXdURP+DABIEIjnmDdp+k= 28 | github.com/prometheus/client_model v0.5.0 h1:VQw1hfvPvk3Uv6Qf29VrPF32JB6rtbgI6cYPYQjL0Qw= 29 | github.com/prometheus/client_model v0.5.0/go.mod h1:dTiFglRmd66nLR9Pv9f0mZi7B7fk5Pm3gvsjB5tr+kI= 30 | github.com/prometheus/common v0.48.0 h1:QO8U2CdOzSn1BBsmXJXduaaW+dY/5QLjfB8svtSzKKE= 31 | github.com/prometheus/common v0.48.0/go.mod h1:0/KsvlIEfPQCQ5I2iNSAWKPZziNCvRs5EC6ILDTlAPc= 32 | github.com/prometheus/procfs v0.12.0 
h1:jluTpSng7V9hY0O2R9DzzJHYb2xULk9VTR1V1R/k6Bo= 33 | github.com/prometheus/procfs v0.12.0/go.mod h1:pcuDEFsWDnvcgNzo4EEweacyhjeA9Zk3cnaOZAZEfOo= 34 | github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= 35 | github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= 36 | go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.49.0 h1:jq9TW8u3so/bN+JPT166wjOI6/vQPF6Xe7nMNIltagk= 37 | go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.49.0/go.mod h1:p8pYQP+m5XfbZm9fxtSKAbM6oIllS7s2AfxrChvc7iw= 38 | go.opentelemetry.io/otel v1.24.0 h1:0LAOdjNmQeSTzGBzduGe/rU4tZhMwL5rWgtp9Ku5Jfo= 39 | go.opentelemetry.io/otel v1.24.0/go.mod h1:W7b9Ozg4nkF5tWI5zsXkaKKDjdVjpD4oAt9Qi/MArHo= 40 | go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.24.0 h1:t6wl9SPayj+c7lEIFgm4ooDBZVb01IhLB4InpomhRw8= 41 | go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.24.0/go.mod h1:iSDOcsnSA5INXzZtwaBPrKp/lWu/V14Dd+llD0oI2EA= 42 | go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.24.0 h1:Mw5xcxMwlqoJd97vwPxA8isEaIoxsta9/Q51+TTJLGE= 43 | go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.24.0/go.mod h1:CQNu9bj7o7mC6U7+CA/schKEYakYXWr79ucDHTMGhCM= 44 | go.opentelemetry.io/otel/metric v1.24.0 h1:6EhoGWWK28x1fbpA4tYTOWBkPefTDQnb8WSGXlc88kI= 45 | go.opentelemetry.io/otel/metric v1.24.0/go.mod h1:VYhLe1rFfxuTXLgj4CBiyz+9WYBA8pNGJgDcSFRKBco= 46 | go.opentelemetry.io/otel/sdk v1.24.0 h1:YMPPDNymmQN3ZgczicBY3B6sf9n62Dlj9pWD3ucgoDw= 47 | go.opentelemetry.io/otel/sdk v1.24.0/go.mod h1:KVrIYw6tEubO9E96HQpcmpTKDVn9gdv35HoYiQWGDFg= 48 | go.opentelemetry.io/otel/trace v1.24.0 h1:CsKnnL4dUAr/0llH9FKuc698G04IrpWV0MQA/Y1YELI= 49 | go.opentelemetry.io/otel/trace v1.24.0/go.mod h1:HPc3Xr/cOApsBI154IU0OI0HJexz+aw5uPdbs3UCjNU= 50 | go.opentelemetry.io/proto/otlp v1.1.0 h1:2Di21piLrCqJ3U3eXGCTPHE9R8Nh+0uglSnOyxikMeI= 51 | go.opentelemetry.io/proto/otlp v1.1.0/go.mod 
h1:GpBHCBWiqvVLDqmHZsoMM3C5ySeKTC7ej/RNTae6MdY= 52 | go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= 53 | go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= 54 | golang.org/x/net v0.20.0 h1:aCL9BSgETF1k+blQaYUBx9hJ9LOGP3gAVemcZlf1Kpo= 55 | golang.org/x/net v0.20.0/go.mod h1:z8BVo6PvndSri0LbOE3hAn0apkU+1YvI6E70E9jsnvY= 56 | golang.org/x/sys v0.17.0 h1:25cE3gD+tdBA7lp7QfhuV+rJiE9YXTcS3VG1SqssI/Y= 57 | golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= 58 | golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= 59 | golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= 60 | golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 61 | google.golang.org/genproto v0.0.0-20240123012728-ef4313101c80 h1:KAeGQVN3M9nD0/bQXnr/ClcEMJ968gUXJQ9pwfSynuQ= 62 | google.golang.org/genproto v0.0.0-20240123012728-ef4313101c80/go.mod h1:cc8bqMqtv9gMOr0zHg2Vzff5ULhhL2IXP4sbcn32Dro= 63 | google.golang.org/genproto/googleapis/api v0.0.0-20240123012728-ef4313101c80 h1:Lj5rbfG876hIAYFjqiJnPHfhXbv+nzTWfm04Fg/XSVU= 64 | google.golang.org/genproto/googleapis/api v0.0.0-20240123012728-ef4313101c80/go.mod h1:4jWUdICTdgc3Ibxmr8nAJiiLHwQBY0UI0XZcEMaFKaA= 65 | google.golang.org/genproto/googleapis/rpc v0.0.0-20240123012728-ef4313101c80 h1:AjyfHzEPEFp/NpvfN5g+KDla3EMojjhRVZc1i7cj+oM= 66 | google.golang.org/genproto/googleapis/rpc v0.0.0-20240123012728-ef4313101c80/go.mod h1:PAREbraiVEVGVdTZsVWjSbbTtSyGbAgIIvni8a8CD5s= 67 | google.golang.org/grpc v1.62.1 h1:B4n+nfKzOICUXMgyrNd19h/I9oH0L1pizfk1d4zSgTk= 68 | google.golang.org/grpc v1.62.1/go.mod h1:IWTG0VlJLCh1SkC58F7np9ka9mx/WNkjl4PGJaiq+QE= 69 | google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= 70 | google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= 71 | google.golang.org/protobuf 
v1.32.0 h1:pPC6BG5ex8PDFnkbrGU3EixyhKcQ2aDuBS36lqK/C7I= 72 | google.golang.org/protobuf v1.32.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= 73 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 74 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 75 | -------------------------------------------------------------------------------- /app/backend4/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "crypto/sha256" 6 | "fmt" 7 | "math/rand" 8 | "net/http" 9 | "os" 10 | "strconv" 11 | "strings" 12 | "time" 13 | 14 | "github.com/prometheus/client_golang/prometheus" 15 | "github.com/prometheus/client_golang/prometheus/promhttp" 16 | "go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp" 17 | "go.opentelemetry.io/otel" 18 | "go.opentelemetry.io/otel/attribute" 19 | "go.opentelemetry.io/otel/codes" 20 | "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc" 21 | sdktrace "go.opentelemetry.io/otel/sdk/trace" 22 | "go.opentelemetry.io/otel/trace" 23 | ) 24 | 25 | var tracer = otel.GetTracerProvider().Tracer("github.com/kubecon-eu-2024/backend") 26 | 27 | var ( 28 | rollCounter = prometheus.NewCounter( 29 | prometheus.CounterOpts{ 30 | Name: "dice_roll_count", 31 | Help: "How often the dice was rolled", 32 | }, 33 | ) 34 | 35 | numbersCounter = prometheus.NewCounterVec( 36 | prometheus.CounterOpts{ 37 | Name: "dice_numbers_count", 38 | Help: "How often each number of the dice was rolled", 39 | }, 40 | []string{"number"}, 41 | ) 42 | ) 43 | 44 | func init() { 45 | prometheus.MustRegister(rollCounter) 46 | prometheus.MustRegister(numbersCounter) 47 | } 48 | 49 | func main() { 50 | otelExporter, err := otlptracegrpc.New(context.Background()) 51 | if err != nil { 52 | fmt.Printf("failed to create trace exporter: %s\n", err) 53 | os.Exit(1) 54 | } 55 | tp := 
sdktrace.NewTracerProvider(sdktrace.WithBatcher(otelExporter)) 56 | otel.SetTracerProvider(tp) 57 | 58 | v, ok := os.LookupEnv("RATE_ERROR") 59 | if !ok { 60 | v = "0" 61 | } 62 | rateError, err := strconv.Atoi(v) 63 | if err != nil { 64 | panic(err) 65 | } 66 | 67 | v, ok = os.LookupEnv("RATE_HIGH_DELAY") 68 | if !ok { 69 | v = "0" 70 | } 71 | rateDelay, err := strconv.Atoi(v) 72 | if err != nil { 73 | panic(err) 74 | } 75 | 76 | mux := http.NewServeMux() 77 | 78 | registerHandleFunc := func(pattern string, h http.HandlerFunc) { 79 | route := strings.Split(pattern, " ") 80 | mux.Handle(pattern, otelhttp.NewHandler(otelhttp.WithRouteTag(route[len(route)-1], h), pattern)) 81 | } 82 | 83 | registerHandleFunc("GET /rolldice", func(w http.ResponseWriter, r *http.Request) { 84 | player := "Anonymous player" 85 | if p := r.URL.Query().Get("player"); p != "" { 86 | player = p 87 | } 88 | 89 | trace.SpanFromContext(r.Context()).AddEvent("determine player", trace.WithAttributes(attribute.String("player.name", player))) 90 | max := 8 91 | if fmt.Sprintf("%x", sha256.Sum256([]byte(player))) == "f4b7c19317c929d2a34297d6229defe5262fa556ef654b600fc98f02c6d87fdc" { 92 | max = 8 93 | } else { 94 | max = 6 95 | } 96 | result := doRoll(r.Context(), max) 97 | causeDelay(r.Context(), rateDelay) 98 | if err := causeError(r.Context(), rateError); err != nil { 99 | w.WriteHeader(http.StatusInternalServerError) 100 | return 101 | } 102 | resStr := strconv.Itoa(result) 103 | rollCounter.Inc() 104 | numbersCounter.WithLabelValues(resStr).Inc() 105 | if _, err := w.Write([]byte(resStr)); err != nil { 106 | w.WriteHeader(http.StatusInternalServerError) 107 | } 108 | 109 | }) 110 | 111 | registerHandleFunc("GET /metrics", promhttp.Handler().ServeHTTP) 112 | srv := &http.Server{ 113 | Addr: "0.0.0.0:5165", 114 | Handler: mux, 115 | } 116 | 117 | if err := srv.ListenAndServe(); err != nil { 118 | panic(err) 119 | } 120 | } 121 | 122 | func causeError(ctx context.Context, rate int) error { 123 | 
_, span := tracer.Start(ctx, "causeError") 124 | defer span.End() 125 | 126 | randomNumber := rand.Intn(100) 127 | span.AddEvent("roll", trace.WithAttributes(attribute.Int("number", randomNumber))) 128 | if randomNumber < rate { 129 | err := fmt.Errorf("number(%d)) < rate(%d)", randomNumber, rate) 130 | span.RecordError(err) 131 | span.SetStatus(codes.Error, "some error occured") 132 | return err 133 | } 134 | return nil 135 | } 136 | 137 | func causeDelay(ctx context.Context, rate int) { 138 | _, span := tracer.Start(ctx, "causeDelay") 139 | defer span.End() 140 | randomNumber := rand.Intn(100) 141 | span.AddEvent("roll", trace.WithAttributes(attribute.Int("number", randomNumber))) 142 | if randomNumber < rate { 143 | time.Sleep(time.Duration(2+rand.Intn(3)) * time.Second) 144 | } 145 | } 146 | 147 | func doRoll(_ context.Context, max int) int { 148 | return rand.Intn(max) + 1 149 | } 150 | -------------------------------------------------------------------------------- /app/collector-docker.yaml: -------------------------------------------------------------------------------- 1 | # docker run --rm -it --name=otelcol -p 4317:4317 -v ./app:/tmp ghcr.io/open-telemetry/opentelemetry-collector-releases/opentelemetry-collector:0.94.0 --config /tmp/collector-docker.yaml 2 | receivers: 3 | otlp: 4 | protocols: 5 | grpc: 6 | http: 7 | 8 | processors: 9 | 10 | exporters: 11 | debug: 12 | verbosity: detailed 13 | 14 | extensions: 15 | health_check: 16 | pprof: 17 | zpages: 18 | 19 | service: 20 | extensions: [zpages] 21 | telemetry: 22 | logs: 23 | level: info 24 | pipelines: 25 | traces: 26 | receivers: [otlp] 27 | processors: [] 28 | exporters: [debug] 29 | metrics: 30 | receivers: [otlp] 31 | processors: [] 32 | exporters: [debug] 33 | -------------------------------------------------------------------------------- /app/frontend/.gitignore: -------------------------------------------------------------------------------- 1 | node_modules/ 2 | 
-------------------------------------------------------------------------------- /app/frontend/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM node:18-alpine 2 | 3 | ENV NODE_ENV=production 4 | WORKDIR /app 5 | 6 | COPY ["package.json", "package-lock.json*", "./"] 7 | 8 | RUN npm install --production 9 | 10 | COPY ["index.js", "instrument.js", "run.sh", "./"] 11 | 12 | EXPOSE 4000 13 | 14 | CMD [ "./run.sh" ] 15 | -------------------------------------------------------------------------------- /app/frontend/index.js: -------------------------------------------------------------------------------- 1 | /* 2 | Put your NodeSDK initialization here. 3 | */ 4 | const { context, trace, metrics, ValueType } = require('@opentelemetry/api'); 5 | const http = require("http"); 6 | const app = require("express")(); 7 | const pino = require('pino-http')() 8 | 9 | // not included by default 10 | //var otelsdkinit = require('./instrument.js'); 11 | 12 | app.use(pino) 13 | 14 | const port = process.env.FRONTEND_PORT || 4000; 15 | const backend1url = 16 | process.env.BACKEND1_URL || "http://localhost:5165/rolldice"; 17 | const backend2url = 18 | process.env.BACKEND2_URL || "http://localhost:5000/rolldice"; 19 | 20 | const myMeter = metrics.getMeter("app-meter"); 21 | const requestCounter = myMeter.createCounter('request_total', { 22 | description: "Counter of requests", 23 | valueType: ValueType.INT 24 | }) 25 | const gameCounter = myMeter.createUpDownCounter('app_games_total', { 26 | description: "A counter of how often the game has been played", 27 | valueType: ValueType.INT 28 | }) 29 | const winCounter = myMeter.createUpDownCounter('app_wins_total', { 30 | description: "A counter per player who has won", 31 | valueType: ValueType.INT 32 | }) 33 | 34 | app.get("/", (req, res) => { 35 | requestCounter.add(1); 36 | const { player1, player2 } = Object.assign({player1: "Player 1", player2: "Player 2"}, req.query) 37 | if(player1 
== 'Player 1') { 38 | req.log.info('Player 1 prefers to stay anonymous.') 39 | } 40 | if(player2 == 'Player 2') { 41 | req.log.info('Player 2 prefers to stay anonymous.') 42 | } 43 | span = trace.getSpan(context.active()) 44 | if(span) { 45 | span.setAttribute('app.player1', player1) 46 | // TODO(tracing): Add an attribute for player2 47 | } 48 | 49 | const p1 = new Promise((resolve, reject) => { 50 | http.get(`${backend1url}?player=${player1}`, (response) => { 51 | let data = []; 52 | 53 | response.on("data", (chunk) => { 54 | data.push(chunk); 55 | }); 56 | response.on("end", () => { 57 | try { 58 | const result = JSON.parse(Buffer.concat(data).toString()); 59 | res.write("Player 1 rolls: " + result + "\n"); 60 | resolve(result); 61 | } catch(error) { 62 | reject(error) 63 | } 64 | }); 65 | }).on('error', (error) => { 66 | req.log.error("Backend1 is not available.") 67 | reject(error) 68 | }).end() 69 | }); 70 | 71 | const p2 = new Promise((resolve, reject) => { 72 | http.get(`${backend2url}?player=${player2}`, (response) => { 73 | let data = []; 74 | 75 | response.on("data", (chunk) => { 76 | data.push(chunk); 77 | }); 78 | response.on("end", () => { 79 | try { 80 | const result = Buffer.concat(data).toString(); 81 | res.write("Player 2 rolls: " + result + "\n"); 82 | resolve(result); 83 | } catch(error) { 84 | reject(error) 85 | } 86 | }); 87 | }).on('error', (error) => { 88 | req.log.error("Backend2 is not available.") 89 | reject(error) 90 | }).end() 91 | }); 92 | 93 | Promise.all([p1, p2]).then(([roll1, roll2]) => { 94 | let winner = 'Nobody' 95 | let winnerRolled = 0 96 | if (roll1 > roll2) { 97 | winner = player1 98 | winnerRolled = roll1 99 | } else if (roll2 > roll1) { 100 | winner = player2 101 | winnerRolled = roll2 102 | } 103 | // TODO(tracing): Add the winner as a span attribute 104 | 105 | // Count the total number of games 106 | gameCounter.add(1); 107 | 108 | // TODO (metrics): count how often each player wins - winCounter 109 | // use app.winner 
tag 110 | // Count how often each player wins 111 | 112 | // Add counters for numbers rolled and/or for players who played 113 | 114 | res.end(`${winner} wins`); 115 | }).catch(error => { 116 | try { 117 | res.sendStatus(500).end() 118 | } catch(e) { 119 | // If sending the error fails, the service crashes, we want to avoid that! 120 | } 121 | }); 122 | }); 123 | 124 | app.listen(port, () => { 125 | pino.logger.info(`Example app listening on port ${port}`); 126 | }); 127 | -------------------------------------------------------------------------------- /app/frontend/instrument.js: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * 4 | * 5 | * Use this file as a reference for your own instrumentation, but try to figure it out yourself 6 | * 7 | * 8 | * 9 | */ 10 | const opentelemetry = require("@opentelemetry/sdk-node"); 11 | const { getNodeAutoInstrumentations } = require("@opentelemetry/auto-instrumentations-node"); 12 | const { OTLPTraceExporter } = require("@opentelemetry/exporter-trace-otlp-grpc"); 13 | const otlpGrpc = require('@opentelemetry/exporter-metrics-otlp-grpc'); 14 | const otlpHttp = require('@opentelemetry/exporter-metrics-otlp-http'); 15 | 16 | const { PeriodicExportingMetricReader, MeterProvider, ConsoleMetricExporter } = require('@opentelemetry/sdk-metrics') 17 | 18 | const { Resource } = require('@opentelemetry/resources'); 19 | const { SemanticResourceAttributes } = require('@opentelemetry/semantic-conventions'); 20 | 21 | const sdk = new opentelemetry.NodeSDK({ 22 | resource: new Resource({ 23 | // [SemanticResourceAttributes.SERVICE_NAME]: 'frontend', 24 | // [SemanticResourceAttributes.SERVICE_VERSION]: '0.1.0', 25 | ["my-org-service-version"]: '2.0.1', 26 | }), 27 | traceExporter: new OTLPTraceExporter(), 28 | metricReader: new PeriodicExportingMetricReader({ 29 | exporter: new ConsoleMetricExporter() 30 | // by default send data to OTLP via gRPC 31 | // exporter: new 
otlpGrpc.OTLPMetricExporter(), 32 | // for sending data to Prometheus 33 | // exporter: new otlpHttp.OTLPMetricExporter({ 34 | // // NOTE: p8s expose /v1/metrics under /api/v1/otlp 35 | // url: "http://localhost:9090/api/v1/otlp/v1/metrics", 36 | // }), 37 | }), 38 | instrumentations: [getNodeAutoInstrumentations()] 39 | }); 40 | 41 | sdk.start() 42 | -------------------------------------------------------------------------------- /app/frontend/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "dependencies": { 3 | "@opentelemetry/api": "^1.4.1", 4 | "@opentelemetry/auto-instrumentations-node": "^0.39.4", 5 | "@opentelemetry/exporter-metrics-otlp-grpc": "^0.44.0", 6 | "@opentelemetry/exporter-metrics-otlp-http": "^0.43.0", 7 | "@opentelemetry/exporter-prometheus": "^0.44.0", 8 | "@opentelemetry/exporter-trace-otlp-grpc": "^0.44.0", 9 | "@opentelemetry/sdk-metrics": "^1.17.1", 10 | "@opentelemetry/sdk-node": "^0.44.0", 11 | "express": "^4.18.2", 12 | "pino-http": "^8.3.3" 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /app/frontend/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | if [[ "${OTEL_INSTRUMENTATION_ENABLED}" == "true" ]] ; then 4 | echo 'Run with instrumentation' 5 | env OTEL_SERVICE_NAME=${OTEL_SERVICE_NAME:-frontend} \ 6 | OTEL_TRACES_EXPORTER=${OTEL_TRACES_EXPORTER:-console} \ 7 | OTEL_METRICS_EXPORTER=${OTEL_METRICS_EXPORTER:-console} \ 8 | OTEL_LOGS_EXPORTER=${OTEL_LOGS_EXPORTER:-console} \ 9 | node -r ./instrument.js index.js 10 | else 11 | node index.js 12 | fi -------------------------------------------------------------------------------- /app/instrumentation-head-sampling.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: opentelemetry.io/v1alpha1 2 | kind: Instrumentation 3 | metadata: 4 | name: my-instrumentation 5 | 
namespace: tutorial-application 6 | spec: 7 | exporter: 8 | endpoint: http://otel-collector.observability-backend.svc.cluster.local:4317 9 | propagators: 10 | - tracecontext 11 | - baggage 12 | - b3 13 | sampler: 14 | type: parentbased_traceidratio 15 | argument: "0.5" 16 | resource: 17 | addK8sUIDAttributes: false 18 | python: 19 | env: 20 | # Required if endpoint is set to 4317. 21 | # Python autoinstrumentation uses http/proto by default 22 | # so data must be sent to 4318 instead of 4317. 23 | - name: OTEL_EXPORTER_OTLP_ENDPOINT 24 | value: http://otel-collector.observability-backend.svc.cluster.local:4318 25 | java: 26 | env: 27 | - name: OTEL_INSTRUMENTATION_METHODS_INCLUDE 28 | value: io.opentelemetry.dice.DiceApplication[main]; 29 | - name: OTEL_INSTRUMENTATION_HTTP_SERVER_CAPTURE_RESPONSE_HEADERS 30 | value: Content-Type,Date 31 | # - name: OTEL_INSTRUMENTATION_TOMCAT_ENABLED 32 | # value: "false" 33 | # - name: OTEL_INSTRUMENTATION_SERVLET_ENABLED 34 | # value: "false" 35 | -------------------------------------------------------------------------------- /app/instrumentation-java-custom-config.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: opentelemetry.io/v1alpha1 2 | kind: Instrumentation 3 | metadata: 4 | name: my-instrumentation 5 | namespace: tutorial-application 6 | spec: 7 | exporter: 8 | endpoint: http://otel-collector.observability-backend.svc.cluster.local:4317 9 | propagators: 10 | - tracecontext 11 | - baggage 12 | - b3 13 | sampler: 14 | type: parentbased_traceidratio 15 | argument: "1" 16 | resource: 17 | addK8sUIDAttributes: false 18 | python: 19 | env: 20 | # Required if endpoint is set to 4317. 21 | # Python autoinstrumentation uses http/proto by default 22 | # so data must be sent to 4318 instead of 4317. 
23 | - name: OTEL_EXPORTER_OTLP_ENDPOINT 24 | value: http://otel-collector.observability-backend.svc.cluster.local:4318 25 | java: 26 | env: 27 | - name: OTEL_INSTRUMENTATION_METHODS_INCLUDE 28 | value: io.opentelemetry.dice.DiceApplication[main]; 29 | - name: OTEL_INSTRUMENTATION_HTTP_SERVER_CAPTURE_RESPONSE_HEADERS 30 | value: Content-Type,Date 31 | # - name: OTEL_INSTRUMENTATION_TOMCAT_ENABLED 32 | # value: "false" 33 | # - name: OTEL_INSTRUMENTATION_SERVLET_ENABLED 34 | # value: "false" 35 | -------------------------------------------------------------------------------- /app/instrumentation-replace-backend2.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: apps/v1 3 | kind: Deployment 4 | metadata: 5 | name: backend2-deployment 6 | namespace: tutorial-application 7 | labels: 8 | app: backend2 9 | spec: 10 | replicas: 1 11 | selector: 12 | matchLabels: 13 | app: backend2 14 | template: 15 | metadata: 16 | labels: 17 | app: backend2 18 | annotations: 19 | prometheus.io/scrape: "true" 20 | instrumentation.opentelemetry.io/inject-sdk: "true" 21 | spec: 22 | containers: 23 | - name: backend2 24 | image: ghcr.io/pavolloffay/kubecon-eu-2024-opentelemetry-kubernetes-tracing-tutorial-backend4:latest 25 | ports: 26 | - containerPort: 5165 27 | env: 28 | - name: RATE_ERROR 29 | value: "20" 30 | - name: RATE_HIGH_DELAY 31 | value: "20" 32 | # NOTE: alternative to instrumentation annotation 33 | - name: OTEL_EXPORTER_OTLP_ENDPOINT 34 | value: "http://otel-collector.observability-backend.svc.cluster.local:4317" 35 | - name: OTEL_SERVICE_NAME 36 | value: "go-backend" 37 | -------------------------------------------------------------------------------- /app/instrumentation.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: opentelemetry.io/v1alpha1 2 | kind: Instrumentation 3 | metadata: 4 | name: my-instrumentation 5 | namespace: tutorial-application 6 | spec: 7 | 
exporter: 8 | endpoint: http://otel-collector.observability-backend.svc.cluster.local:4317 9 | propagators: 10 | - tracecontext 11 | - baggage 12 | - b3 13 | sampler: 14 | type: parentbased_traceidratio 15 | argument: "1" 16 | resource: 17 | addK8sUIDAttributes: true 18 | python: 19 | env: 20 | # Required if endpoint is set to 4317. 21 | # Python autoinstrumentation uses http/proto by default 22 | # so data must be sent to 4318 instead of 4317. 23 | - name: OTEL_EXPORTER_OTLP_ENDPOINT 24 | value: http://otel-collector.observability-backend.svc.cluster.local:4318 25 | java: 26 | env: 27 | - name: OTEL_LOGS_EXPORTER 28 | value: otlp 29 | -------------------------------------------------------------------------------- /app/k8s.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Namespace 3 | metadata: 4 | name: tutorial-application 5 | annotations: 6 | kubecon-tutorial: otel-metrics 7 | --- 8 | apiVersion: apps/v1 9 | kind: Deployment 10 | metadata: 11 | name: backend1-deployment 12 | namespace: tutorial-application 13 | labels: 14 | app: backend1 15 | spec: 16 | replicas: 1 17 | selector: 18 | matchLabels: 19 | app: backend1 20 | template: 21 | metadata: 22 | labels: 23 | app: backend1 24 | annotations: 25 | prometheus.io/scrape: "true" 26 | spec: 27 | containers: 28 | - name: backend1 29 | image: ghcr.io/pavolloffay/kubecon-eu-2024-opentelemetry-kubernetes-tracing-tutorial-backend1:latest 30 | imagePullPolicy: IfNotPresent 31 | ports: 32 | - containerPort: 5000 33 | name: service 34 | --- 35 | apiVersion: v1 36 | kind: Service 37 | metadata: 38 | name: backend1-service 39 | namespace: tutorial-application 40 | labels: 41 | app: backend1 42 | annotations: 43 | prometheus.io/scrape: "true" 44 | spec: 45 | ports: 46 | - name: http 47 | port: 5000 48 | targetPort: 5000 49 | selector: 50 | app: backend1 51 | --- 52 | apiVersion: apps/v1 53 | kind: Deployment 54 | metadata: 55 | name: backend2-deployment 56 | 
namespace: tutorial-application 57 | labels: 58 | app: backend2 59 | spec: 60 | replicas: 1 61 | selector: 62 | matchLabels: 63 | app: backend2 64 | template: 65 | metadata: 66 | labels: 67 | app: backend2 68 | annotations: 69 | prometheus.io/scrape: "true" 70 | spec: 71 | containers: 72 | - name: backend2 73 | image: ghcr.io/pavolloffay/kubecon-eu-2024-opentelemetry-kubernetes-tracing-tutorial-backend2:latest 74 | imagePullPolicy: IfNotPresent 75 | ports: 76 | - containerPort: 5165 77 | --- 78 | apiVersion: v1 79 | kind: Service 80 | metadata: 81 | name: backend2-service 82 | namespace: tutorial-application 83 | labels: 84 | app: backend2 85 | annotations: 86 | prometheus.io/scrape: "true" 87 | spec: 88 | ports: 89 | - name: http 90 | port: 5165 91 | targetPort: 5165 92 | selector: 93 | app: backend2 94 | --- 95 | apiVersion: apps/v1 96 | kind: Deployment 97 | metadata: 98 | name: frontend-deployment 99 | namespace: tutorial-application 100 | labels: 101 | app: frontend 102 | spec: 103 | replicas: 1 104 | selector: 105 | matchLabels: 106 | app: frontend 107 | template: 108 | metadata: 109 | labels: 110 | app: frontend 111 | annotations: 112 | prometheus.io/scrape: "true" 113 | spec: 114 | containers: 115 | - name: frontend 116 | image: ghcr.io/pavolloffay/kubecon-eu-2024-opentelemetry-kubernetes-tracing-tutorial-frontend:latest 117 | imagePullPolicy: IfNotPresent 118 | env: 119 | - name: OTEL_INSTRUMENTATION_ENABLED 120 | value: "true" 121 | - name: BACKEND1_URL 122 | value: "http://backend1-service:5000/rolldice" 123 | - name: BACKEND2_URL 124 | value: "http://backend2-service:5165/rolldice" 125 | ports: 126 | - containerPort: 4000 127 | --- 128 | apiVersion: v1 129 | kind: Service 130 | metadata: 131 | name: frontend-service 132 | namespace: tutorial-application 133 | labels: 134 | app: frontend 135 | annotations: 136 | prometheus.io/scrape: "true" 137 | spec: 138 | ports: 139 | - name: http 140 | port: 4000 141 | targetPort: 4000 142 | selector: 143 | app: 
frontend 144 | --- 145 | #apiVersion: apps/v1 146 | #kind: Deployment 147 | #metadata: 148 | # name: loadgen-deployment 149 | # namespace: tutorial-application 150 | # labels: 151 | # app: loadgen 152 | #spec: 153 | # replicas: 1 154 | # selector: 155 | # matchLabels: 156 | # app: loadgen 157 | # template: 158 | # metadata: 159 | # labels: 160 | # app: loadgen 161 | # spec: 162 | # containers: 163 | # - name: frontend 164 | # image: ghcr.io/pavolloffay/kubecon-eu-2024-opentelemetry-kubernetes-tracing-tutorial-loadgen:latest 165 | # imagePullPolicy: IfNotPresent 166 | # env: 167 | # - name: URL 168 | # value: "http://frontend-service:4000/" 169 | -------------------------------------------------------------------------------- /app/loadgen/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM alpine 2 | RUN apk add --no-cache curl coreutils bash 3 | COPY run.sh . 4 | CMD ["./run.sh"] 5 | -------------------------------------------------------------------------------- /app/loadgen/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | SLOWDOWN_IN_SECONDS=${SLOWDOWN_IN_SECONDS:-3} 4 | URL=${URL:-"http://frontend:4000/"} 5 | 6 | while true; 7 | do 8 | MAX=$(($(($RANDOM%30))+10)) 9 | 10 | for i in `seq 1 ${MAX}` 11 | do 12 | PLAYERS=(`echo -e "Pavol\nBenedikt\nYuri\nKristina\nSeverin" | shuf`) 13 | timeout 5 curl -s "${URL}?player1=${PLAYERS[1]}&player2=${PLAYERS[2]}" & 14 | sleep 1 15 | done 16 | 17 | wait 18 | 19 | sleep ${SLOWDOWN_IN_SECONDS} 20 | done 21 | -------------------------------------------------------------------------------- /app/otel-daemonset.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: opentelemetry.io/v1alpha1 3 | kind: OpenTelemetryCollector 4 | metadata: 5 | name: system-tracing 6 | namespace: kube-system 7 | spec: 8 | mode: daemonset 9 | hostNetwork: true 10 | config: | 11 | 
receivers: 12 | otlp: 13 | protocols: 14 | grpc: 15 | endpoint: 0.0.0.0:4317 16 | processors: 17 | 18 | exporters: 19 | otlp: 20 | endpoint: "otel-collector.observability-backend.svc.cluster.local:4317" 21 | tls: 22 | insecure: true 23 | service: 24 | pipelines: 25 | traces: 26 | receivers: [otlp] 27 | processors: [] 28 | exporters: [otlp] 29 | 30 | -------------------------------------------------------------------------------- /app/otel-env: -------------------------------------------------------------------------------- 1 | OTEL_INSTRUMENTATION_ENABLED=true 2 | -------------------------------------------------------------------------------- /app/package-lock.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "app", 3 | "lockfileVersion": 2, 4 | "requires": true, 5 | "packages": {} 6 | } 7 | -------------------------------------------------------------------------------- /backend/01-backend.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Namespace 3 | metadata: 4 | name: observability-backend 5 | --- 6 | apiVersion: v1 7 | data: 8 | prometheus.yml: |- 9 | global: 10 | scrape_interval: 15s 11 | evaluation_interval: 15s 12 | alerting: 13 | alertmanagers: 14 | - static_configs: 15 | - targets: 16 | # - alertmanager:9093 17 | rule_files: 18 | # - "first_rules.yml" 19 | # - "second_rules.yml" 20 | scrape_configs: 21 | - job_name: "prometheus" 22 | static_configs: 23 | - targets: ["localhost:9090"] 24 | kind: ConfigMap 25 | metadata: 26 | labels: 27 | name: prometheus 28 | name: prometheus-conf 29 | namespace: observability-backend 30 | --- 31 | apiVersion: apps/v1 32 | kind: Deployment 33 | metadata: 34 | labels: 35 | name: prometheus 36 | name: prometheus 37 | namespace: observability-backend 38 | spec: 39 | replicas: 1 40 | selector: 41 | matchLabels: 42 | app: prometheus 43 | name: prometheus 44 | template: 45 | metadata: 46 | labels: 47 | app: 
prometheus 48 | name: prometheus 49 | spec: 50 | containers: 51 | - args: 52 | - --config.file=/etc/prometheus/prometheus.yml 53 | - --storage.tsdb.path=/tmp/prometheus/metrics 54 | - --web.enable-remote-write-receiver 55 | - --enable-feature=otlp-write-receiver 56 | - --enable-feature=exemplar-storage 57 | image: quay.io/prometheus/prometheus:v2.47.2 58 | imagePullPolicy: IfNotPresent 59 | name: prometheus 60 | ports: 61 | - containerPort: 9090 62 | volumeMounts: 63 | - mountPath: /etc/prometheus/ 64 | name: prometheus-config-volume 65 | - mountPath: /prometheus/ 66 | name: prometheus-storage-volume 67 | volumes: 68 | - configMap: 69 | defaultMode: 420 70 | name: prometheus-conf 71 | name: prometheus-config-volume 72 | - name: prometheus-storage-volume 73 | --- 74 | apiVersion: v1 75 | kind: Service 76 | metadata: 77 | labels: 78 | name: prometheus 79 | name: prometheus 80 | namespace: observability-backend 81 | spec: 82 | ports: 83 | - name: prometheus-http-metrics 84 | port: 80 85 | targetPort: 9090 86 | selector: 87 | app: prometheus 88 | name: prometheus 89 | --- 90 | --- 91 | apiVersion: v1 92 | kind: ServiceAccount 93 | metadata: 94 | creationTimestamp: null 95 | labels: 96 | app: jaeger 97 | app.kubernetes.io/component: service-account 98 | app.kubernetes.io/instance: jaeger 99 | app.kubernetes.io/managed-by: jaeger-operator 100 | app.kubernetes.io/name: jaeger 101 | app.kubernetes.io/part-of: jaeger 102 | name: jaeger 103 | namespace: observability-backend 104 | --- 105 | apiVersion: v1 106 | data: 107 | sampling: '{"default_strategy":{"param":1,"type":"probabilistic"}}' 108 | kind: ConfigMap 109 | metadata: 110 | creationTimestamp: null 111 | labels: 112 | app: jaeger 113 | app.kubernetes.io/component: sampling-configuration 114 | app.kubernetes.io/instance: jaeger 115 | app.kubernetes.io/managed-by: jaeger-operator 116 | app.kubernetes.io/name: jaeger-sampling-configuration 117 | app.kubernetes.io/part-of: jaeger 118 | name: jaeger-sampling-configuration 
119 | namespace: observability-backend 120 | --- 121 | apiVersion: v1 122 | kind: Service 123 | metadata: 124 | annotations: 125 | prometheus.io/scrape: "false" 126 | service.beta.openshift.io/serving-cert-secret-name: jaeger-collector-headless-tls 127 | creationTimestamp: null 128 | labels: 129 | app: jaeger 130 | app.kubernetes.io/component: service-collector 131 | app.kubernetes.io/instance: jaeger 132 | app.kubernetes.io/managed-by: jaeger-operator 133 | app.kubernetes.io/name: jaeger-collector 134 | app.kubernetes.io/part-of: jaeger 135 | name: jaeger-collector-headless 136 | namespace: observability-backend 137 | spec: 138 | clusterIP: None 139 | ports: 140 | - name: http-zipkin 141 | port: 9411 142 | targetPort: 0 143 | - name: grpc-jaeger 144 | port: 14250 145 | targetPort: 0 146 | - name: http-c-tchan-trft 147 | port: 14267 148 | targetPort: 0 149 | - name: http-c-binary-trft 150 | port: 14268 151 | targetPort: 0 152 | - name: admin-http 153 | port: 14269 154 | targetPort: 0 155 | - name: grpc-otlp 156 | port: 4317 157 | targetPort: 0 158 | - name: http-otlp 159 | port: 4318 160 | targetPort: 0 161 | selector: 162 | app: jaeger 163 | app.kubernetes.io/component: all-in-one 164 | app.kubernetes.io/instance: jaeger 165 | app.kubernetes.io/managed-by: jaeger-operator 166 | app.kubernetes.io/name: jaeger 167 | app.kubernetes.io/part-of: jaeger 168 | status: 169 | loadBalancer: {} 170 | --- 171 | apiVersion: v1 172 | kind: Service 173 | metadata: 174 | creationTimestamp: null 175 | labels: 176 | app: jaeger 177 | app.kubernetes.io/component: service-collector 178 | app.kubernetes.io/instance: jaeger 179 | app.kubernetes.io/managed-by: jaeger-operator 180 | app.kubernetes.io/name: jaeger-collector 181 | app.kubernetes.io/part-of: jaeger 182 | name: jaeger-collector 183 | namespace: observability-backend 184 | spec: 185 | ports: 186 | - name: http-zipkin 187 | port: 9411 188 | targetPort: 0 189 | - name: grpc-jaeger 190 | port: 14250 191 | targetPort: 0 192 | - 
name: http-c-tchan-trft 193 | port: 14267 194 | targetPort: 0 195 | - name: http-c-binary-trft 196 | port: 14268 197 | targetPort: 0 198 | - name: admin-http 199 | port: 14269 200 | targetPort: 0 201 | - name: grpc-otlp 202 | port: 4317 203 | targetPort: 0 204 | - name: http-otlp 205 | port: 4318 206 | targetPort: 0 207 | selector: 208 | app: jaeger 209 | app.kubernetes.io/component: all-in-one 210 | app.kubernetes.io/instance: jaeger 211 | app.kubernetes.io/managed-by: jaeger-operator 212 | app.kubernetes.io/name: jaeger 213 | app.kubernetes.io/part-of: jaeger 214 | type: ClusterIP 215 | status: 216 | loadBalancer: {} 217 | --- 218 | apiVersion: v1 219 | kind: Service 220 | metadata: 221 | creationTimestamp: null 222 | labels: 223 | app: jaeger 224 | app.kubernetes.io/component: service-query 225 | app.kubernetes.io/instance: jaeger 226 | app.kubernetes.io/managed-by: jaeger-operator 227 | app.kubernetes.io/name: jaeger-query 228 | app.kubernetes.io/part-of: jaeger 229 | name: jaeger-query 230 | namespace: observability-backend 231 | spec: 232 | ports: 233 | - name: http-query 234 | port: 16686 235 | targetPort: 16686 236 | - name: grpc-query 237 | port: 16685 238 | targetPort: 16685 239 | - name: admin-http 240 | port: 16687 241 | targetPort: 16687 242 | selector: 243 | app: jaeger 244 | app.kubernetes.io/component: all-in-one 245 | app.kubernetes.io/instance: jaeger 246 | app.kubernetes.io/managed-by: jaeger-operator 247 | app.kubernetes.io/name: jaeger 248 | app.kubernetes.io/part-of: jaeger 249 | type: ClusterIP 250 | status: 251 | loadBalancer: {} 252 | --- 253 | apiVersion: v1 254 | kind: Service 255 | metadata: 256 | creationTimestamp: null 257 | labels: 258 | app: jaeger 259 | app.kubernetes.io/component: service-agent 260 | app.kubernetes.io/instance: jaeger 261 | app.kubernetes.io/managed-by: jaeger-operator 262 | app.kubernetes.io/name: jaeger-agent 263 | app.kubernetes.io/part-of: jaeger 264 | name: jaeger-agent 265 | namespace: observability-backend 
266 | spec: 267 | clusterIP: None 268 | ports: 269 | - name: zk-compact-trft 270 | port: 5775 271 | protocol: UDP 272 | targetPort: 0 273 | - name: http-config-rest 274 | port: 5778 275 | targetPort: 0 276 | - name: jg-compact-trft 277 | port: 6831 278 | protocol: UDP 279 | targetPort: 0 280 | - name: jg-binary-trft 281 | port: 6832 282 | protocol: UDP 283 | targetPort: 0 284 | - name: admin-http 285 | port: 14271 286 | targetPort: 0 287 | selector: 288 | app: jaeger 289 | app.kubernetes.io/component: all-in-one 290 | app.kubernetes.io/instance: jaeger 291 | app.kubernetes.io/managed-by: jaeger-operator 292 | app.kubernetes.io/name: jaeger 293 | app.kubernetes.io/part-of: jaeger 294 | status: 295 | loadBalancer: {} 296 | --- 297 | apiVersion: apps/v1 298 | kind: Deployment 299 | metadata: 300 | annotations: 301 | linkerd.io/inject: disabled 302 | prometheus.io/port: "14269" 303 | prometheus.io/scrape: "true" 304 | creationTimestamp: null 305 | labels: 306 | app: jaeger 307 | app.kubernetes.io/component: all-in-one 308 | app.kubernetes.io/instance: jaeger 309 | app.kubernetes.io/managed-by: jaeger-operator 310 | app.kubernetes.io/name: jaeger 311 | app.kubernetes.io/part-of: jaeger 312 | name: jaeger 313 | namespace: observability-backend 314 | spec: 315 | selector: 316 | matchLabels: 317 | app: jaeger 318 | app.kubernetes.io/component: all-in-one 319 | app.kubernetes.io/instance: jaeger 320 | app.kubernetes.io/managed-by: jaeger-operator 321 | app.kubernetes.io/name: jaeger 322 | app.kubernetes.io/part-of: jaeger 323 | strategy: 324 | type: Recreate 325 | template: 326 | metadata: 327 | annotations: 328 | linkerd.io/inject: disabled 329 | prometheus.io/port: "14269" 330 | prometheus.io/scrape: "true" 331 | sidecar.istio.io/inject: "false" 332 | creationTimestamp: null 333 | labels: 334 | app: jaeger 335 | app.kubernetes.io/component: all-in-one 336 | app.kubernetes.io/instance: jaeger 337 | app.kubernetes.io/managed-by: jaeger-operator 338 | app.kubernetes.io/name: 
jaeger 339 | app.kubernetes.io/part-of: jaeger 340 | spec: 341 | containers: 342 | - args: 343 | - --sampling.strategies-file=/etc/jaeger/sampling/sampling.json 344 | env: 345 | - name: SPAN_STORAGE_TYPE 346 | value: memory 347 | - name: METRICS_STORAGE_TYPE 348 | - name: COLLECTOR_ZIPKIN_HOST_PORT 349 | value: :9411 350 | - name: JAEGER_DISABLED 351 | value: "false" 352 | - name: COLLECTOR_OTLP_ENABLED 353 | value: "true" 354 | image: jaegertracing/all-in-one:1.54.0 355 | livenessProbe: 356 | failureThreshold: 5 357 | httpGet: 358 | path: / 359 | port: 14269 360 | initialDelaySeconds: 5 361 | periodSeconds: 15 362 | name: jaeger 363 | ports: 364 | - containerPort: 5775 365 | name: zk-compact-trft 366 | protocol: UDP 367 | - containerPort: 5778 368 | name: config-rest 369 | - containerPort: 6831 370 | name: jg-compact-trft 371 | protocol: UDP 372 | - containerPort: 6832 373 | name: jg-binary-trft 374 | protocol: UDP 375 | - containerPort: 9411 376 | name: zipkin 377 | - containerPort: 14267 378 | name: c-tchan-trft 379 | - containerPort: 14268 380 | name: c-binary-trft 381 | - containerPort: 16685 382 | name: grpc-query 383 | - containerPort: 16686 384 | name: query 385 | - containerPort: 14269 386 | name: admin-http 387 | - containerPort: 14250 388 | name: grpc 389 | - containerPort: 4317 390 | name: grpc-otlp 391 | - containerPort: 4318 392 | name: http-otlp 393 | readinessProbe: 394 | httpGet: 395 | path: / 396 | port: 14269 397 | initialDelaySeconds: 1 398 | resources: {} 399 | volumeMounts: 400 | - mountPath: /etc/jaeger/sampling 401 | name: jaeger-sampling-configuration-volume 402 | readOnly: true 403 | enableServiceLinks: false 404 | serviceAccountName: jaeger 405 | volumes: 406 | - configMap: 407 | items: 408 | - key: sampling 409 | path: sampling.json 410 | name: jaeger-sampling-configuration 411 | name: jaeger-sampling-configuration-volume 412 | -------------------------------------------------------------------------------- /backend/03-collector.yaml: 
-------------------------------------------------------------------------------- 1 | apiVersion: opentelemetry.io/v1alpha1 2 | kind: OpenTelemetryCollector 3 | metadata: 4 | name: otel 5 | namespace: observability-backend 6 | spec: 7 | image: ghcr.io/open-telemetry/opentelemetry-collector-releases/opentelemetry-collector-contrib:0.94.0 8 | mode: deployment 9 | replicas: 1 10 | ports: 11 | - port: 8888 12 | protocol: TCP 13 | name: metrics 14 | config: | 15 | receivers: 16 | otlp: 17 | protocols: 18 | grpc: 19 | endpoint: 0.0.0.0:4317 20 | http: 21 | endpoint: 0.0.0.0:4318 22 | 23 | processors: 24 | batch: 25 | 26 | exporters: 27 | otlp/traces: 28 | endpoint: jaeger-collector:4317 29 | tls: 30 | insecure: true 31 | 32 | otlphttp/metrics: 33 | endpoint: http://prometheus.observability-backend.svc.cluster.local:80/api/v1/otlp/ 34 | tls: 35 | insecure: true 36 | 37 | debug: 38 | verbosity: detailed 39 | 40 | service: 41 | pipelines: 42 | traces: 43 | receivers: [otlp] 44 | processors: [batch] 45 | exporters: [otlp/traces] 46 | metrics: 47 | receivers: [otlp] 48 | exporters: [otlphttp/metrics] 49 | logs: 50 | receivers: [otlp] 51 | exporters: [debug] 52 | -------------------------------------------------------------------------------- /backend/05-collector-1.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: opentelemetry.io/v1alpha1 2 | kind: OpenTelemetryCollector 3 | metadata: 4 | name: otel 5 | namespace: observability-backend 6 | spec: 7 | image: ghcr.io/open-telemetry/opentelemetry-collector-releases/opentelemetry-collector-contrib:0.94.0 8 | mode: deployment 9 | replicas: 1 10 | ports: 11 | - port: 8888 12 | protocol: TCP 13 | name: metrics 14 | config: | 15 | receivers: 16 | otlp: 17 | protocols: 18 | grpc: 19 | endpoint: 0.0.0.0:4317 20 | http: 21 | endpoint: 0.0.0.0:4318 22 | 23 | processors: 24 | tail_sampling: 25 | decision_wait: 10s # time to wait before making a sampling decision 26 | num_traces: 100 # 
number of traces to be kept in memory 27 | expected_new_traces_per_sec: 10 # expected rate of new traces per second 28 | policies: 29 | - name: keep-errors 30 | type: status_code 31 | status_code: {status_codes: [ERROR]} 32 | - name: keep-slow-traces 33 | type: latency 34 | latency: {threshold_ms: 500} 35 | - name: randomized-policy 36 | type: probabilistic 37 | probabilistic: {sampling_percentage: 10} 38 | 39 | exporters: 40 | otlp/traces: 41 | endpoint: jaeger-collector:4317 42 | tls: 43 | insecure: true 44 | 45 | otlphttp/metrics: 46 | endpoint: http://prometheus.observability-backend.svc.cluster.local:80/api/v1/otlp/ 47 | tls: 48 | insecure: true 49 | 50 | debug: 51 | verbosity: detailed 52 | 53 | service: 54 | pipelines: 55 | traces: 56 | receivers: [otlp] 57 | processors: [tail_sampling] 58 | exporters: [otlp/traces] 59 | metrics: 60 | receivers: [otlp] 61 | exporters: [otlphttp/metrics] 62 | logs: 63 | receivers: [otlp] 64 | exporters: [debug] 65 | -------------------------------------------------------------------------------- /backend/05-collector-2.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: opentelemetry.io/v1alpha1 2 | kind: OpenTelemetryCollector 3 | metadata: 4 | name: otel 5 | namespace: observability-backend 6 | spec: 7 | image: ghcr.io/open-telemetry/opentelemetry-collector-releases/opentelemetry-collector-contrib:0.94.0 8 | mode: deployment 9 | replicas: 1 10 | ports: 11 | - port: 8888 12 | protocol: TCP 13 | name: metrics 14 | config: | 15 | receivers: 16 | otlp: 17 | protocols: 18 | grpc: 19 | endpoint: 0.0.0.0:4317 20 | http: 21 | endpoint: 0.0.0.0:4318 22 | 23 | processors: 24 | batch: 25 | 26 | exporters: 27 | debug: 28 | verbosity: detailed 29 | loadbalancing: 30 | routing_key: "traceID" 31 | protocol: 32 | otlp: 33 | timeout: 1s 34 | tls: 35 | insecure: true 36 | resolver: 37 | k8s: 38 | service: otel-gateway.observability-backend 39 | ports: 40 | - 4317 41 | 42 | otlphttp/metrics: 
43 | endpoint: http://prometheus.observability-backend.svc.cluster.local:80/api/v1/otlp/ 44 | tls: 45 | insecure: true 46 | 47 | debug: 48 | verbosity: detailed 49 | 50 | service: 51 | pipelines: 52 | traces: 53 | receivers: [otlp] 54 | processors: [batch] 55 | exporters: [loadbalancing] 56 | metrics: 57 | receivers: [otlp] 58 | exporters: [otlphttp/metrics] 59 | logs: 60 | receivers: [otlp] 61 | exporters: [debug] 62 | --- 63 | apiVersion: opentelemetry.io/v1alpha1 64 | kind: OpenTelemetryCollector 65 | metadata: 66 | name: otel-gateway 67 | namespace: observability-backend 68 | spec: 69 | image: ghcr.io/open-telemetry/opentelemetry-collector-releases/opentelemetry-collector-contrib:0.94.0 70 | mode: statefulset 71 | replicas: 2 72 | ports: 73 | - port: 8888 74 | protocol: TCP 75 | name: metrics 76 | config: | 77 | receivers: 78 | otlp: 79 | protocols: 80 | grpc: 81 | endpoint: 0.0.0.0:4317 82 | http: 83 | endpoint: 0.0.0.0:4318 84 | 85 | processors: 86 | tail_sampling: 87 | decision_wait: 10s # time to wait before making a sampling decision is made 88 | num_traces: 100 # number of traces to be kept in memory 89 | expected_new_traces_per_sec: 10 # expected rate of new traces per second 90 | policies: 91 | - name: keep-errors 92 | type: status_code 93 | status_code: {status_codes: [ERROR]} 94 | - name: keep-slow-traces 95 | type: latency 96 | latency: {threshold_ms: 500} 97 | - name: randomized-policy 98 | type: probabilistic 99 | probabilistic: {sampling_percentage: 10} 100 | 101 | exporters: 102 | otlp/traces: 103 | endpoint: jaeger-collector:4317 104 | tls: 105 | insecure: true 106 | 107 | otlphttp/metrics: 108 | endpoint: http://prometheus.observability-backend.svc.cluster.local:80/api/v1/otlp/ 109 | tls: 110 | insecure: true 111 | 112 | debug: 113 | verbosity: detailed 114 | 115 | service: 116 | pipelines: 117 | traces: 118 | receivers: [otlp] 119 | processors: [tail_sampling] 120 | exporters: [otlp/traces] 121 | metrics: 122 | receivers: [otlp] 123 | 
exporters: [otlphttp/metrics] 124 | logs: 125 | receivers: [otlp] 126 | exporters: [debug] 127 | --- 128 | -------------------------------------------------------------------------------- /backend/06-backend.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Namespace 3 | metadata: 4 | name: observability-backend 5 | --- 6 | apiVersion: v1 7 | data: 8 | prometheus.yml: |- 9 | global: 10 | scrape_interval: 15s 11 | evaluation_interval: 15s 12 | alerting: 13 | alertmanagers: 14 | - static_configs: 15 | - targets: 16 | # - alertmanager:9093 17 | rule_files: 18 | # - "first_rules.yml" 19 | # - "second_rules.yml" 20 | scrape_configs: 21 | - job_name: "prometheus" 22 | static_configs: 23 | - targets: ["localhost:9090"] 24 | kind: ConfigMap 25 | metadata: 26 | labels: 27 | name: prometheus 28 | name: prometheus-conf 29 | namespace: observability-backend 30 | --- 31 | apiVersion: apps/v1 32 | kind: Deployment 33 | metadata: 34 | labels: 35 | name: prometheus 36 | name: prometheus 37 | namespace: observability-backend 38 | spec: 39 | replicas: 1 40 | selector: 41 | matchLabels: 42 | app: prometheus 43 | name: prometheus 44 | template: 45 | metadata: 46 | labels: 47 | app: prometheus 48 | name: prometheus 49 | spec: 50 | containers: 51 | - args: 52 | - --config.file=/etc/prometheus/prometheus.yml 53 | - --storage.tsdb.path=/tmp/prometheus/metrics 54 | - --web.enable-remote-write-receiver 55 | - --enable-feature=otlp-write-receiver 56 | - --enable-feature=exemplar-storage 57 | image: quay.io/prometheus/prometheus:v2.47.2 58 | imagePullPolicy: IfNotPresent 59 | name: prometheus 60 | ports: 61 | - containerPort: 9090 62 | volumeMounts: 63 | - mountPath: /etc/prometheus/ 64 | name: prometheus-config-volume 65 | - mountPath: /prometheus/ 66 | name: prometheus-storage-volume 67 | volumes: 68 | - configMap: 69 | defaultMode: 420 70 | name: prometheus-conf 71 | name: prometheus-config-volume 72 | - name: 
prometheus-storage-volume 73 | --- 74 | apiVersion: v1 75 | kind: Service 76 | metadata: 77 | labels: 78 | name: prometheus 79 | name: prometheus 80 | namespace: observability-backend 81 | spec: 82 | ports: 83 | - name: prometheus-http-metrics 84 | port: 80 85 | targetPort: 9090 86 | selector: 87 | app: prometheus 88 | name: prometheus 89 | --- 90 | --- 91 | apiVersion: v1 92 | kind: ServiceAccount 93 | metadata: 94 | creationTimestamp: null 95 | labels: 96 | app: jaeger 97 | app.kubernetes.io/component: service-account 98 | app.kubernetes.io/instance: jaeger 99 | app.kubernetes.io/managed-by: jaeger-operator 100 | app.kubernetes.io/name: jaeger 101 | app.kubernetes.io/part-of: jaeger 102 | name: jaeger 103 | namespace: observability-backend 104 | --- 105 | apiVersion: v1 106 | data: 107 | sampling: '{"default_strategy":{"param":1,"type":"probabilistic"}}' 108 | kind: ConfigMap 109 | metadata: 110 | creationTimestamp: null 111 | labels: 112 | app: jaeger 113 | app.kubernetes.io/component: sampling-configuration 114 | app.kubernetes.io/instance: jaeger 115 | app.kubernetes.io/managed-by: jaeger-operator 116 | app.kubernetes.io/name: jaeger-sampling-configuration 117 | app.kubernetes.io/part-of: jaeger 118 | name: jaeger-sampling-configuration 119 | namespace: observability-backend 120 | --- 121 | apiVersion: v1 122 | kind: Service 123 | metadata: 124 | annotations: 125 | prometheus.io/scrape: "false" 126 | service.beta.openshift.io/serving-cert-secret-name: jaeger-collector-headless-tls 127 | creationTimestamp: null 128 | labels: 129 | app: jaeger 130 | app.kubernetes.io/component: service-collector 131 | app.kubernetes.io/instance: jaeger 132 | app.kubernetes.io/managed-by: jaeger-operator 133 | app.kubernetes.io/name: jaeger-collector 134 | app.kubernetes.io/part-of: jaeger 135 | name: jaeger-collector-headless 136 | namespace: observability-backend 137 | spec: 138 | clusterIP: None 139 | ports: 140 | - name: http-zipkin 141 | port: 9411 142 | targetPort: 0 143 | 
- name: grpc-jaeger 144 | port: 14250 145 | targetPort: 0 146 | - name: http-c-tchan-trft 147 | port: 14267 148 | targetPort: 0 149 | - name: http-c-binary-trft 150 | port: 14268 151 | targetPort: 0 152 | - name: admin-http 153 | port: 14269 154 | targetPort: 0 155 | - name: grpc-otlp 156 | port: 4317 157 | targetPort: 0 158 | - name: http-otlp 159 | port: 4318 160 | targetPort: 0 161 | selector: 162 | app: jaeger 163 | app.kubernetes.io/component: all-in-one 164 | app.kubernetes.io/instance: jaeger 165 | app.kubernetes.io/managed-by: jaeger-operator 166 | app.kubernetes.io/name: jaeger 167 | app.kubernetes.io/part-of: jaeger 168 | status: 169 | loadBalancer: {} 170 | --- 171 | apiVersion: v1 172 | kind: Service 173 | metadata: 174 | creationTimestamp: null 175 | labels: 176 | app: jaeger 177 | app.kubernetes.io/component: service-collector 178 | app.kubernetes.io/instance: jaeger 179 | app.kubernetes.io/managed-by: jaeger-operator 180 | app.kubernetes.io/name: jaeger-collector 181 | app.kubernetes.io/part-of: jaeger 182 | name: jaeger-collector 183 | namespace: observability-backend 184 | spec: 185 | ports: 186 | - name: http-zipkin 187 | port: 9411 188 | targetPort: 0 189 | - name: grpc-jaeger 190 | port: 14250 191 | targetPort: 0 192 | - name: http-c-tchan-trft 193 | port: 14267 194 | targetPort: 0 195 | - name: http-c-binary-trft 196 | port: 14268 197 | targetPort: 0 198 | - name: admin-http 199 | port: 14269 200 | targetPort: 0 201 | - name: grpc-otlp 202 | port: 4317 203 | targetPort: 0 204 | - name: http-otlp 205 | port: 4318 206 | targetPort: 0 207 | selector: 208 | app: jaeger 209 | app.kubernetes.io/component: all-in-one 210 | app.kubernetes.io/instance: jaeger 211 | app.kubernetes.io/managed-by: jaeger-operator 212 | app.kubernetes.io/name: jaeger 213 | app.kubernetes.io/part-of: jaeger 214 | type: ClusterIP 215 | status: 216 | loadBalancer: {} 217 | --- 218 | apiVersion: v1 219 | kind: Service 220 | metadata: 221 | creationTimestamp: null 222 | labels: 
223 | app: jaeger 224 | app.kubernetes.io/component: service-query 225 | app.kubernetes.io/instance: jaeger 226 | app.kubernetes.io/managed-by: jaeger-operator 227 | app.kubernetes.io/name: jaeger-query 228 | app.kubernetes.io/part-of: jaeger 229 | name: jaeger-query 230 | namespace: observability-backend 231 | spec: 232 | ports: 233 | - name: http-query 234 | port: 16686 235 | targetPort: 16686 236 | - name: grpc-query 237 | port: 16685 238 | targetPort: 16685 239 | - name: admin-http 240 | port: 16687 241 | targetPort: 16687 242 | selector: 243 | app: jaeger 244 | app.kubernetes.io/component: all-in-one 245 | app.kubernetes.io/instance: jaeger 246 | app.kubernetes.io/managed-by: jaeger-operator 247 | app.kubernetes.io/name: jaeger 248 | app.kubernetes.io/part-of: jaeger 249 | type: ClusterIP 250 | status: 251 | loadBalancer: {} 252 | --- 253 | apiVersion: v1 254 | kind: Service 255 | metadata: 256 | creationTimestamp: null 257 | labels: 258 | app: jaeger 259 | app.kubernetes.io/component: service-agent 260 | app.kubernetes.io/instance: jaeger 261 | app.kubernetes.io/managed-by: jaeger-operator 262 | app.kubernetes.io/name: jaeger-agent 263 | app.kubernetes.io/part-of: jaeger 264 | name: jaeger-agent 265 | namespace: observability-backend 266 | spec: 267 | clusterIP: None 268 | ports: 269 | - name: zk-compact-trft 270 | port: 5775 271 | protocol: UDP 272 | targetPort: 0 273 | - name: http-config-rest 274 | port: 5778 275 | targetPort: 0 276 | - name: jg-compact-trft 277 | port: 6831 278 | protocol: UDP 279 | targetPort: 0 280 | - name: jg-binary-trft 281 | port: 6832 282 | protocol: UDP 283 | targetPort: 0 284 | - name: admin-http 285 | port: 14271 286 | targetPort: 0 287 | selector: 288 | app: jaeger 289 | app.kubernetes.io/component: all-in-one 290 | app.kubernetes.io/instance: jaeger 291 | app.kubernetes.io/managed-by: jaeger-operator 292 | app.kubernetes.io/name: jaeger 293 | app.kubernetes.io/part-of: jaeger 294 | status: 295 | loadBalancer: {} 296 | --- 297 
| apiVersion: apps/v1 298 | kind: Deployment 299 | metadata: 300 | annotations: 301 | linkerd.io/inject: disabled 302 | prometheus.io/port: "14269" 303 | prometheus.io/scrape: "true" 304 | creationTimestamp: null 305 | labels: 306 | app: jaeger 307 | app.kubernetes.io/component: all-in-one 308 | app.kubernetes.io/instance: jaeger 309 | app.kubernetes.io/managed-by: jaeger-operator 310 | app.kubernetes.io/name: jaeger 311 | app.kubernetes.io/part-of: jaeger 312 | name: jaeger 313 | namespace: observability-backend 314 | spec: 315 | selector: 316 | matchLabels: 317 | app: jaeger 318 | app.kubernetes.io/component: all-in-one 319 | app.kubernetes.io/instance: jaeger 320 | app.kubernetes.io/managed-by: jaeger-operator 321 | app.kubernetes.io/name: jaeger 322 | app.kubernetes.io/part-of: jaeger 323 | strategy: 324 | type: Recreate 325 | template: 326 | metadata: 327 | annotations: 328 | linkerd.io/inject: disabled 329 | prometheus.io/port: "14269" 330 | prometheus.io/scrape: "true" 331 | sidecar.istio.io/inject: "false" 332 | creationTimestamp: null 333 | labels: 334 | app: jaeger 335 | app.kubernetes.io/component: all-in-one 336 | app.kubernetes.io/instance: jaeger 337 | app.kubernetes.io/managed-by: jaeger-operator 338 | app.kubernetes.io/name: jaeger 339 | app.kubernetes.io/part-of: jaeger 340 | spec: 341 | containers: 342 | - args: 343 | - --sampling.strategies-file=/etc/jaeger/sampling/sampling.json 344 | env: 345 | - name: SPAN_STORAGE_TYPE 346 | value: memory 347 | - name: COLLECTOR_ZIPKIN_HOST_PORT 348 | value: :9411 349 | - name: JAEGER_DISABLED 350 | value: "false" 351 | - name: COLLECTOR_OTLP_ENABLED 352 | value: "true" 353 | - name: METRICS_STORAGE_TYPE 354 | value: "prometheus" 355 | - name: PROMETHEUS_SERVER_URL 356 | value: "http://prometheus.observability-backend" 357 | - name: PROMETHEUS_QUERY_SUPPORT_SPANMETRICS_CONNECTOR 358 | value: "true" 359 | - name: PROMETHEUS_QUERY_NORMALIZE_CALLS 360 | value: "true" 361 | - name: 
PROMETHEUS_QUERY_NORMALIZE_DURATION 362 | value: "true" 363 | image: jaegertracing/all-in-one:1.54.0 364 | livenessProbe: 365 | failureThreshold: 5 366 | httpGet: 367 | path: / 368 | port: 14269 369 | initialDelaySeconds: 5 370 | periodSeconds: 15 371 | name: jaeger 372 | ports: 373 | - containerPort: 5775 374 | name: zk-compact-trft 375 | protocol: UDP 376 | - containerPort: 5778 377 | name: config-rest 378 | - containerPort: 6831 379 | name: jg-compact-trft 380 | protocol: UDP 381 | - containerPort: 6832 382 | name: jg-binary-trft 383 | protocol: UDP 384 | - containerPort: 9411 385 | name: zipkin 386 | - containerPort: 14267 387 | name: c-tchan-trft 388 | - containerPort: 14268 389 | name: c-binary-trft 390 | - containerPort: 16685 391 | name: grpc-query 392 | - containerPort: 16686 393 | name: query 394 | - containerPort: 14269 395 | name: admin-http 396 | - containerPort: 14250 397 | name: grpc 398 | - containerPort: 4317 399 | name: grpc-otlp 400 | - containerPort: 4318 401 | name: http-otlp 402 | readinessProbe: 403 | httpGet: 404 | path: / 405 | port: 14269 406 | initialDelaySeconds: 1 407 | resources: {} 408 | volumeMounts: 409 | - mountPath: /etc/jaeger/sampling 410 | name: jaeger-sampling-configuration-volume 411 | readOnly: true 412 | enableServiceLinks: false 413 | serviceAccountName: jaeger 414 | volumes: 415 | - configMap: 416 | items: 417 | - key: sampling 418 | path: sampling.json 419 | name: jaeger-sampling-configuration 420 | name: jaeger-sampling-configuration-volume 421 | -------------------------------------------------------------------------------- /backend/06-collector.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: opentelemetry.io/v1alpha1 2 | kind: OpenTelemetryCollector 3 | metadata: 4 | name: otel 5 | namespace: observability-backend 6 | spec: 7 | image: ghcr.io/open-telemetry/opentelemetry-collector-releases/opentelemetry-collector-contrib:0.94.0 8 | mode: deployment 9 | replicas: 
1 10 | ports: 11 | - port: 8888 12 | protocol: TCP 13 | name: metrics 14 | config: | 15 | receivers: 16 | otlp: 17 | protocols: 18 | grpc: 19 | endpoint: 0.0.0.0:4317 20 | http: 21 | endpoint: 0.0.0.0:4318 22 | 23 | processors: 24 | tail_sampling: 25 | decision_wait: 10s # time to wait before making a sampling decision 26 | num_traces: 100 # number of traces to be kept in memory 27 | expected_new_traces_per_sec: 10 # expected rate of new traces per second 28 | policies: 29 | - name: keep-errors 30 | type: status_code 31 | status_code: {status_codes: [ERROR]} 32 | - name: keep-slow-traces 33 | type: latency 34 | latency: {threshold_ms: 500} 35 | - name: randomized-policy 36 | type: probabilistic 37 | probabilistic: {sampling_percentage: 10} 38 | 39 | exporters: 40 | otlp/traces: 41 | endpoint: jaeger-collector:4317 42 | tls: 43 | insecure: true 44 | 45 | otlphttp/metrics: 46 | endpoint: http://prometheus.observability-backend.svc.cluster.local:80/api/v1/otlp/ 47 | tls: 48 | insecure: true 49 | 50 | debug: 51 | verbosity: detailed 52 | 53 | connectors: 54 | spanmetrics: 55 | 56 | service: 57 | pipelines: 58 | traces: 59 | receivers: [otlp] 60 | processors: [tail_sampling] 61 | exporters: [otlp/traces] 62 | traces/spanmetrics: 63 | receivers: [otlp] 64 | exporters: [spanmetrics] 65 | metrics: 66 | receivers: [otlp,spanmetrics] 67 | exporters: [otlphttp/metrics] 68 | logs: 69 | receivers: [otlp] 70 | exporters: [debug] 71 | -------------------------------------------------------------------------------- /backend/07-collector.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: opentelemetry.io/v1alpha1 2 | kind: OpenTelemetryCollector 3 | metadata: 4 | name: otel 5 | namespace: observability-backend 6 | spec: 7 | image: ghcr.io/open-telemetry/opentelemetry-collector-releases/opentelemetry-collector-contrib:0.94.0 8 | mode: deployment 9 | replicas: 1 10 | ports: 11 | - port: 8888 12 | protocol: TCP 13 | name: metrics 14 
| config: | 15 | receivers: 16 | otlp: 17 | protocols: 18 | grpc: 19 | endpoint: 0.0.0.0:4317 20 | http: 21 | endpoint: 0.0.0.0:4318 22 | 23 | processors: 24 | batch: 25 | transform: 26 | error_mode: ignore 27 | trace_statements: 28 | - context: span 29 | statements: 30 | - set(attributes["app.player1"], Substring(attributes["app.player1"], 0, 1)) where attributes["app.player1"] != "" 31 | - set(attributes["app.player2"], Substring(attributes["app.player2"], 0, 1)) where attributes["app.player2"] != "" 32 | - replace_all_patterns(attributes, "value", "player1=[a-zA-Z_]*", "player1={playerName}") 33 | - replace_all_patterns(attributes, "value", "player2=[a-zA-Z_]*", "player2={playerName}") 34 | 35 | exporters: 36 | otlp/traces: 37 | endpoint: jaeger-collector:4317 38 | tls: 39 | insecure: true 40 | 41 | otlphttp/metrics: 42 | endpoint: http://prometheus.observability-backend.svc.cluster.local:80/api/v1/otlp/ 43 | tls: 44 | insecure: true 45 | 46 | debug: 47 | verbosity: detailed 48 | 49 | service: 50 | pipelines: 51 | traces: 52 | receivers: [otlp] 53 | processors: [transform, batch] 54 | exporters: [otlp/traces] 55 | metrics: 56 | receivers: [otlp] 57 | exporters: [otlphttp/metrics] 58 | logs: 59 | receivers: [otlp] 60 | exporters: [debug] 61 | -------------------------------------------------------------------------------- /images/api-server.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pavolloffay/kubecon-eu-2024-opentelemetry-kubernetes-tracing-tutorial/e0d57e9fdb0dfd3b0d95eaa19358b2a959862c08/images/api-server.png -------------------------------------------------------------------------------- /images/jaeger-capture-custom-headers.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/pavolloffay/kubecon-eu-2024-opentelemetry-kubernetes-tracing-tutorial/e0d57e9fdb0dfd3b0d95eaa19358b2a959862c08/images/jaeger-capture-custom-headers.jpg -------------------------------------------------------------------------------- /images/jaeger-spm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pavolloffay/kubecon-eu-2024-opentelemetry-kubernetes-tracing-tutorial/e0d57e9fdb0dfd3b0d95eaa19358b2a959862c08/images/jaeger-spm.png -------------------------------------------------------------------------------- /images/jaeger-tail-sampling.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pavolloffay/kubecon-eu-2024-opentelemetry-kubernetes-tracing-tutorial/e0d57e9fdb0dfd3b0d95eaa19358b2a959862c08/images/jaeger-tail-sampling.jpg -------------------------------------------------------------------------------- /images/jaeger-trace-detail.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pavolloffay/kubecon-eu-2024-opentelemetry-kubernetes-tracing-tutorial/e0d57e9fdb0dfd3b0d95eaa19358b2a959862c08/images/jaeger-trace-detail.jpg -------------------------------------------------------------------------------- /images/jaeger-trace-search.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pavolloffay/kubecon-eu-2024-opentelemetry-kubernetes-tracing-tutorial/e0d57e9fdb0dfd3b0d95eaa19358b2a959862c08/images/jaeger-trace-search.jpg -------------------------------------------------------------------------------- /images/jaeger-with-span.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/pavolloffay/kubecon-eu-2024-opentelemetry-kubernetes-tracing-tutorial/e0d57e9fdb0dfd3b0d95eaa19358b2a959862c08/images/jaeger-with-span.jpg -------------------------------------------------------------------------------- /images/otel-collector.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pavolloffay/kubecon-eu-2024-opentelemetry-kubernetes-tracing-tutorial/e0d57e9fdb0dfd3b0d95eaa19358b2a959862c08/images/otel-collector.png -------------------------------------------------------------------------------- /images/prometheus_javaagent_metrics_list.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pavolloffay/kubecon-eu-2024-opentelemetry-kubernetes-tracing-tutorial/e0d57e9fdb0dfd3b0d95eaa19358b2a959862c08/images/prometheus_javaagent_metrics_list.jpg -------------------------------------------------------------------------------- /images/prometheus_javaagent_red_metrics.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pavolloffay/kubecon-eu-2024-opentelemetry-kubernetes-tracing-tutorial/e0d57e9fdb0dfd3b0d95eaa19358b2a959862c08/images/prometheus_javaagent_red_metrics.jpg -------------------------------------------------------------------------------- /images/prometheus_spanmetrics.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pavolloffay/kubecon-eu-2024-opentelemetry-kubernetes-tracing-tutorial/e0d57e9fdb0dfd3b0d95eaa19358b2a959862c08/images/prometheus_spanmetrics.png -------------------------------------------------------------------------------- /images/rolldice-delay.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/pavolloffay/kubecon-eu-2024-opentelemetry-kubernetes-tracing-tutorial/e0d57e9fdb0dfd3b0d95eaa19358b2a959862c08/images/rolldice-delay.png -------------------------------------------------------------------------------- /images/rolldice-error.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pavolloffay/kubecon-eu-2024-opentelemetry-kubernetes-tracing-tutorial/e0d57e9fdb0dfd3b0d95eaa19358b2a959862c08/images/rolldice-error.png -------------------------------------------------------------------------------- /images/sampling-comparision.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pavolloffay/kubecon-eu-2024-opentelemetry-kubernetes-tracing-tutorial/e0d57e9fdb0dfd3b0d95eaa19358b2a959862c08/images/sampling-comparision.jpg -------------------------------------------------------------------------------- /images/scaling-otel-collector.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pavolloffay/kubecon-eu-2024-opentelemetry-kubernetes-tracing-tutorial/e0d57e9fdb0dfd3b0d95eaa19358b2a959862c08/images/scaling-otel-collector.jpg -------------------------------------------------------------------------------- /images/terminated.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pavolloffay/kubecon-eu-2024-opentelemetry-kubernetes-tracing-tutorial/e0d57e9fdb0dfd3b0d95eaa19358b2a959862c08/images/terminated.png -------------------------------------------------------------------------------- /images/terminating.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pavolloffay/kubecon-eu-2024-opentelemetry-kubernetes-tracing-tutorial/e0d57e9fdb0dfd3b0d95eaa19358b2a959862c08/images/terminating.png 
-------------------------------------------------------------------------------- /images/tracing-setup.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pavolloffay/kubecon-eu-2024-opentelemetry-kubernetes-tracing-tutorial/e0d57e9fdb0dfd3b0d95eaa19358b2a959862c08/images/tracing-setup.png -------------------------------------------------------------------------------- /intro-slides.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pavolloffay/kubecon-eu-2024-opentelemetry-kubernetes-tracing-tutorial/e0d57e9fdb0dfd3b0d95eaa19358b2a959862c08/intro-slides.pdf -------------------------------------------------------------------------------- /kind-1.29.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | kind: Cluster 3 | apiVersion: kind.x-k8s.io/v1alpha4 4 | featureGates: 5 | APIServerTracing: true 6 | nodes: 7 | - role: control-plane 8 | image: kindest/node:v1.29.1@sha256:a0cc28af37cf39b019e2b448c54d1a3f789de32536cb5a5db61a49623e527144 9 | extraMounts: 10 | - hostPath: ./app/api-server/ 11 | containerPath: /api-server 12 | readOnly: true 13 | selinuxRelabel: true 14 | kubeadmConfigPatches: 15 | - | 16 | kind: KubeletConfiguration 17 | featureGates: 18 | KubeletTracing: true 19 | tracing: 20 | samplingRatePerMillion: 1000000 21 | endpoint: "127.0.0.1:4317" 22 | - | 23 | kind: ClusterConfiguration 24 | etcd: 25 | local: 26 | # NOTE: https://github.com/etcd-io/etcd/pull/16951 27 | imageRepository: "quay.io/coreos" 28 | imageTag: "v3.5.11" 29 | extraArgs: 30 | experimental-enable-distributed-tracing: "true" 31 | # NOTE: Default: localhost:4317, we avoid IPv6 32 | experimental-distributed-tracing-address: 127.0.0.1:4317 33 | experimental-distributed-tracing-service-name: "etcd" 34 | experimental-distributed-tracing-instance-id: "caf201fd-8d5b-467b-a70f-09ad3beb5a21" 35 | # NOTE: Only 
available in etcd 3.5.11 or higher 36 | experimental-distributed-tracing-sampling-rate: "1000000" 37 | apiServer: 38 | extraArgs: 39 | tracing-config-file: "/api-server/tracing-config.yaml" 40 | extraVolumes: 41 | - name: tracing-config 42 | hostPath: /api-server/tracing-config.yaml 43 | mountPath: /api-server/tracing-config.yaml 44 | readOnly: true 45 | pathType: "File" 46 | -------------------------------------------------------------------------------- /tracing-theory.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pavolloffay/kubecon-eu-2024-opentelemetry-kubernetes-tracing-tutorial/e0d57e9fdb0dfd3b0d95eaa19358b2a959862c08/tracing-theory.pdf --------------------------------------------------------------------------------