├── .gitignore ├── README.md ├── init.sh └── sessions ├── 001 ├── README.md └── install.yaml ├── 002 ├── README.md └── install │ ├── 010-metrics-server.yaml │ ├── 020-prometheus-operator.yaml │ ├── 021-prometheus-server.yaml │ ├── 022-prometheus-strimzi.yaml │ ├── 030-grafana-operator.yaml │ ├── 031-grafana-server.yaml │ ├── 032-grafana-strimzi.yaml │ ├── 040-kube-state-metrics.yaml │ └── 050-node-exporter.yaml ├── 003 └── README.md ├── 004 ├── README.md └── install.yaml ├── 005 ├── README.md └── install │ ├── apicurio.yaml │ ├── application.yaml │ ├── greeting.avsc │ └── registry.yaml ├── 006 ├── README.md └── install │ ├── connect.yaml │ └── mysql.yaml ├── 007 ├── README.md └── install │ ├── mm2.yaml │ └── target.yaml ├── 008 └── README.md ├── 009 └── README.md └── 010 ├── README.md └── install.yaml /.gitignore: -------------------------------------------------------------------------------- 1 | .settings/ 2 | .idea/ 3 | .vscode/ 4 | target/ 5 | .project 6 | .classpath 7 | .factorypath 8 | .DS_Store 9 | *.iml 10 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Strimzi debugging 2 | 3 | This project contains a series of debugging sessions for Strimzi. 4 | You can use Minikube to run most of the examples. 5 | 6 | 1. [Deploy a Kafka cluster](/sessions/001) 7 | 2. [Monitor Kafka metrics](/sessions/002) 8 | 3. [Get diagnostic data](/sessions/003) 9 | 4. [Configure TLS authentication](/sessions/004) 10 | 5. [Use Kafka with Apicurio Registry](/sessions/005) 11 | 6. [Use Kafka Connect with Debezium](/sessions/006) 12 | 7. [Use Mirror Maker 2 for disaster recovery](/sessions/007) 13 | 8. [Recover broker volumes](/sessions/008) 14 | 9. [Rebalance with Cruise Control](/sessions/009) 15 | 10. [Run transactional applications](/sessions/010) 16 | -------------------------------------------------------------------------------- /init.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | NAMESPACE="test" && export NAMESPACE 4 | STRIMZI_VERSION="0.46.0" && export STRIMZI_VERSION 5 | 6 | [[ "${BASH_SOURCE[0]}" -ef "$0" ]] && echo "Usage: source init.sh" && exit 1 7 | 8 | kafka-cp() { 9 | local id="${1-}" part="${2-50}" 10 | echo 'public void run(String id, int part) { System.out.println(abs(id.hashCode()) % part); } 11 | private int abs(int n) { return (n == Integer.MIN_VALUE) ? 
0 : Math.abs(n); } 12 | run("'"$id"'", '"$part"');' | jshell - 13 | } 14 | 15 | kubectl-kafka() { 16 | kubectl get po kafka-tools &>/dev/null || kubectl run kafka-tools -q --restart="Never" \ 17 | --image="apache/kafka:latest" -- sh -c "trap : TERM INT; sleep infinity & wait" 18 | kubectl wait --for=condition=ready po kafka-tools &>/dev/null 19 | kubectl exec kafka-tools -itq -- sh -c "/opt/kafka/$*" 20 | } 21 | 22 | echo "Deploying Strimzi" 23 | 24 | # create test namespace 25 | kubectl config set-context --current --namespace="$NAMESPACE" &>/dev/null 26 | 27 | # delete any topic first to clean finalizers 28 | kubectl get kt -o yaml 2>/dev/null | yq 'del(.items[].metadata.finalizers[])' \ 29 | | kubectl apply -f - &>/dev/null; kubectl delete kt --all --force &>/dev/null 30 | 31 | kubectl delete ns "$NAMESPACE" --ignore-not-found --force --wait=false &>/dev/null 32 | kubectl wait --for=delete ns/"$NAMESPACE" --timeout=120s &>/dev/null && kubectl create ns "$NAMESPACE" 33 | 34 | # set privileged SecurityStandard label for this namespace 35 | kubectl label ns "$NAMESPACE" pod-security.kubernetes.io/enforce=privileged --overwrite &>/dev/null 36 | 37 | # clean PersistentVolumes 38 | # shellcheck disable=SC2046 39 | kubectl delete pv $(kubectl get pv 2>/dev/null | grep "my-cluster" | awk '{print $1}') --force &>/dev/null 40 | 41 | # clean monitoring stack 42 | kubectl delete ns grafana prometheus --force --wait=false &>/dev/null 43 | kubectl delete crd $(kubectl get crd 2>/dev/null | grep integreatly.org | awk '{print $1}') &>/dev/null 44 | kubectl delete crd $(kubectl get crd 2>/dev/null | grep monitoring.coreos.com | awk '{print $1}') &>/dev/null 45 | 46 | # deploy Strimzi 47 | STRIMZI_FILE="/tmp/strimzi-$STRIMZI_VERSION.yaml" 48 | if [[ ! -f "$STRIMZI_FILE" ]]; then 49 | echo "Downloading Strimzi to $STRIMZI_FILE" 50 | curl -sLk "https://github.com/strimzi/strimzi-kafka-operator/releases/download/$STRIMZI_VERSION/strimzi-cluster-operator-$STRIMZI_VERSION.yaml" -o "$STRIMZI_FILE" 51 | fi 52 | sed -E "s/namespace: .*/namespace: $NAMESPACE/g ; s/memory: .*/memory: 500Mi/g" "$STRIMZI_FILE" \ 53 | | kubectl create -f - --dry-run=client -o yaml | kubectl replace --force -f - &>/dev/null 54 | kubectl set env deploy/strimzi-cluster-operator STRIMZI_FULL_RECONCILIATION_INTERVAL_MS="30000" &>/dev/null 55 | 56 | kubectl wait --for=condition=Available deploy strimzi-cluster-operator --timeout=300s 57 | echo "Done" 58 | -------------------------------------------------------------------------------- /sessions/001/README.md: -------------------------------------------------------------------------------- 1 | ## Deploy a Kafka cluster 2 | 3 | In this example, we deploy a Kafka cluster to a Kubernetes cluster using the operator. 4 | Use the `init.sh` script to easily initialize or reset the test environment. 5 | 6 | > [!IMPORTANT] 7 | > Login first if your Kubernetes cluster requires authentication. 8 | 9 | ```sh 10 | $ source init.sh 11 | Deploying Strimzi 12 | namespace/test created 13 | Done 14 | ``` 15 | 16 | Then, we create a new Kafka cluster and test topic. 17 | In the YAML files, we can see how the desired cluster state is declared. 18 | 19 | In addition to Kafka pods, the Entity Operator (EO) pod is also deployed, which includes two namespaced operators: the Topic Operator (TO), and the User Operator (UO). 20 | These operators only support a single namespace and a single Kafka cluster. 
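Once the cluster below is deployed, you can confirm that the Entity Operator pod runs both of them as separate containers (the `topic-operator` and `user-operator` container names are the Strimzi defaults and are assumed here):

```sh
# List the containers declared in the Entity Operator deployment.
$ kubectl get deploy my-cluster-entity-operator \
    -o jsonpath='{.spec.template.spec.containers[*].name}{"\n"}'
```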
21 | 
22 | ```sh
23 | $ kubectl create -f sessions/001/install.yaml
24 | kafkanodepool.kafka.strimzi.io/controller created
25 | kafkanodepool.kafka.strimzi.io/broker created
26 | kafka.kafka.strimzi.io/my-cluster created
27 | kafkatopic.kafka.strimzi.io/my-topic created
28 | 
29 | $ kubectl get sps,knp,k,kt,po
30 | NAME PODS READY PODS CURRENT PODS AGE
31 | strimzipodset.core.strimzi.io/my-cluster-broker 3 3 3 65s
32 | strimzipodset.core.strimzi.io/my-cluster-controller 3 3 3 65s
33 | 
34 | NAME DESIRED REPLICAS ROLES NODEIDS
35 | kafkanodepool.kafka.strimzi.io/broker 3 ["broker"] [10,11,12]
36 | kafkanodepool.kafka.strimzi.io/controller 3 ["controller"] [0,1,2]
37 | 
38 | NAME DESIRED KAFKA REPLICAS DESIRED ZK REPLICAS READY METADATA STATE WARNINGS
39 | kafka.kafka.strimzi.io/my-cluster
40 | 
41 | NAME CLUSTER PARTITIONS REPLICATION FACTOR READY
42 | kafkatopic.kafka.strimzi.io/my-topic my-cluster 3 3 True
43 | 
44 | NAME READY STATUS RESTARTS AGE
45 | pod/my-cluster-broker-10 1/1 Running 0 64s
46 | pod/my-cluster-broker-11 1/1 Running 0 64s
47 | pod/my-cluster-broker-12 1/1 Running 0 64s
48 | pod/my-cluster-controller-0 1/1 Running 0 63s
49 | pod/my-cluster-controller-1 1/1 Running 0 63s
50 | pod/my-cluster-controller-2 1/1 Running 0 63s
51 | pod/my-cluster-entity-operator-bb7c65dd4-9zdmk 2/2 Running 0 31s
52 | pod/strimzi-cluster-operator-6596f469c9-smsw2 1/1 Running 0 2m5s
53 | ```
54 | 
55 | When the Kafka cluster is ready, we send and receive some messages.
56 | When consuming messages, you can print additional data such as the partition number.
57 | Every consumer with the same `group.id` is part of the same consumer group.
58 | 
59 | ```sh
60 | $ kubectl-kafka bin/kafka-console-producer.sh --bootstrap-server my-cluster-kafka-bootstrap:9092 --topic my-topic \
61 |   --property parse.key=true --property key.separator="#"
62 | >32947#hello
63 | >24910#kafka
64 | >45237#world
65 | >^C
66 | 
67 | $ kubectl-kafka bin/kafka-console-consumer.sh --bootstrap-server my-cluster-kafka-bootstrap:9092 --topic my-topic \
68 |   --group my-group --from-beginning --max-messages 3 --property print.partition=true --property print.key=true
69 | Partition:0 24910 kafka
70 | Partition:2 32947 hello
71 | Partition:2 45237 world
72 | Processed a total of 3 messages
73 | ```
74 | 
75 | It works, but where are our messages being stored?
76 | The broker property `log.dirs` configures where our topic partitions are stored.
77 | We have 3 partitions, which correspond to exactly 3 folders on disk.
78 | 
79 | ```sh
80 | $ kubectl exec my-cluster-broker-10 -- cat /tmp/strimzi.properties | grep log.dirs
81 | log.dirs=/var/lib/kafka/data/kafka-log10
82 | 
83 | $ kubectl exec my-cluster-broker-10 -- ls -lh /var/lib/kafka/data/kafka-log10 | grep my-topic
84 | drwxr-xr-x. 2 kafka root 167 Mar 23 13:18 my-topic-0
85 | drwxr-xr-x. 2 kafka root 167 Mar 23 13:15 my-topic-1
86 | drwxr-xr-x. 2 kafka root 167 Mar 23 13:18 my-topic-2
87 | ```
88 | 
89 | The consumer output shows that messages were sent to partitions 0 and 2.
90 | Looking inside partition 0, we have a `.log` file containing our records (each segment is named after the initial offset), an `.index` file mapping the record offset to its position in the log, and a `.timeindex` file mapping the record timestamp to its position in the log.
91 | The other two files contain additional metadata.
92 | 
93 | ```sh
94 | $ kubectl exec my-cluster-broker-10 -- ls -lh /var/lib/kafka/data/kafka-log10/my-topic-0
95 | total 12K
96 | -rw-r--r--. 1 kafka root 10M Mar 23 13:15 00000000000000000000.index
97 | -rw-r--r--. 1 kafka root 78 Mar 23 13:18 00000000000000000000.log
98 | -rw-r--r--. 1 kafka root 10M Mar 23 13:15 00000000000000000000.timeindex
99 | -rw-r--r--. 1 kafka root 8 Mar 23 13:18 leader-epoch-checkpoint
100 | -rw-r--r--. 1 kafka root 43 Mar 23 13:15 partition.metadata
101 | ```
102 | 
103 | Partition log files are in binary format, but Kafka includes a dump tool for decoding them.
104 | On this partition, we have one batch (`baseOffset`), containing only one record (`| offset`) with key "24910" and payload "kafka".
105 | 
106 | ```sh
107 | $ kubectl exec my-cluster-broker-10 -- bin/kafka-dump-log.sh --deep-iteration --print-data-log \
108 |   --files /var/lib/kafka/data/kafka-log10/my-topic-0/00000000000000000000.log
109 | Dumping /var/lib/kafka/data/kafka-log10/my-topic-0/00000000000000000000.log
110 | Log starting offset: 0
111 | baseOffset: 0 lastOffset: 0 count: 1 baseSequence: 0 lastSequence: 0 producerId: 0 producerEpoch: 0 partitionLeaderEpoch: 0 isTransactional: false isControl: false deleteHorizonMs: OptionalLong.empty position: 0 CreateTime: 1742735936663 size: 78 magic: 2 compresscodec: none crc: 825983240 isvalid: true
112 | | offset: 0 CreateTime: 1742735936663 keySize: 5 valueSize: 5 sequence: 0 headerKeys: [] key: 24910 payload: kafka
113 | ```
114 | 
115 | Our consumer group should have committed the offsets to the `__consumer_offsets` internal topic.
116 | The problem is that this topic has 50 partitions by default, so how do we know which partition was used?
117 | We can use the same algorithm that Kafka uses to map a `group.id` to its offset-coordinating partition.
118 | The `kafka-cp` function is defined inside the `init.sh` script.
119 | 
120 | ```sh
121 | $ kafka-cp my-group
122 | 12
123 | ```
124 | 
125 | We know that the consumer group commit record was sent to `__consumer_offsets-12`, so let's dump this partition too.
126 | Here values are encoded for performance reasons, so we have to pass the `--offsets-decoder` option.
127 | 
128 | This partition contains other metadata, but we are specifically interested in the `offset_commit` key.
129 | We have a batch from our consumer group, which includes 3 records, one for each input topic partition.
130 | As expected, the consumer group committed offset1@partition0, offset2@partition2, and offset0@partition1 (this partition didn't receive any message).
131 | 
132 | ```sh
133 | $ kubectl exec my-cluster-broker-10 -- bin/kafka-dump-log.sh --deep-iteration --print-data-log --offsets-decoder \
134 |   --files /var/lib/kafka/data/kafka-log10/__consumer_offsets-12/00000000000000000000.log
135 | Dumping /var/lib/kafka/data/kafka-log10/__consumer_offsets-12/00000000000000000000.log
136 | Log starting offset: 0
137 | ...
138 | baseOffset: 1 lastOffset: 3 count: 3 baseSequence: 0 lastSequence: 2 producerId: -1 producerEpoch: -1 partitionLeaderEpoch: 0 isTransactional: false isControl: false deleteHorizonMs: OptionalLong.empty position: 344 CreateTime: 1742735956644 size: 232 magic: 2 compresscodec: none crc: 4034662502 isvalid: true 139 | | offset: 1 CreateTime: 1742735956644 keySize: 26 valueSize: 24 sequence: 0 headerKeys: [] key: {"type":"1","data":{"group":"my-group","topic":"my-topic","partition":0}} payload: {"version":"3","data":{"offset":1,"leaderEpoch":0,"metadata":"","commitTimestamp":1742735956641}} 140 | | offset: 2 CreateTime: 1742735956644 keySize: 26 valueSize: 24 sequence: 1 headerKeys: [] key: {"type":"1","data":{"group":"my-group","topic":"my-topic","partition":1}} payload: {"version":"3","data":{"offset":0,"leaderEpoch":-1,"metadata":"","commitTimestamp":1742735956641}} 141 | | offset: 3 CreateTime: 1742735956644 keySize: 26 valueSize: 24 sequence: 2 headerKeys: [] key: {"type":"1","data":{"group":"my-group","topic":"my-topic","partition":2}} payload: {"version":"3","data":{"offset":2,"leaderEpoch":0,"metadata":"","commitTimestamp":1742735956641}} 142 | ... 143 | ``` 144 | -------------------------------------------------------------------------------- /sessions/001/install.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kafka.strimzi.io/v1beta2 2 | kind: KafkaNodePool 3 | metadata: 4 | name: controller 5 | labels: 6 | # must match the cluster name 7 | strimzi.io/cluster: my-cluster 8 | annotations: 9 | strimzi.io/next-node-ids: "[0-9]" 10 | spec: 11 | replicas: 3 12 | roles: 13 | - controller 14 | resources: 15 | # set requests==limits to have Guaranteed QoS 16 | limits: 17 | cpu: 1000m 18 | memory: 1Gi 19 | requests: 20 | cpu: 500m 21 | memory: 1Gi 22 | storage: 23 | size: 5Gi 24 | type: persistent-claim 25 | deleteClaim: false 26 | --- 27 | apiVersion: kafka.strimzi.io/v1beta2 28 | kind: KafkaNodePool 29 | metadata: 30 | name: broker 31 | labels: 32 | # must match the cluster name 33 | strimzi.io/cluster: my-cluster 34 | annotations: 35 | strimzi.io/next-node-ids: "[10-100]" 36 | spec: 37 | replicas: 3 38 | roles: 39 | - broker 40 | resources: 41 | # set requests==limits to have Guaranteed QoS 42 | limits: 43 | cpu: 1000m 44 | memory: 2Gi 45 | requests: 46 | cpu: 500m 47 | memory: 2Gi 48 | storage: 49 | size: 10Gi 50 | type: persistent-claim 51 | deleteClaim: false 52 | --- 53 | apiVersion: kafka.strimzi.io/v1beta2 54 | kind: Kafka 55 | metadata: 56 | name: my-cluster 57 | annotations: 58 | strimzi.io/node-pools: enabled 59 | strimzi.io/kraft: enabled 60 | spec: 61 | kafka: 62 | #version: x.y.z 63 | #metadataVersion: x.y-IVx 64 | config: 65 | num.partitions: 3 66 | default.replication.factor: 3 67 | min.insync.replicas: 2 68 | offsets.topic.replication.factor: 3 69 | transaction.state.log.replication.factor: 3 70 | transaction.state.log.min.isr: 2 71 | listeners: 72 | - name: plain 73 | port: 9092 74 | type: internal 75 | tls: false 76 | - name: tls 77 | port: 9093 78 | type: internal 79 | tls: true 80 | logging: 81 | type: inline 82 | loggers: 83 | rootLogger.level: INFO 84 | logger.kafka.request.logger.level: INFO 85 | logger.kafkatc.name: kafka.coordinator.transaction 86 | logger.kafkatc.level: INFO 87 | logger.kafkalcm.name: kafka.log.LogCleanerManager 88 | logger.kafkalcm.level: INFO 89 | logger.strimzi.name: io.strimzi 90 | logger.strimzi.level: INFO 91 | metricsConfig: 92 | type: jmxPrometheusExporter 93 | valueFrom: 94 | 
configMapKeyRef: 95 | name: kafka-metrics 96 | key: kafka-metrics-config.yml 97 | entityOperator: 98 | topicOperator: 99 | logging: 100 | type: inline 101 | loggers: 102 | rootLogger.level: INFO 103 | logger.top.name: io.strimzi.operator.topic 104 | logger.top.level: INFO 105 | resources: 106 | limits: 107 | cpu: 500m 108 | memory: 512Mi 109 | requests: 110 | cpu: 500m 111 | memory: 256Mi 112 | userOperator: 113 | logging: 114 | type: inline 115 | loggers: 116 | rootLogger.level: INFO 117 | logger.uop.name: io.strimzi.operator.user 118 | logger.uop.level: INFO 119 | resources: 120 | limits: 121 | cpu: 500m 122 | memory: 512Mi 123 | requests: 124 | cpu: 500m 125 | memory: 256Mi 126 | --- 127 | apiVersion: kafka.strimzi.io/v1beta2 128 | kind: KafkaTopic 129 | metadata: 130 | name: my-topic 131 | labels: 132 | # must match the cluster name 133 | strimzi.io/cluster: my-cluster 134 | spec: 135 | partitions: 3 136 | replicas: 3 137 | config: 138 | min.insync.replicas: 2 139 | # 1 GiB to avoid running out of space with load tests 140 | retention.bytes: 1073741824 141 | --- 142 | kind: ConfigMap 143 | apiVersion: v1 144 | metadata: 145 | name: kafka-metrics 146 | labels: 147 | app: strimzi 148 | data: 149 | kafka-metrics-config.yml: | 150 | # See https://github.com/prometheus/jmx_exporter for more info about JMX Prometheus Exporter metrics 151 | lowercaseOutputName: true 152 | rules: 153 | # Special cases and very specific rules 154 | - pattern: kafka.server<>Value 155 | name: kafka_server_$1_$2 156 | type: GAUGE 157 | labels: 158 | clientId: "$3" 159 | topic: "$4" 160 | partition: "$5" 161 | - pattern: kafka.server<>Value 162 | name: kafka_server_$1_$2 163 | type: GAUGE 164 | labels: 165 | clientId: "$3" 166 | broker: "$4:$5" 167 | - pattern: kafka.server<>connections 168 | name: kafka_server_$1_connections_tls_info 169 | type: GAUGE 170 | labels: 171 | cipher: "$2" 172 | protocol: "$3" 173 | listener: "$4" 174 | networkProcessor: "$5" 175 | - pattern: kafka.server<>connections 176 | name: kafka_server_$1_connections_software 177 | type: GAUGE 178 | labels: 179 | clientSoftwareName: "$2" 180 | clientSoftwareVersion: "$3" 181 | listener: "$4" 182 | networkProcessor: "$5" 183 | - pattern: "kafka.server<>(.+-total):" 184 | name: kafka_server_$1_$4 185 | type: COUNTER 186 | labels: 187 | listener: "$2" 188 | networkProcessor: "$3" 189 | - pattern: "kafka.server<>(.+):" 190 | name: kafka_server_$1_$4 191 | type: GAUGE 192 | labels: 193 | listener: "$2" 194 | networkProcessor: "$3" 195 | - pattern: kafka.server<>(.+-total) 196 | name: kafka_server_$1_$4 197 | type: COUNTER 198 | labels: 199 | listener: "$2" 200 | networkProcessor: "$3" 201 | - pattern: kafka.server<>(.+) 202 | name: kafka_server_$1_$4 203 | type: GAUGE 204 | labels: 205 | listener: "$2" 206 | networkProcessor: "$3" 207 | # Some percent metrics use MeanRate attribute 208 | # Ex) kafka.server<>MeanRate 209 | - pattern: kafka.(\w+)<>MeanRate 210 | name: kafka_$1_$2_$3_percent 211 | type: GAUGE 212 | # Generic gauges for percents 213 | - pattern: kafka.(\w+)<>Value 214 | name: kafka_$1_$2_$3_percent 215 | type: GAUGE 216 | - pattern: kafka.(\w+)<>Value 217 | name: kafka_$1_$2_$3_percent 218 | type: GAUGE 219 | labels: 220 | "$4": "$5" 221 | # Generic per-second counters with 0-2 key/value pairs 222 | - pattern: kafka.(\w+)<>Count 223 | name: kafka_$1_$2_$3_total 224 | type: COUNTER 225 | labels: 226 | "$4": "$5" 227 | "$6": "$7" 228 | - pattern: kafka.(\w+)<>Count 229 | name: kafka_$1_$2_$3_total 230 | type: COUNTER 231 | labels: 232 | 
"$4": "$5" 233 | - pattern: kafka.(\w+)<>Count 234 | name: kafka_$1_$2_$3_total 235 | type: COUNTER 236 | # Generic gauges with 0-2 key/value pairs 237 | - pattern: kafka.(\w+)<>Value 238 | name: kafka_$1_$2_$3 239 | type: GAUGE 240 | labels: 241 | "$4": "$5" 242 | "$6": "$7" 243 | - pattern: kafka.(\w+)<>Value 244 | name: kafka_$1_$2_$3 245 | type: GAUGE 246 | labels: 247 | "$4": "$5" 248 | - pattern: kafka.(\w+)<>Value 249 | name: kafka_$1_$2_$3 250 | type: GAUGE 251 | # Emulate Prometheus 'Summary' metrics for the exported 'Histogram's. 252 | # Note that these are missing the '_sum' metric! 253 | - pattern: kafka.(\w+)<>Count 254 | name: kafka_$1_$2_$3_count 255 | type: COUNTER 256 | labels: 257 | "$4": "$5" 258 | "$6": "$7" 259 | - pattern: kafka.(\w+)<>(\d+)thPercentile 260 | name: kafka_$1_$2_$3 261 | type: GAUGE 262 | labels: 263 | "$4": "$5" 264 | "$6": "$7" 265 | quantile: "0.$8" 266 | - pattern: kafka.(\w+)<>Count 267 | name: kafka_$1_$2_$3_count 268 | type: COUNTER 269 | labels: 270 | "$4": "$5" 271 | - pattern: kafka.(\w+)<>(\d+)thPercentile 272 | name: kafka_$1_$2_$3 273 | type: GAUGE 274 | labels: 275 | "$4": "$5" 276 | quantile: "0.$6" 277 | - pattern: kafka.(\w+)<>Count 278 | name: kafka_$1_$2_$3_count 279 | type: COUNTER 280 | - pattern: kafka.(\w+)<>(\d+)thPercentile 281 | name: kafka_$1_$2_$3 282 | type: GAUGE 283 | labels: 284 | quantile: "0.$4" 285 | # KRaft overall related metrics 286 | # distinguish between always increasing COUNTER (total and max) and variable GAUGE (all others) metrics 287 | - pattern: "kafka.server<>(.+-total|.+-max):" 288 | name: kafka_server_raftmetrics_$1 289 | type: COUNTER 290 | - pattern: "kafka.server<>(current-state): (.+)" 291 | name: kafka_server_raftmetrics_$1 292 | value: 1 293 | type: UNTYPED 294 | labels: 295 | $1: "$2" 296 | - pattern: "kafka.server<>(.+):" 297 | name: kafka_server_raftmetrics_$1 298 | type: GAUGE 299 | # KRaft "low level" channels related metrics 300 | # distinguish between always increasing COUNTER (total and max) and variable GAUGE (all others) metrics 301 | - pattern: "kafka.server<>(.+-total|.+-max):" 302 | name: kafka_server_raftchannelmetrics_$1 303 | type: COUNTER 304 | - pattern: "kafka.server<>(.+):" 305 | name: kafka_server_raftchannelmetrics_$1 306 | type: GAUGE 307 | # Broker metrics related to fetching metadata topic records in KRaft mode 308 | - pattern: "kafka.server<>(.+):" 309 | name: kafka_server_brokermetadatametrics_$1 310 | type: GAUGE 311 | -------------------------------------------------------------------------------- /sessions/002/README.md: -------------------------------------------------------------------------------- 1 | ## Monitor Kafka metrics 2 | 3 | First, use [this session](/sessions/001) to deploy a Kafka cluster on Kubernetes. 4 | 5 | When the cluster is ready, install Prometheus, Grafana and Strimzi dashboards. 6 | Only the Cluster Operator and Kafka dashboards are included, but you can easily add the other components. 
7 | 8 | ```sh 9 | 10 | $ for f in sessions/002/install/*.yaml; do 11 | echo ">>> Installing $f" 12 | envsubst < "$f" | kubectl apply -f - 13 | sleep 5 14 | done 15 | >>> Installing sessions/002/install/010-metrics-server.yaml 16 | serviceaccount/metrics-server unchanged 17 | clusterrole.rbac.authorization.k8s.io/system:aggregated-metrics-reader unchanged 18 | rolebinding.rbac.authorization.k8s.io/metrics-server-auth-reader unchanged 19 | clusterrolebinding.rbac.authorization.k8s.io/metrics-server:system:auth-delegator unchanged 20 | clusterrole.rbac.authorization.k8s.io/system:metrics-server unchanged 21 | clusterrolebinding.rbac.authorization.k8s.io/system:metrics-server unchanged 22 | service/metrics-server unchanged 23 | deployment.apps/metrics-server configured 24 | apiservice.apiregistration.k8s.io/v1beta1.metrics.k8s.io unchanged 25 | >>> Installing sessions/002/install/020-prometheus-operator.yaml 26 | namespace/prometheus created 27 | customresourcedefinition.apiextensions.k8s.io/alertmanagers.monitoring.coreos.com created 28 | customresourcedefinition.apiextensions.k8s.io/podmonitors.monitoring.coreos.com created 29 | customresourcedefinition.apiextensions.k8s.io/prometheuses.monitoring.coreos.com created 30 | customresourcedefinition.apiextensions.k8s.io/prometheusrules.monitoring.coreos.com created 31 | customresourcedefinition.apiextensions.k8s.io/servicemonitors.monitoring.coreos.com created 32 | customresourcedefinition.apiextensions.k8s.io/thanosrulers.monitoring.coreos.com created 33 | serviceaccount/prometheus-operator created 34 | clusterrole.rbac.authorization.k8s.io/prometheus-operator unchanged 35 | clusterrolebinding.rbac.authorization.k8s.io/prometheus-operator unchanged 36 | service/prometheus-operator created 37 | deployment.apps/prometheus-operator created 38 | >>> Installing sessions/002/install/021-prometheus-server.yaml 39 | namespace/prometheus unchanged 40 | serviceaccount/prometheus created 41 | clusterrole.rbac.authorization.k8s.io/prometheus unchanged 42 | clusterrolebinding.rbac.authorization.k8s.io/prometheus unchanged 43 | service/prometheus created 44 | ingress.networking.k8s.io/prometheus created 45 | prometheus.monitoring.coreos.com/prometheus created 46 | secret/additional-scrape-configs created 47 | alertmanager.monitoring.coreos.com/alertmanager created 48 | service/alertmanager created 49 | ingress.networking.k8s.io/alertmanager created 50 | secret/alertmanager-alertmanager created 51 | >>> Installing sessions/002/install/022-prometheus-strimzi.yaml 52 | podmonitor.monitoring.coreos.com/strimzi-cluster-operator-metrics-test created 53 | podmonitor.monitoring.coreos.com/strimzi-entity-operator-metrics-test created 54 | podmonitor.monitoring.coreos.com/strimzi-bridge-metrics-test created 55 | podmonitor.monitoring.coreos.com/strimzi-kafka-and-cruise-control-metrics-test created 56 | >>> Installing sessions/002/install/030-grafana-operator.yaml 57 | namespace/grafana created 58 | customresourcedefinition.apiextensions.k8s.io/grafanadashboards.integreatly.org created 59 | customresourcedefinition.apiextensions.k8s.io/grafanadatasources.integreatly.org created 60 | customresourcedefinition.apiextensions.k8s.io/grafananotificationchannels.integreatly.org created 61 | customresourcedefinition.apiextensions.k8s.io/grafanas.integreatly.org created 62 | serviceaccount/controller-manager created 63 | role.rbac.authorization.k8s.io/leader-election-role created 64 | clusterrole.rbac.authorization.k8s.io/manager-role configured 65 | 
clusterrole.rbac.authorization.k8s.io/metrics-reader unchanged 66 | clusterrole.rbac.authorization.k8s.io/proxy-role unchanged 67 | rolebinding.rbac.authorization.k8s.io/leader-election-rolebinding created 68 | clusterrolebinding.rbac.authorization.k8s.io/manager-rolebinding unchanged 69 | clusterrolebinding.rbac.authorization.k8s.io/proxy-rolebinding unchanged 70 | service/controller-manager-metrics-service created 71 | configmap/manager-config created 72 | deployment.apps/controller-manager created 73 | >>> Installing sessions/002/install/031-grafana-server.yaml 74 | namespace/grafana unchanged 75 | grafana.integreatly.org/grafana created 76 | service/grafana created 77 | ingress.networking.k8s.io/grafana created 78 | grafanadatasource.integreatly.org/prometheus created 79 | >>> Installing sessions/002/install/032-grafana-strimzi.yaml 80 | grafanadashboard.integreatly.org/strimzi-operators created 81 | grafanadashboard.integreatly.org/strimzi-kafka created 82 | >>> Installing sessions/002/install/040-kube-state-metrics.yaml 83 | serviceaccount/kube-state-metrics unchanged 84 | clusterrole.rbac.authorization.k8s.io/kube-state-metrics unchanged 85 | clusterrolebinding.rbac.authorization.k8s.io/kube-state-metrics unchanged 86 | deployment.apps/kube-state-metrics unchanged 87 | service/kube-state-metrics unchanged 88 | podmonitor.monitoring.coreos.com/kube-state-metrics created 89 | grafanadashboard.integreatly.org/kube-state-metrics created 90 | >>> Installing sessions/002/install/050-node-exporter.yaml 91 | service/node-exporter created 92 | daemonset.apps/node-exporter created 93 | servicemonitor.monitoring.coreos.com/node-exporter created 94 | grafanadashboard.integreatly.org/node-exporter created 95 | ``` 96 | 97 | When all Grafana is ready, you can access the dashboards from [http://grafana.f12i.io](http://grafana.f12i.io). 98 | 99 | > [!IMPORTANT] 100 | > Make sure to add ingress mappings to `/etc/hosts`. 101 | > Example: `192.168.49.2 prometheus.f12i.io grafana.f12i.io` 102 | 103 | It is also possible to create alerting rules to provide notifications about specific conditions observed in metrics. 104 | This is managed by Prometheus Alertmanager, but it is not described here. 
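As a sketch of what such a rule could look like, the following `PrometheusRule` fires when any partition has no active leader. The metric name comes from the JMX Prometheus Exporter rules configured in the first session, and the Prometheus resource above would also need a `ruleSelector` matching these labels to actually load the rule.

```sh
$ kubectl apply -f - <<'EOF'
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: kafka-alert-rules
  labels:
    app: strimzi
    prometheus: prometheus
  namespace: prometheus
spec:
  groups:
    - name: kafka
      rules:
        - alert: OfflinePartitions
          # offline partitions have no active leader and are neither readable nor writable
          expr: sum(kafka_controller_kafkacontroller_offlinepartitionscount) > 0
          for: 5m
          labels:
            severity: critical
          annotations:
            summary: "Kafka has offline partitions"
            description: "There are {{ $value }} offline partitions in the cluster."
EOF
```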
105 | -------------------------------------------------------------------------------- /sessions/002/install/010-metrics-server.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ServiceAccount 3 | metadata: 4 | labels: 5 | k8s-app: metrics-server 6 | name: metrics-server 7 | namespace: kube-system 8 | --- 9 | apiVersion: rbac.authorization.k8s.io/v1 10 | kind: ClusterRole 11 | metadata: 12 | name: system:aggregated-metrics-reader 13 | labels: 14 | k8s-app: metrics-server 15 | rbac.authorization.k8s.io/aggregate-to-view: "true" 16 | rbac.authorization.k8s.io/aggregate-to-edit: "true" 17 | rbac.authorization.k8s.io/aggregate-to-admin: "true" 18 | rules: 19 | - apiGroups: ["metrics.k8s.io"] 20 | resources: ["pods", "nodes"] 21 | verbs: ["get", "list", "watch"] 22 | --- 23 | apiVersion: rbac.authorization.k8s.io/v1 24 | kind: RoleBinding 25 | metadata: 26 | labels: 27 | k8s-app: metrics-server 28 | name: metrics-server-auth-reader 29 | namespace: kube-system 30 | roleRef: 31 | apiGroup: rbac.authorization.k8s.io 32 | kind: Role 33 | name: extension-apiserver-authentication-reader 34 | subjects: 35 | - kind: ServiceAccount 36 | name: metrics-server 37 | namespace: kube-system 38 | --- 39 | apiVersion: rbac.authorization.k8s.io/v1 40 | kind: ClusterRoleBinding 41 | metadata: 42 | labels: 43 | k8s-app: metrics-server 44 | name: metrics-server:system:auth-delegator 45 | roleRef: 46 | apiGroup: rbac.authorization.k8s.io 47 | kind: ClusterRole 48 | name: system:auth-delegator 49 | subjects: 50 | - kind: ServiceAccount 51 | name: metrics-server 52 | namespace: kube-system 53 | --- 54 | apiVersion: rbac.authorization.k8s.io/v1 55 | kind: ClusterRole 56 | metadata: 57 | labels: 58 | k8s-app: metrics-server 59 | name: system:metrics-server 60 | rules: 61 | - apiGroups: [""] 62 | resources: 63 | - nodes/metrics 64 | verbs: 65 | - get 66 | - apiGroups: [""] 67 | resources: 68 | - pods 69 | - nodes 70 | verbs: 71 | - get 72 | - list 73 | - watch 74 | --- 75 | apiVersion: rbac.authorization.k8s.io/v1 76 | kind: ClusterRoleBinding 77 | metadata: 78 | labels: 79 | k8s-app: metrics-server 80 | name: system:metrics-server 81 | roleRef: 82 | apiGroup: rbac.authorization.k8s.io 83 | kind: ClusterRole 84 | name: system:metrics-server 85 | subjects: 86 | - kind: ServiceAccount 87 | name: metrics-server 88 | namespace: kube-system 89 | --- 90 | apiVersion: v1 91 | kind: Service 92 | metadata: 93 | labels: 94 | k8s-app: metrics-server 95 | name: metrics-server 96 | namespace: kube-system 97 | spec: 98 | ports: 99 | - name: https 100 | port: 443 101 | protocol: TCP 102 | targetPort: https 103 | selector: 104 | k8s-app: metrics-server 105 | --- 106 | apiVersion: apps/v1 107 | kind: Deployment 108 | metadata: 109 | labels: 110 | k8s-app: metrics-server 111 | name: metrics-server 112 | namespace: kube-system 113 | spec: 114 | selector: 115 | matchLabels: 116 | k8s-app: metrics-server 117 | strategy: 118 | rollingUpdate: 119 | maxUnavailable: 0 120 | template: 121 | metadata: 122 | labels: 123 | k8s-app: metrics-server 124 | spec: 125 | containers: 126 | - args: 127 | - --cert-dir=/tmp 128 | - --secure-port=10250 129 | - --kubelet-preferred-address-types=InternalIP,ExternalIP,Hostname 130 | - --kubelet-use-node-status-port 131 | - --metric-resolution=15s 132 | - --kubelet-insecure-tls 133 | image: registry.k8s.io/metrics-server/metrics-server:v0.6.4 134 | imagePullPolicy: IfNotPresent 135 | livenessProbe: 136 | failureThreshold: 3 137 | httpGet: 138 | path: 
/livez 139 | port: https 140 | scheme: HTTPS 141 | periodSeconds: 10 142 | name: metrics-server 143 | resources: 144 | requests: 145 | cpu: 100m 146 | memory: 200Mi 147 | ports: 148 | - containerPort: 10250 149 | name: https 150 | protocol: TCP 151 | readinessProbe: 152 | failureThreshold: 3 153 | httpGet: 154 | path: /readyz 155 | port: https 156 | scheme: HTTPS 157 | periodSeconds: 10 158 | initialDelaySeconds: 20 159 | securityContext: 160 | readOnlyRootFilesystem: true 161 | runAsNonRoot: true 162 | runAsUser: 1000 163 | allowPrivilegeEscalation: false 164 | volumeMounts: 165 | - mountPath: /tmp 166 | name: tmp-dir 167 | nodeSelector: 168 | kubernetes.io/os: linux 169 | priorityClassName: system-cluster-critical 170 | serviceAccountName: metrics-server 171 | volumes: 172 | # mount in tmp so we can safely use from-scratch images and/or read-only containers 173 | - emptyDir: {} 174 | name: tmp-dir 175 | --- 176 | apiVersion: apiregistration.k8s.io/v1 177 | kind: APIService 178 | metadata: 179 | labels: 180 | k8s-app: metrics-server 181 | name: v1beta1.metrics.k8s.io 182 | spec: 183 | group: metrics.k8s.io 184 | groupPriorityMinimum: 100 185 | insecureSkipTLSVerify: true 186 | service: 187 | name: metrics-server 188 | namespace: kube-system 189 | version: v1beta1 190 | versionPriority: 100 191 | -------------------------------------------------------------------------------- /sessions/002/install/021-prometheus-server.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Namespace 3 | metadata: 4 | name: prometheus 5 | labels: 6 | app: prometheus 7 | --- 8 | apiVersion: v1 9 | kind: ServiceAccount 10 | metadata: 11 | name: prometheus 12 | namespace: prometheus 13 | --- 14 | apiVersion: rbac.authorization.k8s.io/v1 15 | kind: ClusterRole 16 | metadata: 17 | name: prometheus 18 | rules: 19 | - apiGroups: [""] 20 | resources: 21 | - nodes 22 | - nodes/metrics 23 | - nodes/proxy 24 | - services 25 | - endpoints 26 | - pods 27 | verbs: ["get", "list", "watch"] 28 | - apiGroups: [""] 29 | resources: 30 | - configmaps 31 | verbs: ["get"] 32 | - nonResourceURLs: ["/metrics"] 33 | verbs: ["get"] 34 | --- 35 | apiVersion: rbac.authorization.k8s.io/v1 36 | kind: ClusterRoleBinding 37 | metadata: 38 | name: prometheus 39 | roleRef: 40 | apiGroup: rbac.authorization.k8s.io 41 | kind: ClusterRole 42 | name: prometheus 43 | subjects: 44 | - kind: ServiceAccount 45 | name: prometheus 46 | namespace: prometheus 47 | --- 48 | kind: Service 49 | apiVersion: v1 50 | metadata: 51 | name: prometheus 52 | labels: 53 | app: prometheus 54 | namespace: prometheus 55 | spec: 56 | type: ClusterIP 57 | ports: 58 | - port: 80 59 | name: http 60 | protocol: TCP 61 | targetPort: web 62 | selector: 63 | prometheus: prometheus 64 | --- 65 | apiVersion: networking.k8s.io/v1 66 | kind: Ingress 67 | metadata: 68 | name: prometheus 69 | namespace: prometheus 70 | spec: 71 | ingressClassName: nginx 72 | rules: 73 | - host: prometheus.f12i.io 74 | http: 75 | paths: 76 | - backend: 77 | service: 78 | name: prometheus 79 | port: 80 | number: 80 81 | path: / 82 | pathType: Prefix 83 | --- 84 | apiVersion: monitoring.coreos.com/v1 85 | kind: Prometheus 86 | metadata: 87 | name: prometheus 88 | labels: 89 | app: prometheus 90 | namespace: prometheus 91 | spec: 92 | replicas: 1 93 | serviceAccountName: prometheus 94 | enableAdminAPI: true 95 | storage: 96 | volumeClaimTemplate: 97 | spec: 98 | resources: 99 | requests: 100 | storage: 10Gi 101 | serviceMonitorSelector: 102 
| matchLabels: 103 | prometheus: prometheus 104 | podMonitorSelector: 105 | matchLabels: 106 | # monitors must have 'prometheus: prometheus' label 107 | prometheus: prometheus 108 | additionalScrapeConfigs: 109 | name: additional-scrape-configs 110 | key: prometheus-additional.yaml 111 | alerting: 112 | alertmanagers: 113 | - namespace: prometheus 114 | name: alertmanager 115 | port: http 116 | --- 117 | apiVersion: v1 118 | kind: Secret 119 | metadata: 120 | name: additional-scrape-configs 121 | labels: 122 | app: prometheus 123 | namespace: prometheus 124 | type: Opaque 125 | stringData: 126 | prometheus-additional.yaml: | 127 | - job_name: kubernetes-cadvisor 128 | honor_labels: true 129 | scrape_interval: 10s 130 | scrape_timeout: 10s 131 | metrics_path: /metrics/cadvisor 132 | scheme: https 133 | kubernetes_sd_configs: 134 | - role: node 135 | namespaces: 136 | names: [] 137 | bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token 138 | tls_config: 139 | ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt 140 | insecure_skip_verify: true 141 | relabel_configs: 142 | - separator: ; 143 | regex: __meta_kubernetes_node_label_(.+) 144 | replacement: $1 145 | action: labelmap 146 | - separator: ; 147 | regex: (.*) 148 | target_label: __address__ 149 | replacement: kubernetes.default.svc:443 150 | action: replace 151 | - source_labels: [__meta_kubernetes_node_name] 152 | separator: ; 153 | regex: (.+) 154 | target_label: __metrics_path__ 155 | replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor 156 | action: replace 157 | - source_labels: [__meta_kubernetes_node_name] 158 | separator: ; 159 | regex: (.*) 160 | target_label: node_name 161 | replacement: $1 162 | action: replace 163 | - source_labels: [__meta_kubernetes_node_address_InternalIP] 164 | separator: ; 165 | regex: (.*) 166 | target_label: node_ip 167 | replacement: $1 168 | action: replace 169 | metric_relabel_configs: 170 | - source_labels: [container, __name__] 171 | separator: ; 172 | regex: POD;container_(network).* 173 | target_label: container 174 | replacement: $1 175 | action: replace 176 | - source_labels: [container] 177 | separator: ; 178 | regex: POD 179 | replacement: $1 180 | action: drop 181 | - source_labels: [container] 182 | separator: ; 183 | regex: ^$ 184 | replacement: $1 185 | action: drop 186 | - source_labels: [__name__] 187 | separator: ; 188 | regex: container_(network_tcp_usage_total|tasks_state|memory_failures_total|network_udp_usage_total) 189 | replacement: $1 190 | action: drop 191 | - job_name: kubernetes-nodes-kubelet 192 | scrape_interval: 10s 193 | scrape_timeout: 10s 194 | scheme: https 195 | kubernetes_sd_configs: 196 | - role: node 197 | namespaces: 198 | names: [] 199 | bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token 200 | tls_config: 201 | ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt 202 | insecure_skip_verify: true 203 | relabel_configs: 204 | - action: labelmap 205 | regex: __meta_kubernetes_node_label_(.+) 206 | - target_label: __address__ 207 | replacement: kubernetes.default.svc:443 208 | - source_labels: [__meta_kubernetes_node_name] 209 | regex: (.+) 210 | target_label: __metrics_path__ 211 | replacement: /api/v1/nodes/${1}/proxy/metrics 212 | --- 213 | apiVersion: monitoring.coreos.com/v1 214 | kind: Alertmanager 215 | metadata: 216 | name: alertmanager 217 | labels: 218 | app: prometheus 219 | namespace: prometheus 220 | spec: 221 | replicas: 1 222 | --- 223 | kind: Service 224 | apiVersion: v1 225 | metadata: 226 
| labels: 227 | app: prometheus 228 | name: alertmanager 229 | namespace: prometheus 230 | spec: 231 | type: ClusterIP 232 | ports: 233 | - port: 80 234 | name: http 235 | protocol: TCP 236 | targetPort: web 237 | selector: 238 | alertmanager: alertmanager 239 | --- 240 | apiVersion: networking.k8s.io/v1 241 | kind: Ingress 242 | metadata: 243 | name: alertmanager 244 | namespace: prometheus 245 | spec: 246 | ingressClassName: nginx 247 | rules: 248 | - host: alertmanager.f12i.io 249 | http: 250 | paths: 251 | - backend: 252 | service: 253 | name: alertmanager 254 | port: 255 | number: 80 256 | path: / 257 | pathType: Prefix 258 | --- 259 | kind: Secret 260 | apiVersion: v1 261 | metadata: 262 | name: alertmanager-alertmanager 263 | labels: 264 | app: prometheus 265 | namespace: prometheus 266 | type: Opaque 267 | stringData: 268 | alertmanager.yaml: | 269 | global: 270 | slack_api_url: https://hooks.slack.com/services/change/me/please 271 | route: 272 | receiver: slack 273 | receivers: 274 | - name: slack 275 | slack_configs: 276 | - channel: "#strimzi-alerts" 277 | title: "{{ range .Alerts }}{{ .Annotations.summary }}\n{{ end }}" 278 | text: "{{ range .Alerts }}{{ .Annotations.description }}\n{{ end }}" 279 | send_resolved: true 280 | -------------------------------------------------------------------------------- /sessions/002/install/022-prometheus-strimzi.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: PodMonitor 3 | metadata: 4 | name: strimzi-cluster-operator-metrics-${NAMESPACE} 5 | labels: 6 | app: strimzi 7 | prometheus: prometheus 8 | namespace: prometheus 9 | spec: 10 | selector: 11 | matchLabels: 12 | strimzi.io/kind: cluster-operator 13 | namespaceSelector: 14 | matchNames: 15 | - ${NAMESPACE} 16 | podMetricsEndpoints: 17 | - path: /metrics 18 | port: http 19 | --- 20 | apiVersion: monitoring.coreos.com/v1 21 | kind: PodMonitor 22 | metadata: 23 | name: strimzi-entity-operator-metrics-${NAMESPACE} 24 | labels: 25 | app: strimzi 26 | prometheus: prometheus 27 | namespace: prometheus 28 | spec: 29 | selector: 30 | matchLabels: 31 | app.kubernetes.io/name: entity-operator 32 | namespaceSelector: 33 | matchNames: 34 | - ${NAMESPACE} 35 | podMetricsEndpoints: 36 | - path: /metrics 37 | port: healthcheck 38 | --- 39 | apiVersion: monitoring.coreos.com/v1 40 | kind: PodMonitor 41 | metadata: 42 | name: strimzi-bridge-metrics-${NAMESPACE} 43 | labels: 44 | app: strimzi 45 | prometheus: prometheus 46 | namespace: prometheus 47 | spec: 48 | selector: 49 | matchLabels: 50 | strimzi.io/kind: KafkaBridge 51 | namespaceSelector: 52 | matchNames: 53 | - ${NAMESPACE} 54 | podMetricsEndpoints: 55 | - path: /metrics 56 | port: rest-api 57 | --- 58 | apiVersion: monitoring.coreos.com/v1 59 | kind: PodMonitor 60 | metadata: 61 | name: strimzi-kafka-and-cruise-control-metrics-${NAMESPACE} 62 | labels: 63 | app: strimzi 64 | prometheus: prometheus 65 | namespace: prometheus 66 | spec: 67 | selector: 68 | matchExpressions: 69 | - key: "strimzi.io/kind" 70 | operator: In 71 | values: ["Kafka", "KafkaConnect", "KafkaConnectS2I", "KafkaMirrorMaker", "KafkaMirrorMaker2"] 72 | namespaceSelector: 73 | matchNames: 74 | - ${NAMESPACE} 75 | podMetricsEndpoints: 76 | - path: /metrics 77 | port: tcp-prometheus 78 | relabelings: 79 | - separator: ; 80 | regex: __meta_kubernetes_pod_label_(strimzi_io_.+) 81 | replacement: $1 82 | action: labelmap 83 | - sourceLabels: [__meta_kubernetes_namespace] 84 | separator: ; 85 | 
regex: (.*) 86 | targetLabel: namespace 87 | replacement: $1 88 | action: replace 89 | - sourceLabels: [__meta_kubernetes_pod_name] 90 | separator: ; 91 | regex: (.*) 92 | targetLabel: kubernetes_pod_name 93 | replacement: $1 94 | action: replace 95 | - sourceLabels: [__meta_kubernetes_pod_node_name] 96 | separator: ; 97 | regex: (.*) 98 | targetLabel: node_name 99 | replacement: $1 100 | action: replace 101 | - sourceLabels: [__meta_kubernetes_pod_host_ip] 102 | separator: ; 103 | regex: (.*) 104 | targetLabel: node_ip 105 | replacement: $1 106 | action: replace 107 | -------------------------------------------------------------------------------- /sessions/002/install/031-grafana-server.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Namespace 3 | metadata: 4 | name: grafana 5 | labels: 6 | app: grafana 7 | --- 8 | apiVersion: integreatly.org/v1alpha1 9 | kind: Grafana 10 | metadata: 11 | name: grafana 12 | labels: 13 | app: grafana 14 | namespace: grafana 15 | spec: 16 | service: {} 17 | config: 18 | log: 19 | mode: "console" 20 | level: "warn" 21 | security: 22 | admin_user: "admin" 23 | admin_password: "admin" 24 | auth: 25 | disable_login_form: False 26 | disable_signout_menu: True 27 | auth.anonymous: 28 | enabled: True 29 | dashboardLabelSelector: 30 | - matchExpressions: 31 | - {key: grafana, operator: In, values: [dashabord]} 32 | --- 33 | kind: Service 34 | apiVersion: v1 35 | metadata: 36 | name: grafana 37 | labels: 38 | app: grafana 39 | namespace: grafana 40 | spec: 41 | type: ClusterIP 42 | ports: 43 | - name: grafana 44 | protocol: TCP 45 | port: 80 46 | targetPort: grafana-http 47 | selector: 48 | app: grafana 49 | --- 50 | apiVersion: networking.k8s.io/v1 51 | kind: Ingress 52 | metadata: 53 | name: grafana 54 | namespace: grafana 55 | spec: 56 | ingressClassName: nginx 57 | rules: 58 | - host: grafana.f12i.io 59 | http: 60 | paths: 61 | - backend: 62 | service: 63 | name: grafana 64 | port: 65 | number: 80 66 | path: / 67 | pathType: Prefix 68 | --- 69 | apiVersion: integreatly.org/v1alpha1 70 | kind: GrafanaDataSource 71 | metadata: 72 | name: prometheus 73 | labels: 74 | app: grafana 75 | namespace: grafana 76 | spec: 77 | name: prometheus.yaml 78 | datasources: 79 | - name: Prometheus 80 | type: prometheus 81 | access: proxy 82 | url: http://prometheus.prometheus.svc:80 83 | isDefault: true 84 | version: 1 85 | editable: true 86 | jsonData: 87 | tlsSkipVerify: true 88 | timeInterval: "5s" 89 | -------------------------------------------------------------------------------- /sessions/002/install/032-grafana-strimzi.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: integreatly.org/v1alpha1 2 | kind: GrafanaDashboard 3 | metadata: 4 | name: strimzi-operators 5 | labels: 6 | app: grafana 7 | grafana: dashabord 8 | namespace: grafana 9 | spec: 10 | json: "" 11 | url: https://raw.githubusercontent.com/strimzi/strimzi-kafka-operator/${STRIMZI_VERSION}/examples/metrics/grafana-dashboards/strimzi-operators.json 12 | datasources: 13 | - inputName: "DS_PROMETHEUS" 14 | datasourceName: "Prometheus" 15 | --- 16 | apiVersion: integreatly.org/v1alpha1 17 | kind: GrafanaDashboard 18 | metadata: 19 | name: strimzi-kafka 20 | labels: 21 | app: grafana 22 | grafana: dashabord 23 | namespace: grafana 24 | spec: 25 | json: "" 26 | url: 
https://raw.githubusercontent.com/strimzi/strimzi-kafka-operator/${STRIMZI_VERSION}/packaging/examples/metrics/grafana-dashboards/strimzi-kafka.json 27 | datasources: 28 | - inputName: "DS_PROMETHEUS" 29 | datasourceName: "Prometheus" 30 | -------------------------------------------------------------------------------- /sessions/002/install/040-kube-state-metrics.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ServiceAccount 3 | metadata: 4 | labels: 5 | app.kubernetes.io/name: kube-state-metrics 6 | name: kube-state-metrics 7 | namespace: kube-system 8 | --- 9 | apiVersion: rbac.authorization.k8s.io/v1 10 | kind: ClusterRole 11 | metadata: 12 | labels: 13 | app.kubernetes.io/name: kube-state-metrics 14 | name: kube-state-metrics 15 | rules: 16 | - apiGroups: 17 | - "" 18 | resources: 19 | - configmaps 20 | - secrets 21 | - nodes 22 | - pods 23 | - services 24 | - serviceaccounts 25 | - resourcequotas 26 | - replicationcontrollers 27 | - limitranges 28 | - persistentvolumeclaims 29 | - persistentvolumes 30 | - namespaces 31 | - endpoints 32 | verbs: 33 | - list 34 | - watch 35 | - apiGroups: 36 | - apps 37 | resources: 38 | - statefulsets 39 | - daemonsets 40 | - deployments 41 | - replicasets 42 | verbs: 43 | - list 44 | - watch 45 | - apiGroups: 46 | - batch 47 | resources: 48 | - cronjobs 49 | - jobs 50 | verbs: 51 | - list 52 | - watch 53 | - apiGroups: 54 | - autoscaling 55 | resources: 56 | - horizontalpodautoscalers 57 | verbs: 58 | - list 59 | - watch 60 | - apiGroups: 61 | - authentication.k8s.io 62 | resources: 63 | - tokenreviews 64 | verbs: 65 | - create 66 | - apiGroups: 67 | - authorization.k8s.io 68 | resources: 69 | - subjectaccessreviews 70 | verbs: 71 | - create 72 | - apiGroups: 73 | - policy 74 | resources: 75 | - poddisruptionbudgets 76 | verbs: 77 | - list 78 | - watch 79 | - apiGroups: 80 | - certificates.k8s.io 81 | resources: 82 | - certificatesigningrequests 83 | verbs: 84 | - list 85 | - watch 86 | - apiGroups: 87 | - discovery.k8s.io 88 | resources: 89 | - endpointslices 90 | verbs: 91 | - list 92 | - watch 93 | - apiGroups: 94 | - storage.k8s.io 95 | resources: 96 | - storageclasses 97 | - volumeattachments 98 | verbs: 99 | - list 100 | - watch 101 | - apiGroups: 102 | - admissionregistration.k8s.io 103 | resources: 104 | - mutatingwebhookconfigurations 105 | - validatingwebhookconfigurations 106 | verbs: 107 | - list 108 | - watch 109 | - apiGroups: 110 | - networking.k8s.io 111 | resources: 112 | - networkpolicies 113 | - ingressclasses 114 | - ingresses 115 | verbs: 116 | - list 117 | - watch 118 | - apiGroups: 119 | - coordination.k8s.io 120 | resources: 121 | - leases 122 | verbs: 123 | - list 124 | - watch 125 | - apiGroups: 126 | - rbac.authorization.k8s.io 127 | resources: 128 | - clusterrolebindings 129 | - clusterroles 130 | - rolebindings 131 | - roles 132 | verbs: 133 | - list 134 | - watch 135 | --- 136 | apiVersion: rbac.authorization.k8s.io/v1 137 | kind: ClusterRoleBinding 138 | metadata: 139 | labels: 140 | app.kubernetes.io/name: kube-state-metrics 141 | name: kube-state-metrics 142 | roleRef: 143 | apiGroup: rbac.authorization.k8s.io 144 | kind: ClusterRole 145 | name: kube-state-metrics 146 | subjects: 147 | - kind: ServiceAccount 148 | name: kube-state-metrics 149 | namespace: kube-system 150 | --- 151 | apiVersion: apps/v1 152 | kind: Deployment 153 | metadata: 154 | labels: 155 | app.kubernetes.io/name: kube-state-metrics 156 | name: kube-state-metrics 157 | namespace: 
kube-system 158 | spec: 159 | replicas: 1 160 | selector: 161 | matchLabels: 162 | app.kubernetes.io/name: kube-state-metrics 163 | template: 164 | metadata: 165 | labels: 166 | app.kubernetes.io/name: kube-state-metrics 167 | spec: 168 | automountServiceAccountToken: true 169 | containers: 170 | - image: registry.k8s.io/kube-state-metrics/kube-state-metrics:v2.8.2 171 | livenessProbe: 172 | httpGet: 173 | path: /healthz 174 | port: 8080 175 | initialDelaySeconds: 5 176 | timeoutSeconds: 5 177 | name: kube-state-metrics 178 | ports: 179 | - containerPort: 8080 180 | name: http-metrics 181 | - containerPort: 8081 182 | name: telemetry 183 | readinessProbe: 184 | httpGet: 185 | path: / 186 | port: 8081 187 | initialDelaySeconds: 5 188 | timeoutSeconds: 5 189 | securityContext: 190 | allowPrivilegeEscalation: false 191 | capabilities: 192 | drop: 193 | - ALL 194 | readOnlyRootFilesystem: true 195 | runAsNonRoot: true 196 | runAsUser: 65534 197 | seccompProfile: 198 | type: RuntimeDefault 199 | nodeSelector: 200 | kubernetes.io/os: linux 201 | serviceAccountName: kube-state-metrics 202 | --- 203 | apiVersion: v1 204 | kind: Service 205 | metadata: 206 | labels: 207 | app.kubernetes.io/name: kube-state-metrics 208 | name: kube-state-metrics 209 | namespace: kube-system 210 | spec: 211 | clusterIP: None 212 | ports: 213 | - name: http-metrics 214 | port: 8080 215 | targetPort: http-metrics 216 | - name: telemetry 217 | port: 8081 218 | targetPort: telemetry 219 | selector: 220 | app.kubernetes.io/name: kube-state-metrics 221 | --- 222 | apiVersion: monitoring.coreos.com/v1 223 | kind: PodMonitor 224 | metadata: 225 | name: kube-state-metrics 226 | labels: 227 | app.kubernetes.io/name: kube-state-metrics 228 | prometheus: prometheus 229 | namespace: prometheus 230 | spec: 231 | selector: 232 | matchLabels: 233 | app.kubernetes.io/name: kube-state-metrics 234 | namespaceSelector: 235 | matchNames: 236 | - kube-system 237 | podMetricsEndpoints: 238 | - path: /metrics 239 | port: http-metrics 240 | relabelings: 241 | - sourceLabels: [exported_pod] 242 | targetLabel: pod 243 | replacement: "$1" 244 | - sourceLabels: [exported_namespace] 245 | targetLabel: namespace 246 | replacement: "$1" 247 | - sourceLabels: [__address__] 248 | # Add the cluster label 249 | targetLabel: cluster 250 | replacement: "$CLUSTER_NAME" 251 | --- 252 | apiVersion: integreatly.org/v1alpha1 253 | kind: GrafanaDashboard 254 | metadata: 255 | name: kube-state-metrics 256 | labels: 257 | app: grafana 258 | grafana: dashabord 259 | namespace: grafana 260 | spec: 261 | json: "" 262 | url: https://grafana.com/api/dashboards/13332/revisions/12/download 263 | datasources: 264 | - inputName: "VAR_DATASOURCE" 265 | datasourceName: "Prometheus" 266 | -------------------------------------------------------------------------------- /sessions/002/install/050-node-exporter.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: node-exporter 5 | labels: 6 | app: node-exporter 7 | namespace: prometheus 8 | spec: 9 | clusterIP: None 10 | ports: 11 | - name: scrape 12 | port: 9100 13 | protocol: TCP 14 | selector: 15 | app: node-exporter 16 | type: ClusterIP 17 | --- 18 | apiVersion: apps/v1 19 | kind: DaemonSet 20 | metadata: 21 | name: node-exporter 22 | labels: 23 | app: prometheus 24 | namespace: prometheus 25 | spec: 26 | selector: 27 | matchLabels: 28 | app: node-exporter 29 | template: 30 | metadata: 31 | labels: 32 | app: node-exporter 33 | 
name: node-exporter 34 | spec: 35 | containers: 36 | - image: prom/node-exporter 37 | name: node-exporter 38 | ports: 39 | - containerPort: 9100 40 | hostPort: 9100 41 | name: scrape 42 | hostNetwork: true 43 | hostPID: true 44 | --- 45 | apiVersion: monitoring.coreos.com/v1 46 | kind: ServiceMonitor 47 | metadata: 48 | labels: 49 | app: node-exporter 50 | prometheus: prometheus 51 | name: node-exporter 52 | namespace: prometheus 53 | spec: 54 | endpoints: 55 | - honorLabels: true 56 | port: scrape 57 | selector: 58 | matchLabels: 59 | app: node-exporter 60 | --- 61 | apiVersion: integreatly.org/v1alpha1 62 | kind: GrafanaDashboard 63 | metadata: 64 | name: node-exporter 65 | labels: 66 | app: grafana 67 | grafana: dashabord 68 | namespace: grafana 69 | spec: 70 | json: "" 71 | url: https://grafana.com/api/dashboards/1860/revisions/18/download 72 | -------------------------------------------------------------------------------- /sessions/003/README.md: -------------------------------------------------------------------------------- 1 | ## Get diagnostic data 2 | 3 | First, use [this session](/sessions/001) to deploy a Kafka cluster on Kubernetes. 4 | 5 | When debugging issues, you usually need to retrieve various artifacts from the environment, which can be a lot of effort. 6 | Fortunately, Strimzi provides a must-gather script that can be used to download all relevant artifacts and logs from a specific Kafka cluster. 7 | 8 | > [!NOTE] 9 | > You can add the `--secrets=all` option to also get secret values. 10 | 11 | ```sh 12 | $ curl -s https://raw.githubusercontent.com/strimzi/strimzi-kafka-operator/main/tools/report.sh \ 13 | | bash -s -- --namespace=test --cluster=my-cluster --out-dir=~/Downloads 14 | deployments 15 | deployment.apps/my-cluster-entity-operator 16 | statefulsets 17 | replicasets 18 | replicaset.apps/my-cluster-entity-operator-bb7c65dd4 19 | configmaps 20 | configmap/my-cluster-broker-10 21 | configmap/my-cluster-broker-11 22 | configmap/my-cluster-broker-12 23 | configmap/my-cluster-controller-0 24 | configmap/my-cluster-controller-1 25 | configmap/my-cluster-controller-2 26 | configmap/my-cluster-entity-topic-operator-config 27 | configmap/my-cluster-entity-user-operator-config 28 | secrets 29 | secret/my-cluster-clients-ca 30 | secret/my-cluster-clients-ca-cert 31 | secret/my-cluster-cluster-ca 32 | secret/my-cluster-cluster-ca-cert 33 | secret/my-cluster-cluster-operator-certs 34 | secret/my-cluster-entity-topic-operator-certs 35 | secret/my-cluster-entity-user-operator-certs 36 | secret/my-cluster-kafka-brokers 37 | services 38 | service/my-cluster-kafka-bootstrap 39 | service/my-cluster-kafka-brokers 40 | poddisruptionbudgets 41 | poddisruptionbudget.policy/my-cluster-kafka 42 | roles 43 | role.rbac.authorization.k8s.io/my-cluster-entity-operator 44 | rolebindings 45 | rolebinding.rbac.authorization.k8s.io/my-cluster-entity-topic-operator-role 46 | rolebinding.rbac.authorization.k8s.io/my-cluster-entity-user-operator-role 47 | networkpolicies 48 | networkpolicy.networking.k8s.io/my-cluster-entity-operator 49 | networkpolicy.networking.k8s.io/my-cluster-network-policy-kafka 50 | pods 51 | pod/my-cluster-broker-10 52 | pod/my-cluster-broker-11 53 | pod/my-cluster-broker-12 54 | pod/my-cluster-controller-0 55 | pod/my-cluster-controller-1 56 | pod/my-cluster-controller-2 57 | pod/my-cluster-entity-operator-bb7c65dd4-9zdmk 58 | persistentvolumeclaims 59 | persistentvolumeclaim/data-my-cluster-broker-10 60 | persistentvolumeclaim/data-my-cluster-broker-11 61 | 
persistentvolumeclaim/data-my-cluster-broker-12 62 | persistentvolumeclaim/data-my-cluster-controller-0 63 | persistentvolumeclaim/data-my-cluster-controller-1 64 | persistentvolumeclaim/data-my-cluster-controller-2 65 | ingresses 66 | routes 67 | clusterroles 68 | clusterrole.rbac.authorization.k8s.io/strimzi-cluster-operator-global 69 | clusterrole.rbac.authorization.k8s.io/strimzi-cluster-operator-leader-election 70 | clusterrole.rbac.authorization.k8s.io/strimzi-cluster-operator-namespaced 71 | clusterrole.rbac.authorization.k8s.io/strimzi-cluster-operator-watched 72 | clusterrole.rbac.authorization.k8s.io/strimzi-entity-operator 73 | clusterrole.rbac.authorization.k8s.io/strimzi-kafka-broker 74 | clusterrole.rbac.authorization.k8s.io/strimzi-kafka-client 75 | clusterrolebindings 76 | clusterrolebinding.rbac.authorization.k8s.io/strimzi-cluster-operator 77 | clusterrolebinding.rbac.authorization.k8s.io/strimzi-cluster-operator-kafka-broker-delegation 78 | clusterrolebinding.rbac.authorization.k8s.io/strimzi-cluster-operator-kafka-client-delegation 79 | clusteroperator 80 | deployment.apps/strimzi-cluster-operator 81 | replicaset.apps/strimzi-cluster-operator-6596f469c9 82 | pod/strimzi-cluster-operator-6596f469c9-smsw2 83 | configmap/strimzi-cluster-operator 84 | draincleaner 85 | customresources 86 | kafkanodepools.kafka.strimzi.io 87 | broker 88 | controller 89 | kafkas.kafka.strimzi.io 90 | my-cluster 91 | kafkatopics.kafka.strimzi.io 92 | my-topic 93 | strimzipodsets.core.strimzi.io 94 | my-cluster-broker 95 | my-cluster-controller 96 | events 97 | logs 98 | my-cluster-broker-10 99 | my-cluster-broker-11 100 | my-cluster-broker-12 101 | my-cluster-controller-0 102 | my-cluster-controller-1 103 | my-cluster-controller-2 104 | my-cluster-entity-operator-bb7c65dd4-9zdmk 105 | Report file report-17-03-2025_12-26-05.zip created 106 | ``` 107 | 108 | ## Get heap dumps 109 | 110 | It is also possible to collect broker JVM heap dumps and other advanced diagnostic data (thread dumps, flame graphs, etc). 111 | 112 | > [!WARNING] 113 | > Taking a heap dump is a heavy operation that can cause the Java application to hang. 114 | > It is not recommended in production, unless it is not possible to reproduce the memory issue in a test environment. 115 | 116 | Debugging locally can often be easier and faster. 117 | However, some issues only manifest in Kubernetes due to factors like networking, resource limits, or interactions with other components. 118 | Even if you try to match your local setup to the Kubernetes configuration, subtle differences (e.g. service discovery, security settings, or operator-managed logic) might lead to different behavior. 119 | 120 | Create an additional volume of the desired size using a PVC. 121 | 122 | ```sh 123 | $ kubectl create -f - < [!WARNING] 143 | > Adding a custom volume triggers pod restarts, which can make it difficult to capture an issue that has already occurred. 144 | > If the issue cannot be easily reproduced in a test environment, configuring the volume in advance could help avoid the pod restarts when you need them most. 
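For reference, this is a minimal sketch of a PVC matching the `my-pvc` claim name used by the patch below; the 1Gi size and the default storage class are assumptions, so size it according to the expected dump size.

```sh
$ kubectl create -f - <<'EOF'
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: my-pvc
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 1Gi
EOF
```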
145 | 146 | ```sh 147 | $ kubectl patch k my-cluster --type merge -p ' 148 | spec: 149 | kafka: 150 | template: 151 | pod: 152 | volumes: 153 | - name: my-volume 154 | persistentVolumeClaim: 155 | claimName: my-pvc 156 | kafkaContainer: 157 | volumeMounts: 158 | - name: my-volume 159 | mountPath: "/mnt/data"' 160 | kafka.kafka.strimzi.io/my-cluster patched 161 | ``` 162 | 163 | When the rolling update completes, create a broker heap dump and copy the output file to localhost. 164 | 165 | ```sh 166 | $ PID="$(kubectl exec my-cluster-broker-10 -- jcmd | grep "kafka.Kafka" | awk '{print $1}')" 167 | 168 | $ kubectl exec my-cluster-broker-10 -- jcmd "$PID" VM.flags 169 | 724: 170 | -XX:CICompilerCount=4 -XX:ConcGCThreads=3 -XX:G1ConcRefinementThreads=10 -XX:G1EagerReclaimRemSetThreshold=32 -XX:G1HeapRegionSize=4194304 171 | -XX:GCDrainStackTargetSize=64 -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/mnt/data/oome.hprof -XX:InitialHeapSize=5368709120 172 | -XX:+ManagementServer -XX:MarkStackSize=4194304 -XX:MaxHeapSize=5368709120 -XX:MaxNewSize=3221225472 -XX:MinHeapDeltaBytes=4194304 173 | -XX:MinHeapSize=5368709120 -XX:NonNMethodCodeHeapSize=5839372 -XX:NonProfiledCodeHeapSize=122909434 -XX:ProfiledCodeHeapSize=122909434 174 | -XX:ReservedCodeCacheSize=251658240 -XX:+SegmentedCodeCache -XX:SoftMaxHeapSize=5368709120 -XX:-THPStackMitigation 175 | -XX:+UseCompressedClassPointers -XX:+UseCompressedOops -XX:+UseFastUnorderedTimeStamps -XX:+UseG1GC 176 | 177 | $ kubectl exec my-cluster-broker-10 -- jcmd "$PID" GC.heap_dump /mnt/data/heap.hprof 178 | 724: 179 | Dumping heap to /mnt/data/heap.hprof ... 180 | Heap dump file created [179236580 bytes in 0.664 secs] 181 | 182 | $ kubectl cp my-cluster-broker-10:/mnt/data/heap.hprof "$HOME"/Downloads/heap.hprof 183 | tar: Removing leading `/' from member names 184 | ``` 185 | 186 | If the pod is crash looping, the dump can still be recovered by spinning up a temporary pod and mounting the volume. 187 | 188 | ```sh 189 | $ kubectl run my-pod --restart "Never" --image "foo" --overrides "{ 190 | \"spec\": { 191 | \"containers\": [ 192 | { 193 | \"name\": \"busybox\", 194 | \"image\": \"busybox\", 195 | \"imagePullPolicy\": \"IfNotPresent\", 196 | \"command\": [\"/bin/sh\", \"-c\", \"trap : TERM INT; sleep infinity & wait\"], 197 | \"volumeMounts\": [ 198 | {\"name\": \"data\", \"mountPath\": \"/mnt/data\"} 199 | ] 200 | } 201 | ], 202 | \"volumes\": [ 203 | {\"name\": \"data\", \"persistentVolumeClaim\": {\"claimName\": \"my-pvc\"}} 204 | ] 205 | } 206 | }" 207 | 208 | $ kubectl exec my-pod -- ls -lh /mnt/data 209 | total 171M 210 | -rw------- 1 1001 root 170.9M Mar 17 14:38 heap.hprof 211 | ``` 212 | 213 | For the heap dump analysis you can use a tool like Eclipse Memory Analyzer. 214 | -------------------------------------------------------------------------------- /sessions/004/README.md: -------------------------------------------------------------------------------- 1 | ## Configure TLS authentication 2 | 3 | First, use [this session](/sessions/001) to deploy a Kafka cluster on Kubernetes. 4 | 5 | We also add an external listener of type ingress with TLS authentication. 6 | Then, wait for the Cluster Operator to restart all pods one by one (rolling update). 7 | 8 | > [!IMPORTANT] 9 | > You need to enable the Nginx ingress controller with `--enable-ssl-passthrough` flag, and add ingress mappings to `/etc/hosts`. 
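On Minikube, a minimal sketch of both steps could look like the following (it assumes the ingress addon, whose controller runs as the `ingress-nginx-controller` deployment in the `ingress-nginx` namespace; adjust the names for other Nginx ingress installations):

```sh
# enable the ingress addon and append the SSL passthrough flag to the controller arguments
$ minikube addons enable ingress
$ kubectl patch deploy ingress-nginx-controller -n ingress-nginx --type json \
  -p '[{"op": "add", "path": "/spec/template/spec/containers/0/args/-", "value": "--enable-ssl-passthrough"}]'

# map the bootstrap and broker ingress hosts to the Minikube IP
$ echo "$(minikube ip) bootstrap.my-cluster.f12i.io broker-10.my-cluster.f12i.io broker-11.my-cluster.f12i.io broker-12.my-cluster.f12i.io" \
  | sudo tee -a /etc/hosts
```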
10 | 
11 | ```sh
12 | $ kubectl create -f sessions/004/install.yaml \
13 | && kubectl patch k my-cluster --type merge -p '
14 | spec:
15 | kafka:
16 | listeners:
17 | - name: external
18 | port: 9094
19 | type: ingress
20 | tls: true
21 | authentication:
22 | type: tls
23 | configuration:
24 | class: nginx
25 | hostTemplate: broker-{nodeId}.my-cluster.f12i.io
26 | bootstrap:
27 | host: bootstrap.my-cluster.f12i.io'
28 | kafkauser.kafka.strimzi.io/my-user created
29 | kafka.kafka.strimzi.io/my-cluster patched
30 | ```
31 | 
32 | The previous command adds a new authentication element to the external listener, which is the endpoint used by clients connecting from outside using TLS.
33 | It also creates a Kafka user resource with a matching configuration.
34 | 
35 | ```sh
36 | $ kubectl get ingress
37 | NAME CLASS HOSTS ADDRESS PORTS AGE
38 | my-cluster-broker-10 nginx broker-10.my-cluster.f12i.io 192.168.49.2 80, 443 104s
39 | my-cluster-broker-11 nginx broker-11.my-cluster.f12i.io 192.168.49.2 80, 443 104s
40 | my-cluster-broker-12 nginx broker-12.my-cluster.f12i.io 192.168.49.2 80, 443 104s
41 | my-cluster-kafka-bootstrap nginx bootstrap.my-cluster.f12i.io 192.168.49.2 80, 443 104s
42 | 
43 | $ kubectl get ku my-user -o yaml | yq .spec
44 | authentication:
45 | type: tls
46 | ```
47 | 
48 | When the rolling update is completed, you should be able to see the broker certificate by running the following command.
49 | 
50 | ```sh
51 | $ openssl s_client -connect bootstrap.my-cluster.f12i.io:443 -servername bootstrap.my-cluster.f12i.io -showcerts
52 | ...
53 | Server certificate
54 | subject=O=io.strimzi, CN=my-cluster-kafka
55 | issuer=O=io.strimzi, CN=cluster-ca v0
56 | ...
57 | ```
58 | 
59 | Then, we can try to send some messages using an external Kafka client.
60 | Here we are using the console producer tool included in every Kafka distribution.
61 | 
62 | ```sh
63 | $ export BOOTSTRAP_SERVERS=$(kubectl get k my-cluster -o yaml | yq '.status.listeners.[] | select(.name == "external").bootstrapServers'); \
64 | kubectl get k my-cluster -o yaml | yq '.status.listeners.[] | select(.name == "external").certificates[0]' > /tmp/cluster-ca.crt ; \
65 | kubectl get secret my-user -o jsonpath="{.data['user\.crt']}" | base64 -d > /tmp/user.crt ; \
66 | kubectl get secret my-user -o jsonpath="{.data['user\.key']}" | base64 -d > /tmp/user.key
67 | 
68 | $ CLUSTER_CA_CRT=$(</tmp/cluster-ca.crt) \
69 |   USER_CRT=$(</tmp/user.crt) \
70 |   USER_KEY=$(</tmp/user.key)
71 | 
72 | $ cat <<EOF >/tmp/client.properties
73 | security.protocol=SSL
74 | ssl.truststore.type=PEM
75 | ssl.truststore.certificates=$CLUSTER_CA_CRT
76 | ssl.keystore.type=PEM
77 | ssl.keystore.certificate.chain=$USER_CRT
78 | ssl.keystore.key=$USER_KEY
79 | EOF
80 | 
81 | $ $KAFKA_HOME/bin/kafka-console-producer.sh --bootstrap-server $BOOTSTRAP_SERVERS --topic my-topic --producer.config /tmp/client.properties
82 | >hello
83 | >world
84 | >^C
85 | 
86 | $ $KAFKA_HOME/bin/kafka-console-consumer.sh --bootstrap-server $BOOTSTRAP_SERVERS --topic my-topic --from-beginning --max-messages 2 --consumer.config /tmp/client.properties
87 | hello
88 | world
89 | Processed a total of 2 messages
90 | ```
91 | 
92 | When dealing with TLS issues, it is useful to look inside the certificate to verify its configuration and expiration.
93 | For example, let's get the cluster CA certificate which is used to sign all server certificates.
94 | We can use `kubectl` to do so, but let's suppose we have a must-gather script output.
95 | Use the command from [the previous session](/sessions/003) to generate a new report from the current cluster.
96 | 97 | ```sh 98 | $ unzip -p ~/Downloads/report-12-10-2024_11-31-59.zip reports/secrets/my-cluster-cluster-ca-cert.yaml \ 99 | | yq '.data."ca.crt"' | base64 -d | openssl x509 -inform pem -noout -text 100 | Certificate: 101 | Data: 102 | Version: 3 (0x2) 103 | Serial Number: 104 | 26:9e:a1:7d:4d:34:cb:6b:ec:98:03:46:fb:7a:82:ad:68:80:bd:8e 105 | Signature Algorithm: sha512WithRSAEncryption 106 | Issuer: O=io.strimzi, CN=cluster-ca v0 107 | Validity 108 | Not Before: Sep 8 16:28:42 2022 GMT 109 | Not After : Sep 8 16:28:42 2023 GMT 110 | Subject: O=io.strimzi, CN=cluster-ca v0 111 | Subject Public Key Info: 112 | Public Key Algorithm: rsaEncryption 113 | Public-Key: (4096 bit) 114 | Modulus: 115 | ... 116 | Exponent: 65537 (0x10001) 117 | X509v3 extensions: 118 | X509v3 Subject Key Identifier: 119 | 2D:1D:63:F6:20:57:33:7D:59:73:DF:15:74:A2:A8:3D:E1:5B:3E:38 120 | X509v3 Basic Constraints: critical 121 | CA:TRUE, pathlen:0 122 | X509v3 Key Usage: critical 123 | Certificate Sign, CRL Sign 124 | Signature Algorithm: sha512WithRSAEncryption 125 | Signature Value: 126 | ... 127 | ``` 128 | 129 | If this is not enough to spot the issue, we can add the `-Djavax.net.debug=ssl:handshake` Java option to the client in order to get more details. 130 | As an additional exercise, try to get the clients CA and user certificates to verify if the first signs the second. 131 | 132 | ## Use custom TLS certificates 133 | 134 | Often, security policies don't allow you to run a Kafka cluster with self-signed certificates in production. 135 | Configure the listeners to use a custom certificate signed by an external or well-known CA. 136 | 137 | Custom certificates are not managed by the operator, so you will be in charge of the renewal process, which requires an update to the listener secret. 138 | A rolling update will start automatically in order to make the new certificate available. 139 | This example only shows TLS encryption, but you can add a custom client certificate for TLS authentication by setting `type: tls-external` in the `KafkaUser` custom resource and creating the user secret (subject can only contain `CN=$USER_NAME`). 140 | 141 | Typically, the security team will provide a certificate bundle which includes the whole trust chain (i.e. root CA + intermediate CA + listener certificate) and a private key. 142 | If that's not the case, you can easily create the bundle from individual certificates in PEM format, because you need to trust the whole chain, if any. 143 | 144 | ```sh 145 | $ cat /tmp/listener.crt /tmp/intermca.crt /tmp/rootca.crt >/tmp/bundle.crt 146 | ``` 147 | 148 | Here we generate our own certificate bundle with only one self-signed certificate, pretending it was handed over by the security team. 149 | We also use a wildcard certificate so that we don't need to specify all broker SANs. 150 | 151 | > [!IMPORTANT] 152 | > The custom server certificate for a listener must not be a CA and it must include a SAN for each broker address, plus one for the bootstrap address. 153 | > Alternatively, you can use a wildcard certificate to include all addresses with one SAN entry. 
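Before configuring the listener, it is worth checking that the bundle you were given actually satisfies these constraints, for example by looking at its subject, expiration, and SAN entries (here we use `/tmp/bundle.crt`, the file created in the next step):

```sh
$ openssl x509 -in /tmp/bundle.crt -noout -subject -enddate
$ openssl x509 -in /tmp/bundle.crt -noout -text | grep -A1 "Subject Alternative Name"
```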
154 | 
155 | ```sh
156 | $ CONFIG="
157 | [req]
158 | prompt=no
159 | distinguished_name=dn
160 | x509_extensions=ext
161 | [dn]
162 | countryName=IT
163 | stateOrProvinceName=Rome
164 | organizationName=Fede
165 | commonName=my-cluster
166 | [ext]
167 | subjectAltName=@san
168 | [san]
169 | DNS.1=*.my-cluster.f12i.io
170 | " && openssl genrsa -out /tmp/listener.key 2048 \
171 | && openssl req -new -x509 -days 3650 -key /tmp/listener.key -out /tmp/bundle.crt -config <(echo "$CONFIG")
172 | ```
173 | 
174 | Now we [deploy the Strimzi Cluster Operator and Kafka cluster](/sessions/001), and set the external listener.
175 | Then, we deploy the secret containing the custom certificate and update the Kafka cluster configuration by adding a reference to that secret.
176 | 
177 | ```sh
178 | $ kubectl create secret generic ext-listener-crt \
179 | --from-file=/tmp/bundle.crt --from-file=/tmp/listener.key
180 | secret/ext-listener-crt created
181 | 
182 | $ kubectl patch k my-cluster --type merge -p '
183 | spec:
184 | kafka:
185 | listeners:
186 | - name: external
187 | port: 9094
188 | type: ingress
189 | tls: true
190 | configuration:
191 | class: nginx
192 | hostTemplate: broker-{nodeId}.my-cluster.f12i.io
193 | bootstrap:
194 | host: bootstrap.my-cluster.f12i.io
195 | brokerCertChainAndKey:
196 | secretName: ext-listener-crt
197 | certificate: bundle.crt
198 | key: listener.key'
199 | kafka.kafka.strimzi.io/my-cluster patched
200 | ```
201 | 
202 | When the rolling update is completed, clients just need to trust the external CA and they will be able to connect.
203 | In our case, we don't have a CA, so we just need to trust the self-signed certificate.
204 | 
205 | ```sh
206 | $ PUBLIC_CRT=$(</tmp/bundle.crt)
207 | 
208 | $ cat <<EOF >/tmp/client.properties
209 | security.protocol=SSL
210 | ssl.truststore.type=PEM
211 | ssl.truststore.certificates=$PUBLIC_CRT
212 | EOF
213 | 
214 | $ $KAFKA_HOME/bin/kafka-console-producer.sh --bootstrap-server $BOOTSTRAP_SERVERS --topic my-topic --producer.config /tmp/client.properties
215 | >hello
216 | >world
217 | >^C
218 | ```
219 | 
--------------------------------------------------------------------------------
/sessions/004/install.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: kafka.strimzi.io/v1beta2
2 | kind: KafkaUser
3 | metadata:
4 | name: my-user
5 | labels:
6 | strimzi.io/cluster: my-cluster
7 | spec:
8 | authentication:
9 | type: tls
10 | 
--------------------------------------------------------------------------------
/sessions/005/README.md:
--------------------------------------------------------------------------------
1 | ## Use Kafka with Apicurio Registry
2 | 
3 | First, use [this session](/sessions/001) to deploy a Kafka cluster on Kubernetes.
4 | 
5 | When the Kafka cluster is ready, we deploy the Apicurio Registry operator.
6 | 7 | ```sh 8 | $ envsubst < sessions/005/install/apicurio.yaml | kubectl create -f - 9 | customresourcedefinition.apiextensions.k8s.io/apicurioregistries.registry.apicur.io created 10 | serviceaccount/apicurio-registry-operator created 11 | role.rbac.authorization.k8s.io/apicurio-registry-operator-leader-election-role created 12 | clusterrole.rbac.authorization.k8s.io/apicurio-registry-operator-role created 13 | rolebinding.rbac.authorization.k8s.io/apicurio-registry-operator-leader-election-rolebinding created 14 | clusterrolebinding.rbac.authorization.k8s.io/apicurio-registry-operator-rolebinding created 15 | deployment.apps/apicurio-registry-operator created 16 | ``` 17 | 18 | After that we deploy our registry instance with in-memory storage system and check the result. 19 | 20 | ```sh 21 | $ kubectl create -f registry.yaml 22 | apicurioregistry.registry.apicur.io/my-schema-registry created 23 | 24 | $ kubectl get po 25 | NAME READY STATUS RESTARTS AGE 26 | apicurio-registry-operator-9448ffc74-b6whl 1/1 Running 0 69s 27 | my-cluster-broker-10 1/1 Running 0 4m54s 28 | my-cluster-broker-11 1/1 Running 0 4m27s 29 | my-cluster-broker-12 1/1 Running 0 5m19s 30 | my-cluster-controller-0 1/1 Running 0 7m32s 31 | my-cluster-controller-1 1/1 Running 0 7m32s 32 | my-cluster-controller-2 1/1 Running 0 7m32s 33 | my-cluster-entity-operator-67b8cc5c87-74qlb 2/2 Running 0 6m59s 34 | my-schema-registry-deployment-858c7dc76b-gjkcs 1/1 Running 0 66s 35 | strimzi-cluster-operator-d78fd875b-dcjxw 1/1 Running 0 8m36s 36 | ``` 37 | 38 | Now, we export some connection parameters and register the test Avro message schema. 39 | 40 | > [!NOTE] 41 | > In addition to the REST API, the registry also provides a web interface for handling schemas and set rules. 42 | > This is accessible using the auto-generated ingress address. 43 | 44 | The artifact `id` convention for the mapping is to combine the topic name with the key or value, depending on whether the serializer is used for the message key or value. 45 | The generated `globalId` is then stored in the message headers and used to lookup the schema when consuming messages. 46 | Different schema `version`s use the same artifact `id`, but have different `globalId`s. 47 | 48 | ```sh 49 | $ export BOOTSTRAP_SERVERS=$(kubectl get k my-cluster -o yaml | yq '.status.listeners.[] | select(.name == "plain").bootstrapServers') \ 50 | REGISTRY_URL=http://$(kubectl get apicurioregistries my-schema-registry -o jsonpath="{.status.info.host}")/apis/registry/v2 \ 51 | ARTIFACT_GROUP="default" \ 52 | TOPIC_NAME="my-topic" 53 | 54 | $ curl -s -X POST -H "Content-Type: application/json" \ 55 | -H "X-Registry-ArtifactId: my-topic-value" -H "X-Registry-ArtifactType: AVRO" \ 56 | -d @sessions/005/install/greeting.avsc \ 57 | "$REGISTRY_URL/groups/default/artifacts?ifExists=RETURN_OR_UPDATE" | yq -o json 58 | { 59 | "name": "Greeting", 60 | "createdBy": "", 61 | "createdOn": "2025-03-24T07:26:33+0000", 62 | "modifiedBy": "", 63 | "modifiedOn": "2025-03-24T07:26:33+0000", 64 | "id": "my-topic-value", 65 | "version": "1", 66 | "type": "AVRO", 67 | "globalId": 1, 68 | "state": "ENABLED", 69 | "contentId": 1, 70 | "references": [] 71 | } 72 | ``` 73 | 74 | At this point, we can start the application and observe its output. 
75 | 76 | ```sh 77 | $ envsubst < sessions/005/install/application.yaml | kubectl create -f - 78 | deployment.apps/kafka-avro created 79 | 80 | $ kubectl logs -f $(kubectl get po -l app=kafka-avro -o name) 81 | Producing records 82 | Records produced 83 | Consuming all records 84 | Record: Hello-1742801335037 85 | Record: Hello-1742801335160 86 | Record: Hello-1742801335160 87 | Record: Hello-1742801335161 88 | Record: Hello-1742801335161 89 | ``` 90 | 91 | If we now look at one of the messages, we see that the `globalId` is stored in the message headers and used for the schema lookup when consuming messages. 92 | 93 | ```sh 94 | $ kubectl exec my-cluster-broker-10 -- bin/kafka-dump-log.sh --deep-iteration --print-data-log \ 95 | --files /var/lib/kafka/data/kafka-log10/my-topic-0/00000000000000000000.log | tail -n2 96 | | offset: 15 CreateTime: 1742802014915 keySize: -1 valueSize: 12 sequence: 4 headerKeys: [apicurio.value.globalId,apicurio.value.encoding] payload: 97 | Hello????e 98 | ``` 99 | 100 | Finally, we can use the REST API to look at the schema content and metadata, which may be useful for debugging. 101 | 102 | ```sh 103 | $ curl -s "$REGISTRY_URL/search/artifacts" | yq -o json 104 | { 105 | "artifacts": [ 106 | { 107 | "id": "my-topic-value", 108 | "name": "Greeting", 109 | "createdOn": "2025-03-24T07:26:33+0000", 110 | "createdBy": "", 111 | "type": "AVRO", 112 | "state": "ENABLED", 113 | "modifiedOn": "2025-03-24T07:26:33+0000", 114 | "modifiedBy": "" 115 | } 116 | ], 117 | "count": 1 118 | } 119 | 120 | $ curl -s "$REGISTRY_URL/groups/default/artifacts/my-topic-value" | yq -o json 121 | { 122 | "type": "record", 123 | "name": "Greeting", 124 | "fields": [ 125 | { 126 | "name": "Message", 127 | "type": "string" 128 | }, 129 | { 130 | "name": "Time", 131 | "type": "long" 132 | } 133 | ] 134 | } 135 | 136 | $ curl -s "$REGISTRY_URL/groups/default/artifacts/my-topic-value/meta" | yq -o json 137 | { 138 | "name": "Greeting", 139 | "createdBy": "", 140 | "createdOn": "2025-03-24T07:26:33+0000", 141 | "modifiedBy": "", 142 | "modifiedOn": "2025-03-24T07:26:33+0000", 143 | "id": "my-topic-value", 144 | "version": "1", 145 | "type": "AVRO", 146 | "globalId": 1, 147 | "state": "ENABLED", 148 | "contentId": 1, 149 | "references": [] 150 | } 151 | ``` 152 | -------------------------------------------------------------------------------- /sessions/005/install/application.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: kafka-avro 5 | spec: 6 | replicas: 1 7 | selector: 8 | matchLabels: 9 | app: kafka-avro 10 | template: 11 | metadata: 12 | labels: 13 | app: kafka-avro 14 | spec: 15 | containers: 16 | - name: kafka-avro 17 | image: ghcr.io/fvaleri/kafka-avro:latest 18 | imagePullPolicy: Always 19 | securityContext: 20 | allowPrivilegeEscalation: false 21 | capabilities: 22 | drop: 23 | - ALL 24 | runAsNonRoot: true 25 | seccompProfile: 26 | type: RuntimeDefault 27 | env: 28 | - name: BOOTSTRAP_SERVERS 29 | value: "${BOOTSTRAP_SERVERS}" 30 | - name: REGISTRY_URL 31 | value: "${REGISTRY_URL}" 32 | - name: ARTIFACT_GROUP 33 | value: "${ARTIFACT_GROUP}" 34 | - name: TOPIC_NAME 35 | value: "${TOPIC_NAME}" 36 | -------------------------------------------------------------------------------- /sessions/005/install/greeting.avsc: -------------------------------------------------------------------------------- 1 | { 2 | "type": "record", 3 | "name": "Greeting", 4 | "fields": [{ 5 | "name": 
"Message", 6 | "type": "string" 7 | }, { 8 | "name": "Time", 9 | "type": "long" 10 | }] 11 | } 12 | -------------------------------------------------------------------------------- /sessions/005/install/registry.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: registry.apicur.io/v1 2 | kind: ApicurioRegistry 3 | metadata: 4 | name: my-schema-registry 5 | spec: 6 | configuration: 7 | persistence: mem 8 | deployment: 9 | host: my-schema-registry.f12i.io 10 | -------------------------------------------------------------------------------- /sessions/006/README.md: -------------------------------------------------------------------------------- 1 | ## Use Kafka Connect with Debezium 2 | 3 | First, use [this session](/sessions/001) to deploy a Kafka cluster on Kubernetes. 4 | When the cluster is ready, we deploy a MySQL instance (the external system), and Kafka Connect cluster. 5 | 6 | > [!IMPORTANT] 7 | > The Kafka Connect image uses Kaniko to build a custom image containing the configured MySQL connector. 8 | > In production, this is not recommended, so you should use your own Connect image built from the Strimzi one. 9 | 10 | ```sh 11 | $ kubectl create -f sessions/006/install/mysql.yaml \ 12 | && kubectl wait --for=condition=Ready pod -l app=my-mysql --timeout=300s \ 13 | && kubectl exec my-mysql-0 -- sh -c 'mysql -u root < /tmp/sql/initdb.sql' 14 | persistentvolumeclaim/my-mysql-data created 15 | configmap/my-mysql-cfg created 16 | configmap/my-mysql-env created 17 | configmap/my-mysql-init created 18 | statefulset.apps/my-mysql created 19 | service/my-mysql-svc created 20 | pod/my-mysql-0 condition met 21 | 22 | $ kubectl create -f sessions/006/install/connect.yaml 23 | kafkaconnect.kafka.strimzi.io/my-connect-cluster created 24 | kafkaconnector.kafka.strimzi.io/mysql-source-connector created 25 | 26 | $ kubectl get po,kt,kctr 27 | NAME READY STATUS RESTARTS AGE 28 | pod/my-cluster-broker-10 1/1 Running 0 6m1s 29 | pod/my-cluster-broker-11 1/1 Running 0 6m1s 30 | pod/my-cluster-broker-12 1/1 Running 0 6m1s 31 | pod/my-cluster-controller-0 1/1 Running 0 6m1s 32 | pod/my-cluster-controller-1 1/1 Running 0 6m1s 33 | pod/my-cluster-controller-2 1/1 Running 0 6m1s 34 | pod/my-cluster-entity-operator-7bc799c449-8jxmb 2/2 Running 0 5m27s 35 | pod/my-connect-cluster-connect-0 1/1 Running 0 2m46s 36 | pod/my-mysql-0 1/1 Running 0 4m19s 37 | pod/strimzi-cluster-operator-d78fd875b-q9sds 1/1 Running 0 6m30s 38 | 39 | NAME CLUSTER PARTITIONS REPLICATION FACTOR READY 40 | kafkatopic.kafka.strimzi.io/my-topic my-cluster 3 3 True 41 | 42 | NAME CLUSTER CONNECTOR CLASS MAX TASKS READY 43 | kafkaconnector.kafka.strimzi.io/mysql-source-connector my-connect-cluster io.debezium.connector.mysql.MySqlConnector 1 True 44 | ``` 45 | 46 | As you may have guessed at this point, we are going to emit MySQL row changes and import them into Kafka, so that other applications can pick them up and process them. 47 | Let's check if the connector and its tasks are running fine by using the `KafkaConnector` resource, which is easier than interacting via REST requests. 
48 | 49 | ```sh 50 | $ kubectl get kctr mysql-source-connector -o yaml | yq .status 51 | conditions: 52 | - lastTransitionTime: "2024-10-28T10:53:20.123553787Z" 53 | status: "True" 54 | type: Ready 55 | connectorStatus: 56 | connector: 57 | state: RUNNING 58 | worker_id: my-connect-cluster-connect-0.my-connect-cluster-connect.test.svc:8083 59 | name: mysql-source-connector 60 | tasks: 61 | - id: 0 62 | state: RUNNING 63 | worker_id: my-connect-cluster-connect-0.my-connect-cluster-connect.test.svc:8083 64 | type: source 65 | observedGeneration: 1 66 | tasksMax: 1 67 | topics: 68 | - __debezium-heartbeat.my-mysql 69 | - my-mysq 70 | ``` 71 | 72 | Debezium configuration is specific to each connector and it is documented in detail. 73 | The value of `server_id` must be unique for each server and replication client in the MySQL cluster. 74 | In this case, the MySQL user must have appropriate permissions on all databases for which the connector captures changes. 75 | 76 | ```sh 77 | $ kubectl get cm my-mysql-cfg -o yaml | yq .data 78 | my.cnf: | 79 | !include /etc/my.cnf 80 | [mysqld] 81 | server_id = 111111 82 | log_bin = mysql-bin 83 | binlog_format = ROW 84 | binlog_row_image = FULL 85 | binlog_rows_query_log_events = ON 86 | expire_logs_days = 10 87 | gtid_mode = ON 88 | enforce_gtid_consistency = ON 89 | 90 | $ kubectl get cm my-mysql-init -o yaml | yq .data 91 | initdb.sql: | 92 | use testdb; 93 | CREATE TABLE IF NOT EXISTS customers ( 94 | id INTEGER NOT NULL AUTO_INCREMENT PRIMARY KEY, 95 | first_name VARCHAR(255) NOT NULL, 96 | last_name VARCHAR(255) NOT NULL, 97 | email VARCHAR(255) NOT NULL UNIQUE 98 | ); 99 | 100 | CREATE USER IF NOT EXISTS 'debezium'@'%' IDENTIFIED WITH caching_sha2_password BY 'changeit'; 101 | GRANT SELECT, RELOAD, SHOW DATABASES, REPLICATION SLAVE, REPLICATION CLIENT ON *.* TO 'debezium'@'%'; 102 | FLUSH PRIVILEGES; 103 | ``` 104 | 105 | Enough with describing the configuration, now let's create some changes using good old SQL. 106 | 107 | ```sh 108 | $ kubectl exec my-mysql-0 -- sh -c 'MYSQL_PWD="changeit" mysql -u admin testdb -e " 109 | INSERT INTO customers (first_name, last_name, email) VALUES (\"John\", \"Doe\", \"jdoe@example.com\"); 110 | UPDATE customers SET first_name = \"Jane\" WHERE id = 1; 111 | INSERT INTO customers (first_name, last_name, email) VALUES (\"Dylan\", \"Dog\", \"ddog@example.com\"); 112 | SELECT * FROM customers;"' 113 | id first_name last_name email 114 | 1 Jane Doe jdoe@example.com 115 | 2 Dylan Dog ddog@example.com 116 | ``` 117 | 118 | The MySQL connector writes change events that occur in a table to a Kafka topic named like `serverName.databaseName.tableName`. 119 | We created 3 changes (insert-update-insert), so we have 3 records in that topic. 120 | It's interesting to look at some record properties: `op` is the change type (c=create, r=read for snapshot only, u=update, d=delete), `gtid` is the global transaction identifier that is unique in a MySQL cluster, `payload.source.ts_ms` is the timestamp when the change was applied, `payload.ts_ms` is the timestamp when Debezium processed that event. The notification lag is the difference with the source timestamp. 
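For example, in the first record printed below, the notification lag is `1730112871209 - 1730112871000 = 209 ms`.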
121 | 122 | ```sh 123 | $ kubectl-kafka bin/kafka-console-consumer.sh --bootstrap-server my-cluster-kafka-bootstrap:9092 \ 124 | --topic my-mysql.testdb.customers --from-beginning --max-messages 3 125 | Struct{after=Struct{id=2,first_name=Dylan,last_name=Dog,email=ddog@example.com},source=Struct{version=2.3.7.Final,connector=mysql,name=my-mysql,ts_ms=1730112871000,db=testdb,table=customers,server_id=111111,gtid=500bc4b7-951a-11ef-aae4-9e82de0bd73c:16,file=mysql-bin.000002,pos=2602,row=0,thread=61},op=c,ts_ms=1730112871209} 126 | Struct{after=Struct{id=1,first_name=John,last_name=Doe,email=jdoe@example.com},source=Struct{version=2.3.7.Final,connector=mysql,name=my-mysql,ts_ms=1730112871000,db=testdb,table=customers,server_id=111111,gtid=500bc4b7-951a-11ef-aae4-9e82de0bd73c:14,file=mysql-bin.000002,pos=1707,row=0,thread=61},op=c,ts_ms=1730112871199} 127 | Struct{before=Struct{id=1,first_name=John,last_name=Doe,email=jdoe@example.com},after=Struct{id=1,first_name=Jane,last_name=Doe,email=jdoe@example.com},source=Struct{version=2.3.7.Final,connector=mysql,name=my-mysql,ts_ms=1730112871000,db=testdb,table=customers,server_id=111111,gtid=500bc4b7-951a-11ef-aae4-9e82de0bd73c:15,file=mysql-bin.000002,pos=2120,row=0,thread=61},op=u,ts_ms=1730112871207} 128 | Processed a total of 3 messages 129 | ``` 130 | 131 | As an additional exercise, you can extend this data pipeline by configuring a sink connector and exporting these changes to an external system like Artemis Broker. 132 | -------------------------------------------------------------------------------- /sessions/006/install/connect.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kafka.strimzi.io/v1beta2 2 | kind: KafkaConnect 3 | metadata: 4 | name: my-connect-cluster 5 | annotations: 6 | strimzi.io/use-connector-resources: "true" 7 | spec: 8 | replicas: 1 9 | #version: x.y.z 10 | bootstrapServers: my-cluster-kafka-bootstrap:9093 11 | tls: 12 | trustedCertificates: 13 | - secretName: my-cluster-cluster-ca-cert 14 | certificate: ca.crt 15 | config: 16 | group.id: my-connect-cluster 17 | offset.storage.topic: connect-cluster-offsets 18 | config.storage.topic: connect-cluster-configs 19 | status.storage.topic: connect-cluster-status 20 | key.converter: org.apache.kafka.connect.storage.StringConverter 21 | value.converter: org.apache.kafka.connect.storage.StringConverter 22 | # -1 means use default broker RF 23 | config.storage.replication.factor: -1 24 | offset.storage.replication.factor: -1 25 | status.storage.replication.factor: -1 26 | logging: 27 | type: inline 28 | loggers: 29 | rootLogger.level: INFO 30 | logger.sourcetask.name: org.apache.kafka.connect.runtime.WorkerSourceTask 31 | logger.sourcetask.level: INFO 32 | logger.sinktask.name: org.apache.kafka.connect.runtime.WorkerSinkTask 33 | logger.sinktask.level: INFO 34 | resources: 35 | limits: 36 | cpu: 3000m 37 | memory: 3Gi 38 | requests: 39 | cpu: 1000m 40 | memory: 3Gi 41 | build: 42 | output: 43 | type: docker 44 | image: ttl.sh/fvaleri/kafka-connect:24h 45 | plugins: 46 | - name: debezium-mysql 47 | artifacts: 48 | - type: tgz 49 | url: https://repo1.maven.org/maven2/io/debezium/debezium-connector-mysql/2.3.7.Final/debezium-connector-mysql-2.3.7.Final-plugin.tar.gz 50 | --- 51 | apiVersion: kafka.strimzi.io/v1beta2 52 | kind: KafkaConnector 53 | metadata: 54 | name: mysql-source-connector 55 | labels: 56 | # must match the connect name 57 | strimzi.io/cluster: my-connect-cluster 58 | spec: 59 | tasksMax: 1 60 | class: 
io.debezium.connector.mysql.MySqlConnector 61 | config: 62 | database.hostname: "my-mysql-svc" 63 | database.port: 3306 64 | database.user: "debezium" 65 | database.password: "changeit" 66 | database.dbname: "testdb" 67 | # never change topic.prefix after connector startup 68 | topic.prefix: "my-mysql" 69 | # the server.id must be unique for each server or replication client 70 | database.server.id: "222222" 71 | database.include.list: "testdb" 72 | table.include.list: "testdb.customers" 73 | schema.history.internal.kafka.bootstrap.servers: "my-cluster-kafka-bootstrap:9092" 74 | schema.history.internal.kafka.topic: "testdb.history" 75 | include.schema.changes: "true" 76 | # commit progress even when there are no changes 77 | heartbeat.interval.ms: 10000 78 | snapshot.mode: "when_needed" 79 | -------------------------------------------------------------------------------- /sessions/006/install/mysql.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: PersistentVolumeClaim 3 | metadata: 4 | name: my-mysql-data 5 | spec: 6 | accessModes: 7 | - ReadWriteOnce 8 | resources: 9 | requests: 10 | storage: 10Gi 11 | --- 12 | apiVersion: v1 13 | kind: ConfigMap 14 | metadata: 15 | name: my-mysql-cfg 16 | data: 17 | # the server_id must be unique for each server or replication client 18 | my.cnf: | 19 | !include /etc/my.cnf 20 | [mysqld] 21 | server_id = 111111 22 | log_bin = mysql-bin 23 | binlog_format = ROW 24 | binlog_row_image = FULL 25 | binlog_rows_query_log_events = ON 26 | expire_logs_days = 10 27 | gtid_mode = ON 28 | enforce_gtid_consistency = ON 29 | --- 30 | apiVersion: v1 31 | kind: Secret 32 | metadata: 33 | name: my-mysql-env 34 | type: Opaque 35 | stringData: 36 | MYSQL_DEFAULTS_FILE: /config/configdb.d/my.cnf 37 | MYSQL_DATABASE: testdb 38 | MYSQL_USER: admin 39 | MYSQL_PASSWORD: changeit 40 | MYSQL_ALLOW_EMPTY_PASSWORD: "true" 41 | --- 42 | apiVersion: v1 43 | kind: ConfigMap 44 | metadata: 45 | name: my-mysql-init 46 | data: 47 | initdb.sql: | 48 | use testdb; 49 | CREATE TABLE IF NOT EXISTS customers ( 50 | id INTEGER NOT NULL AUTO_INCREMENT PRIMARY KEY, 51 | first_name VARCHAR(255) NOT NULL, 52 | last_name VARCHAR(255) NOT NULL, 53 | email VARCHAR(255) NOT NULL UNIQUE 54 | ); 55 | 56 | CREATE USER IF NOT EXISTS 'debezium'@'%' IDENTIFIED WITH caching_sha2_password BY 'changeit'; 57 | GRANT SELECT, RELOAD, SHOW DATABASES, REPLICATION SLAVE, REPLICATION CLIENT ON *.* TO 'debezium'@'%'; 58 | FLUSH PRIVILEGES; 59 | --- 60 | apiVersion: apps/v1 61 | kind: StatefulSet 62 | metadata: 63 | name: my-mysql 64 | spec: 65 | replicas: 1 66 | serviceName: my-mysql 67 | selector: 68 | matchLabels: 69 | app: my-mysql 70 | template: 71 | metadata: 72 | labels: 73 | app: my-mysql 74 | spec: 75 | containers: 76 | - name: mysql 77 | image: quay.io/centos7/mysql-80-centos7:20230712 78 | resources: 79 | limits: 80 | cpu: 1000m 81 | memory: 1Gi 82 | requests: 83 | cpu: 500m 84 | memory: 1Gi 85 | envFrom: 86 | - secretRef: 87 | name: my-mysql-env 88 | ports: 89 | - containerPort: 3306 90 | protocol: TCP 91 | volumeMounts: 92 | - name: my-mysql-data 93 | mountPath: /var/lib/mysql 94 | - name: my-mysql-cfg 95 | mountPath: /config/configdb.d 96 | - name: my-mysql-init 97 | mountPath: /tmp/sql 98 | readinessProbe: 99 | exec: 100 | command: [ "mysqladmin", "-uroot", "ping" ] 101 | initialDelaySeconds: 60 102 | timeoutSeconds: 10 103 | livenessProbe: 104 | exec: 105 | command: [ "mysqladmin", "-uroot", "ping" ] 106 | initialDelaySeconds: 60 107 | 
timeoutSeconds: 10
108 | volumes:
109 | - name: my-mysql-data
110 | persistentVolumeClaim:
111 | claimName: my-mysql-data
112 | - name: my-mysql-cfg
113 | configMap:
114 | name: my-mysql-cfg
115 | - name: my-mysql-init
116 | configMap:
117 | name: my-mysql-init
118 | ---
119 | apiVersion: v1
120 | kind: Service
121 | metadata:
122 | name: my-mysql-svc
123 | spec:
124 | ports:
125 | - name: mysql
126 | port: 3306
127 | protocol: TCP
128 | targetPort: 3306
129 | selector:
130 | app: my-mysql
131 | 
--------------------------------------------------------------------------------
/sessions/007/README.md:
--------------------------------------------------------------------------------
1 | ## Use Mirror Maker 2 for disaster recovery
2 | 
3 | First, use [this session](/sessions/001) to deploy a Kafka cluster on Kubernetes.
4 | 
5 | At this point, we can deploy the target cluster.
6 | 
7 | ```sh
8 | $ kubectl create -f sessions/007/install/target.yaml
9 | kafkanodepool.kafka.strimzi.io/combined created
10 | kafka.kafka.strimzi.io/my-cluster-tgt created
11 | ```
12 | 
13 | When the target cluster is ready, we can deploy Mirror Maker 2 (MM2).
14 | The recommended way of deploying MM2 is near the target Kafka cluster (same subnet or zone), because the producer overhead is greater than the consumer overhead.
15 | 
16 | > [!IMPORTANT]
17 | > When source and target clusters run on different namespaces or Kubernetes clusters, you have to copy the source `cluster-ca-cert` into the target namespace where MM2 is running.
18 | 
19 | ```sh
20 | $ export SOURCE_NS="$NAMESPACE" TARGET_NS="$NAMESPACE"; envsubst < sessions/007/install/mm2.yaml | kubectl create -f -
21 | kafkamirrormaker2.kafka.strimzi.io/my-mm2-cluster created
22 | configmap/mirror-maker-2-metrics created
23 | ```
24 | 
25 | MM2 runs on top of Kafka Connect with a set of configurable built-in connectors.
26 | The `MirrorSourceConnector` replicates remote topics, ACLs, and configurations of a single source cluster and emits offset syncs.
27 | The `MirrorCheckpointConnector` emits consumer group offset checkpoints to enable failover points.
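Later on, once a consumer group exists on the source cluster, you can verify the effect of the checkpointing by comparing the committed offsets on both sides, for example:

```sh
$ kubectl-kafka bin/kafka-consumer-groups.sh --bootstrap-server my-cluster-kafka-bootstrap:9092 --describe --all-groups
$ kubectl-kafka bin/kafka-consumer-groups.sh --bootstrap-server my-cluster-tgt-kafka-bootstrap:9092 --describe --all-groups
```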
28 | 29 | ```sh 30 | $ kubectl get po 31 | NAME READY STATUS RESTARTS AGE 32 | my-cluster-broker-10 1/1 Running 0 11m 33 | my-cluster-broker-11 1/1 Running 0 11m 34 | my-cluster-broker-12 1/1 Running 0 11m 35 | my-cluster-controller-0 1/1 Running 0 11m 36 | my-cluster-controller-1 1/1 Running 0 11m 37 | my-cluster-controller-2 1/1 Running 0 11m 38 | my-cluster-entity-operator-657b477d4f-sv77v 2/2 Running 0 10m 39 | my-cluster-tgt-combined-0 1/1 Running 0 6m18s 40 | my-cluster-tgt-combined-1 1/1 Running 0 6m18s 41 | my-cluster-tgt-combined-2 1/1 Running 0 6m18s 42 | my-mm2-cluster-mirrormaker2-0 1/1 Running 0 2m5s 43 | strimzi-cluster-operator-d78fd875b-ljmpl 1/1 Running 0 11m 44 | 45 | $ kubectl get kmm2 my-mm2-cluster -o yaml | yq .status 46 | conditions: 47 | - lastTransitionTime: "2024-10-12T10:14:20.521458310Z" 48 | status: "True" 49 | type: Ready 50 | connectors: 51 | - connector: 52 | state: RUNNING 53 | worker_id: my-mm2-cluster-mirrormaker2-0.my-mm2-cluster-mirrormaker2.test.svc:8083 54 | name: my-cluster->my-cluster-tgt.MirrorCheckpointConnector 55 | tasks: [] 56 | type: source 57 | - connector: 58 | state: RUNNING 59 | worker_id: my-mm2-cluster-mirrormaker2-0.my-mm2-cluster-mirrormaker2.test.svc:8083 60 | name: my-cluster->my-cluster-tgt.MirrorSourceConnector 61 | tasks: 62 | - id: 0 63 | state: RUNNING 64 | worker_id: my-mm2-cluster-mirrormaker2-0.my-mm2-cluster-mirrormaker2.test.svc:8083 65 | - id: 1 66 | state: RUNNING 67 | worker_id: my-mm2-cluster-mirrormaker2-0.my-mm2-cluster-mirrormaker2.test.svc:8083 68 | - id: 2 69 | state: RUNNING 70 | worker_id: my-mm2-cluster-mirrormaker2-0.my-mm2-cluster-mirrormaker2.test.svc:8083 71 | type: source 72 | labelSelector: strimzi.io/cluster=my-mm2-cluster,strimzi.io/name=my-mm2-cluster-mirrormaker2,strimzi.io/kind=KafkaMirrorMaker2 73 | observedGeneration: 2 74 | replicas: 1 75 | url: http://my-mm2-cluster-mirrormaker2-api.test.svc:8083 76 | ``` 77 | 78 | In order to test message replication, we can send 1 million messages to the test topic in the source Kafka cluster. 79 | 80 | > [!WARNING] 81 | > Message replication is asynchronous, so there is always a delta of messaging that is at risk in case of disaster. 82 | 83 | After some time, the log end offsets should match on both clusters. 84 | In real world scenarios, the actual offsets tend to naturally diverge with time, because each Kafka cluster operates independently. 85 | 86 | ```sh 87 | $ kubectl-kafka bin/kafka-producer-perf-test.sh --topic my-topic --record-size 100 --num-records 1000000 \ 88 | --throughput -1 --producer-props acks=1 bootstrap.servers=my-cluster-kafka-bootstrap:9092 89 | 837463 records sent, 167492.6 records/sec (15.97 MB/sec), 1207.8 ms avg latency, 2358.0 ms max latency. 90 | 1000000 records sent, 174733.531365 records/sec (16.66 MB/sec), 1202.91 ms avg latency, 2358.00 ms max latency, 1298 ms 50th, 2138 ms 95th, 2266 ms 99th, 2332 ms 99.9th. 91 | 92 | $ kubectl-kafka bin/kafka-get-offsets.sh --bootstrap-server my-cluster-kafka-bootstrap:9092 --topic my-topic --time -1 93 | my-topic:0:353737 94 | my-topic:1:358846 95 | my-topic:2:287417 96 | 97 | $ kubectl-kafka bin/kafka-get-offsets.sh --bootstrap-server my-cluster-tgt-kafka-bootstrap:9092 --topic my-topic --time -1 98 | my-topic:0:353737 99 | my-topic:1:358846 100 | my-topic:2:287417 101 | ``` 102 | 103 | ## Tuning MM2 for throughput 104 | 105 | High-volume message generation, as seen in web activity tracking, can result in a large number of messages. 
106 | Additionally, even a source cluster with moderate throughput can create a significant volume of messages when mirroring large amounts of existing data.
107 | In this case, MM2 replication is slow even if you have a fast network, because the default producer configuration is not optimized for throughput.
108 | 
109 | Let's run a load test and see how fast we can replicate data with default settings.
110 | By looking at `MirrorSourceConnector` task metrics, we see that we are saturating the producer batch size (`batch.size`, default: 16384 bytes), which is a bottleneck.
111 | 
112 | ```sh
113 | $ kubectl scale kmm2 my-mm2-cluster --replicas 0
114 | kafkamirrormaker2.kafka.strimzi.io/my-mm2-cluster scaled
115 | 
116 | $ kubectl-kafka bin/kafka-producer-perf-test.sh --topic my-topic --record-size 100 --num-records 30000000 \
117 | --throughput -1 --producer-props acks=1 bootstrap.servers=my-cluster-kafka-bootstrap:9092
118 | 1040165 records sent, 207825.2 records/sec (19.82 MB/sec), 752.2 ms avg latency, 1588.0 ms max latency.
119 | ...
120 | 30000000 records sent, 642659.754504 records/sec (61.29 MB/sec), 137.34 ms avg latency, 2517.00 ms max latency, 39 ms 50th, 614 ms 95th, 1474 ms 99th, 2408 ms 99.9th.
121 | ```
122 | 
123 | On my machine, it takes about 10 minutes to get back `NaN` from the following metrics, which means that replication has completed.
124 | 
125 | ```sh
126 | $ kubectl scale kmm2 my-mm2-cluster --replicas 1
127 | kafkamirrormaker2.kafka.strimzi.io/my-mm2-cluster scaled
128 | 
129 | $ kubectl exec $(kubectl get po | grep my-mm2-cluster | awk '{print $1}') -- curl -s http://localhost:9404/metrics \
130 | | grep -e 'kafka_producer_batch_size_avg{clientid="\\"connector-producer-my-cluster->my-cluster-tgt.MirrorSourceConnector' \
131 | -e 'kafka_producer_request_latency_avg{clientid="\\"connector-producer-my-cluster->my-cluster-tgt.MirrorSourceConnector'
132 | kafka_producer_batch_size_avg{clientid="\"connector-producer-my-cluster->my-cluster-tgt.MirrorSourceConnector-0\""} 16277.085847267712
133 | kafka_producer_batch_size_avg{clientid="\"connector-producer-my-cluster->my-cluster-tgt.MirrorSourceConnector-1\""} 16278.264065335754
134 | kafka_producer_batch_size_avg{clientid="\"connector-producer-my-cluster->my-cluster-tgt.MirrorSourceConnector-2\""} 16277.15397200509
135 | kafka_producer_request_latency_avg{clientid="\"connector-producer-my-cluster->my-cluster-tgt.MirrorSourceConnector-0\""} 10.944482877896922
136 | kafka_producer_request_latency_avg{clientid="\"connector-producer-my-cluster->my-cluster-tgt.MirrorSourceConnector-1\""} 14.26193724420191
137 | kafka_producer_request_latency_avg{clientid="\"connector-producer-my-cluster->my-cluster-tgt.MirrorSourceConnector-2\""} 11.238677867056245
138 | ```
139 | 
140 | We now increase the producer batch size to 20x the default by overriding its configuration.
141 | Every batch will include more data, so the same test should complete in about half of the time or even less.
142 | The request latency increases, but it is still within reasonable bounds.
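The value applied below is `16384 * 20 = 327680` bytes.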
143 | 144 | ```sh 145 | $ kubectl get kmm2 my-mm2-cluster -o yaml | yq '.spec.mirrors[0].sourceConnector.config |= ({"producer.override.batch.size": 327680} + .)' | kubectl apply -f - 146 | kafkamirrormaker2.kafka.strimzi.io/my-mm2-cluster configured 147 | 148 | $ kubectl scale kmm2 my-mm2-cluster --replicas 0 149 | kafkamirrormaker2.kafka.strimzi.io/my-mm2-cluster scaled 150 | 151 | $ kubectl-kafka bin/kafka-producer-perf-test.sh --topic my-topic --record-size 100 --num-records 30000000 \ 152 | --throughput -1 --producer-props acks=1 bootstrap.servers=my-cluster-kafka-bootstrap:9092 153 | 3402475 records sent, 680495.0 records/sec (64.90 MB/sec), 32.4 ms avg latency, 342.0 ms max latency. 154 | ... 155 | 30000000 records sent, 923105.326318 records/sec (88.03 MB/sec), 21.94 ms avg latency, 1495.00 ms max latency, 3 ms 50th, 66 ms 95th, 201 ms 99th, 1329 ms 99.9th. 156 | ``` 157 | 158 | On my machine, it now takes about 5 minutes. 159 | 160 | ```sh 161 | $ kubectl scale kmm2 my-mm2-cluster --replicas 1 162 | kafkamirrormaker2.kafka.strimzi.io/my-mm2-cluster scaled 163 | 164 | $ kubectl exec $(kubectl get po | grep my-mm2-cluster | awk '{print $1}') -- curl -s http://localhost:9404/metrics \ 165 | | grep -e 'kafka_producer_batch_size_avg{clientid="\\"connector-producer-my-cluster->my-cluster-tgt.MirrorSourceConnector' \ 166 | -e 'kafka_producer_request_latency_avg{clientid="\\"connector-producer-my-cluster->my-cluster-tgt.MirrorSourceConnector' 167 | kafka_producer_batch_size_avg{clientid="\"connector-producer-my-cluster->my-cluster-tgt.MirrorSourceConnector-0\""} 140310.91324200912 168 | kafka_producer_batch_size_avg{clientid="\"connector-producer-my-cluster->my-cluster-tgt.MirrorSourceConnector-1\""} 143986.90502793295 169 | kafka_producer_batch_size_avg{clientid="\"connector-producer-my-cluster->my-cluster-tgt.MirrorSourceConnector-2\""} 122895.43076923076 170 | kafka_producer_batch_size_avg{clientid="\"connector-producer-my-cluster->my-cluster-tgt.MirrorSourceConnector-3\""} 33464.164893617024 171 | kafka_producer_request_latency_avg{clientid="\"connector-producer-my-cluster->my-cluster-tgt.MirrorSourceConnector-0\""} 59.678899082568805 172 | kafka_producer_request_latency_avg{clientid="\"connector-producer-my-cluster->my-cluster-tgt.MirrorSourceConnector-1\""} 71.0561797752809 173 | kafka_producer_request_latency_avg{clientid="\"connector-producer-my-cluster->my-cluster-tgt.MirrorSourceConnector-2\""} 52.08247422680412 174 | kafka_producer_request_latency_avg{clientid="\"connector-producer-my-cluster->my-cluster-tgt.MirrorSourceConnector-3\""} 41.670212765957444 175 | ``` 176 | -------------------------------------------------------------------------------- /sessions/007/install/mm2.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kafka.strimzi.io/v1beta2 2 | kind: KafkaMirrorMaker2 3 | metadata: 4 | name: my-mm2-cluster 5 | spec: 6 | replicas: 0 7 | #version: x.y.z 8 | connectCluster: my-cluster-tgt 9 | clusters: 10 | - alias: my-cluster 11 | bootstrapServers: my-cluster-kafka-bootstrap.${SOURCE_NS}.svc:9093 12 | tls: 13 | trustedCertificates: 14 | - certificate: ca.crt 15 | secretName: my-cluster-cluster-ca-cert 16 | - alias: my-cluster-tgt 17 | bootstrapServers: my-cluster-tgt-kafka-bootstrap.${TARGET_NS}.svc:9093 18 | tls: 19 | trustedCertificates: 20 | - certificate: ca.crt 21 | secretName: my-cluster-tgt-cluster-ca-cert 22 | config: 23 | # -1 means use default broker RF 24 | config.storage.replication.factor: -1 25 | 
offset.storage.replication.factor: -1 26 | status.storage.replication.factor: -1 27 | ssl.cipher.suites: TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384 28 | ssl.enabled.protocols: TLSv1.2 29 | ssl.protocol: TLSv1.2 30 | mirrors: 31 | - sourceCluster: my-cluster 32 | targetCluster: my-cluster-tgt 33 | topicsPattern: ".*" 34 | groupsPattern: ".*" 35 | sourceConnector: 36 | tasksMax: 10 37 | config: 38 | replication.factor: -1 39 | offset-syncs.topic.replication.factor: -1 40 | offset-syncs.topic.location: "target" 41 | key.converter: "org.apache.kafka.connect.converters.ByteArrayConverter" 42 | value.converter: "org.apache.kafka.connect.converters.ByteArrayConverter" 43 | # disable source cluster name prefix on target topic 44 | replication.policy.class: "org.apache.kafka.connect.mirror.IdentityReplicationPolicy" 45 | refresh.topics.interval.seconds: 20 46 | sync.topic.configs.enabled: true 47 | sync.topic.acls.enabled: false 48 | checkpointConnector: 49 | tasksMax: 10 50 | config: 51 | checkpoints.topic.replication.factor: -1 52 | key.converter: "org.apache.kafka.connect.converters.ByteArrayConverter" 53 | value.converter: "org.apache.kafka.connect.converters.ByteArrayConverter" 54 | # disable source cluster name prefix on target topic 55 | replication.policy.class: "org.apache.kafka.connect.mirror.IdentityReplicationPolicy" 56 | sync.group.offsets.enabled: true 57 | sync.group.offsets.interval.seconds: 20 58 | emit.checkpoints.enabled: true 59 | emit.checkpoints.interval.seconds: 20 60 | refresh.groups.interval.seconds: 20 61 | logging: 62 | type: inline 63 | loggers: 64 | rootLogger.level: INFO 65 | logger.sourcetask.name: org.apache.kafka.connect.runtime.WorkerSourceTask 66 | logger.sourcetask.level: INFO 67 | logger.sinktask.name: org.apache.kafka.connect.runtime.WorkerSinkTask 68 | logger.sinktask.level: INFO 69 | resources: 70 | limits: 71 | cpu: 3000m 72 | memory: 3Gi 73 | requests: 74 | cpu: 1000m 75 | memory: 3Gi 76 | # expose JMX metrics in Prometheus format on port 9404 77 | metricsConfig: 78 | type: jmxPrometheusExporter 79 | valueFrom: 80 | configMapKeyRef: 81 | name: mirror-maker-2-metrics 82 | key: metrics-config.yml 83 | --- 84 | kind: ConfigMap 85 | apiVersion: v1 86 | metadata: 87 | name: mirror-maker-2-metrics 88 | labels: 89 | app: strimzi 90 | data: 91 | metrics-config.yml: | 92 | # See https://github.com/prometheus/jmx_exporter for more info about JMX Prometheus Exporter metrics 93 | lowercaseOutputName: true 94 | lowercaseOutputLabelNames: true 95 | rules: 96 | #kafka.connect:type=app-info,client-id="{clientid}" 97 | #kafka.consumer:type=app-info,client-id="{clientid}" 98 | #kafka.producer:type=app-info,client-id="{clientid}" 99 | - pattern: 'kafka.(.+)<>start-time-ms' 100 | name: kafka_$1_start_time_seconds 101 | labels: 102 | clientId: "$2" 103 | help: "Kafka $1 JMX metric start time seconds" 104 | type: GAUGE 105 | valueFactor: 0.001 106 | - pattern: 'kafka.(.+)<>(commit-id|version): (.+)' 107 | name: kafka_$1_$3_info 108 | value: 1 109 | labels: 110 | clientId: "$2" 111 | $3: "$4" 112 | help: "Kafka $1 JMX metric info version and commit-id" 113 | type: UNTYPED 114 | 115 | #kafka.producer:type=producer-topic-metrics,client-id="{clientid}",topic="{topic}"", partition="{partition}" 116 | #kafka.consumer:type=consumer-fetch-manager-metrics,client-id="{clientid}",topic="{topic}"", partition="{partition}" 117 | - pattern: kafka.(.+)<>(.+-total) 118 | name: kafka_$2_$6 119 | labels: 120 | clientId: "$3" 121 | topic: "$4" 122 | partition: "$5" 123 | help: "Kafka $1 JMX metric type 
$2" 124 | type: COUNTER 125 | - pattern: kafka.(.+)<>(compression-rate|.+-avg|.+-replica|.+-lag|.+-lead) 126 | name: kafka_$2_$6 127 | labels: 128 | clientId: "$3" 129 | topic: "$4" 130 | partition: "$5" 131 | help: "Kafka $1 JMX metric type $2" 132 | type: GAUGE 133 | 134 | #kafka.producer:type=producer-topic-metrics,client-id="{clientid}",topic="{topic}" 135 | #kafka.consumer:type=consumer-fetch-manager-metrics,client-id="{clientid}",topic="{topic}"", partition="{partition}" 136 | - pattern: kafka.(.+)<>(.+-total) 137 | name: kafka_$2_$5 138 | labels: 139 | clientId: "$3" 140 | topic: "$4" 141 | help: "Kafka $1 JMX metric type $2" 142 | type: COUNTER 143 | - pattern: kafka.(.+)<>(compression-rate|.+-avg) 144 | name: kafka_$2_$5 145 | labels: 146 | clientId: "$3" 147 | topic: "$4" 148 | help: "Kafka $1 JMX metric type $2" 149 | type: GAUGE 150 | 151 | #kafka.connect:type=connect-node-metrics,client-id="{clientid}",node-id="{nodeid}" 152 | #kafka.consumer:type=consumer-node-metrics,client-id=consumer-1,node-id="{nodeid}" 153 | - pattern: kafka.(.+)<>(.+-total) 154 | name: kafka_$2_$5 155 | labels: 156 | clientId: "$3" 157 | nodeId: "$4" 158 | help: "Kafka $1 JMX metric type $2" 159 | type: COUNTER 160 | - pattern: kafka.(.+)<>(.+-avg) 161 | name: kafka_$2_$5 162 | labels: 163 | clientId: "$3" 164 | nodeId: "$4" 165 | help: "Kafka $1 JMX metric type $2" 166 | type: GAUGE 167 | 168 | #kafka.connect:type=kafka-metrics-count,client-id="{clientid}" 169 | #kafka.consumer:type=consumer-fetch-manager-metrics,client-id="{clientid}" 170 | #kafka.consumer:type=consumer-coordinator-metrics,client-id="{clientid}" 171 | #kafka.consumer:type=consumer-metrics,client-id="{clientid}" 172 | - pattern: kafka.(.+)<>(.+-total) 173 | name: kafka_$2_$4 174 | labels: 175 | clientId: "$3" 176 | help: "Kafka $1 JMX metric type $2" 177 | type: COUNTER 178 | - pattern: kafka.(.+)<>(.+-avg|.+-bytes|.+-count|.+-ratio|.+-age|.+-flight|.+-threads|.+-connectors|.+-tasks|.+-ago) 179 | name: kafka_$2_$4 180 | labels: 181 | clientId: "$3" 182 | help: "Kafka $1 JMX metric type $2" 183 | type: GAUGE 184 | 185 | #kafka.connect:type=connector-task-metrics,connector="{connector}",task="{task}<> status" 186 | - pattern: 'kafka.connect<>status: ([a-z-]+)' 187 | name: kafka_connect_connector_status 188 | value: 1 189 | labels: 190 | connector: "$1" 191 | task: "$2" 192 | status: "$3" 193 | help: "Kafka Connect JMX Connector status" 194 | type: GAUGE 195 | 196 | #kafka.connect:type=task-error-metrics,connector="{connector}",task="{task}" 197 | #kafka.connect:type=source-task-metrics,connector="{connector}",task="{task}" 198 | #kafka.connect:type=sink-task-metrics,connector="{connector}",task="{task}" 199 | #kafka.connect:type=connector-task-metrics,connector="{connector}",task="{task}" 200 | - pattern: kafka.connect<>(.+-total) 201 | name: kafka_connect_$1_$4 202 | labels: 203 | connector: "$2" 204 | task: "$3" 205 | help: "Kafka Connect JMX metric type $1" 206 | type: COUNTER 207 | - pattern: kafka.connect<>(.+-count|.+-ms|.+-ratio|.+-avg|.+-failures|.+-requests|.+-timestamp|.+-logged|.+-errors|.+-retries|.+-skipped) 208 | name: kafka_connect_$1_$4 209 | labels: 210 | connector: "$2" 211 | task: "$3" 212 | help: "Kafka Connect JMX metric type $1" 213 | type: GAUGE 214 | 215 | #kafka.connect:type=connector-metrics,connector="{connector}" 216 | #kafka.connect:type=connect-worker-metrics,connector="{connector}" 217 | - pattern: kafka.connect<>([a-z-]+) 218 | name: kafka_connect_worker_$2 219 | labels: 220 | connector: "$1" 221 | help: 
"Kafka Connect JMX metric $1" 222 | type: GAUGE 223 | 224 | #kafka.connect:type=connect-worker-metrics 225 | - pattern: kafka.connect<>([a-z-]+-total) 226 | name: kafka_connect_worker_$1 227 | help: "Kafka Connect JMX metric worker" 228 | type: COUNTER 229 | - pattern: kafka.connect<>([a-z-]+) 230 | name: kafka_connect_worker_$1 231 | help: "Kafka Connect JMX metric worker" 232 | type: GAUGE 233 | 234 | #kafka.connect:type=connect-worker-rebalance-metrics 235 | - pattern: kafka.connect<>([a-z-]+-total) 236 | name: kafka_connect_worker_rebalance_$1 237 | help: "Kafka Connect JMX metric rebalance information" 238 | type: COUNTER 239 | - pattern: kafka.connect<>([a-z-]+) 240 | name: kafka_connect_worker_rebalance_$1 241 | help: "Kafka Connect JMX metric rebalance information" 242 | type: GAUGE 243 | 244 | #kafka.connect:type=MirrorSourceConnector 245 | - pattern: kafka.connect.mirror<>([a-z-_]+) 246 | name: kafka_connect_mirror_mirrorsourceconnector_$4 247 | labels: 248 | target: "$1" 249 | topic: "$2" 250 | partition: "$3" 251 | help: "Kafka Mirror Maker 2 Source Connector metrics" 252 | type: GAUGE 253 | 254 | #kafka.connect:type=MirrorCheckpointConnector 255 | - pattern: kafka.connect.mirror<>([a-z-_]+) 256 | name: kafka_connect_mirror_mirrorcheckpointconnector_$6 257 | labels: 258 | source: "$1" 259 | target: "$2" 260 | group: "$3" 261 | topic: "$4" 262 | partition: "$5" 263 | help: "Kafka Mirror Maker 2 Checkpoint Connector metrics" 264 | type: GAUGE 265 | -------------------------------------------------------------------------------- /sessions/007/install/target.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kafka.strimzi.io/v1beta2 2 | kind: KafkaNodePool 3 | metadata: 4 | name: combined 5 | labels: 6 | # must match the cluster name 7 | strimzi.io/cluster: my-cluster-tgt 8 | spec: 9 | replicas: 3 10 | roles: 11 | - controller 12 | - broker 13 | resources: 14 | # set requests==limits to have Guaranteed QoS 15 | limits: 16 | cpu: 500m 17 | memory: 1Gi 18 | requests: 19 | cpu: 250m 20 | memory: 1Gi 21 | storage: 22 | size: 5Gi 23 | type: persistent-claim 24 | deleteClaim: false 25 | --- 26 | apiVersion: kafka.strimzi.io/v1beta2 27 | kind: Kafka 28 | metadata: 29 | name: my-cluster-tgt 30 | annotations: 31 | strimzi.io/node-pools: enabled 32 | strimzi.io/kraft: enabled 33 | spec: 34 | kafka: 35 | config: 36 | num.partitions: 3 37 | default.replication.factor: 3 38 | min.insync.replicas: 2 39 | offsets.topic.replication.factor: 3 40 | transaction.state.log.replication.factor: 3 41 | transaction.state.log.min.isr: 2 42 | listeners: 43 | - name: plain 44 | port: 9092 45 | type: internal 46 | tls: false 47 | - name: tls 48 | port: 9093 49 | type: internal 50 | tls: true 51 | -------------------------------------------------------------------------------- /sessions/008/README.md: -------------------------------------------------------------------------------- 1 | ## Avoid running out of disk space with the Strimzi quota plugin 2 | 3 | > [!WARNING] 4 | > Don't use Minikube, as it uses hostpath volumes that do not enforce storage capacity. 5 | 6 | For the sake of this example, we deploy the Kafka cluster reducing the volume size. 
7 | 8 | ```sh 9 | $ sed -E 's/size: .*/size: "1Gi"/g' sessions/001/install.yaml | kubectl create -f - 10 | kafkanodepool.kafka.strimzi.io/broker created 11 | kafkanodepool.kafka.strimzi.io/controller created 12 | kafka.kafka.strimzi.io/my-cluster created 13 | kafkatopic.kafka.strimzi.io/my-topic created 14 | 15 | $ kubectl get pv | grep my-cluster-broker 16 | pvc-2609aaa7-3a13-4bc3-9d0a-cc19c4ccef50 1Gi RWO Delete Bound test/data-my-cluster-broker-10 gp3-csi 4m15s 17 | pvc-55f69017-6ef9-4701-94ef-ffb90433cebd 1Gi RWO Delete Bound test/data-my-cluster-broker-11 gp3-csi 4m15s 18 | pvc-741a3a77-9f5a-4656-af71-d41619e12bfc 1Gi RWO Delete Bound test/data-my-cluster-broker-12 gp3-csi 4m15s 19 | ``` 20 | 21 | Only network bandwidth and request rate quotas are supported by the default Kafka quota plugin. 22 | Instead, the [Strimzi quota plugin](https://github.com/strimzi/kafka-quotas-plugin) allows to set storage limits independent of the number of clients. 23 | 24 | The Strimzi Kafka images already contains this plugin. 25 | With the following configuration, all clients will be throttled to 0 when any volume in the cluster has less than 30% available space. 26 | The check interval is set to 5 seconds. 27 | 28 | ```sh 29 | $ kubectl patch k my-cluster --type=json \ 30 | -p='[{"op": "add", "path": "/spec/kafka/config/client.quota.callback.static.storage.check-interval", "value": "5"}]' \ 31 | && kubectl patch k my-cluster --type merge -p ' 32 | spec: 33 | kafka: 34 | quotas: 35 | type: strimzi 36 | minAvailableRatioPerVolume: 0.3' 37 | kafka.kafka.strimzi.io/my-cluster patched 38 | kafka.kafka.strimzi.io/my-cluster patched 39 | ``` 40 | 41 | After that, the cluster operator will roll all brokers to enable the quota plugin. 42 | When the cluster is ready, we try to break it by sending 3.3 GiB of data to a topic, which exceeds the cluster capacity. 43 | 44 | ```sh 45 | $ kubectl-kafka bin/kafka-producer-perf-test.sh --topic my-topic --record-size 1000 --num-records 3300000 \ 46 | --throughput -1 --producer-props acks=all bootstrap.servers=my-cluster-kafka-bootstrap:9092 47 | 21873 records sent, 4373.7 records/sec (4.17 MB/sec), 2509.9 ms avg latency, 4285.0 ms max latency. 48 | 41344 records sent, 8268.8 records/sec (7.89 MB/sec), 4536.6 ms avg latency, 5997.0 ms max latency. 49 | 49104 records sent, 9820.8 records/sec (9.37 MB/sec), 3575.3 ms avg latency, 4295.0 ms max latency. 50 | ... 51 | org.apache.kafka.clients.producer.BufferExhaustedException: Failed to allocate 16384 bytes within the configured max blocking time 60000 ms. Total memory: 33554432 bytes. Available memory: 0 bytes. Poolable size: 16384 bytes 52 | org.apache.kafka.common.errors.TimeoutException: Expiring 16 record(s) for my-topic-0:120018 ms has passed since batch creation 53 | org.apache.kafka.common.errors.TimeoutException: Expiring 16 record(s) for my-topic-0:120018 ms has passed since batch creation 54 | org.apache.kafka.common.errors.TimeoutException: Expiring 16 record(s) for my-topic-0:120018 ms has passed since batch creation 55 | ... 56 | ^C 57 | ``` 58 | 59 | At some point, the perf client can't send data anymore, but the cluster is still healthy. 
60 | 61 | ```sh 62 | $ kubectl get po | grep my-cluster-broker 63 | my-cluster-broker-10 0/1 CrashLoopBackOff 8 (70s ago) 27m 64 | my-cluster-broker-11 0/1 CrashLoopBackOff 8 (84s ago) 25m 65 | my-cluster-broker-12 0/1 CrashLoopBackOff 8 (87s ago) 26m 66 | 67 | $ kubectl exec my-cluster-broker-10 -- df -h /var/lib/kafka/data \ 68 | && kubectl exec my-cluster-broker-11-- df -h /var/lib/kafka/data \ 69 | && kubectl exec my-cluster-broker-12-- df -h /var/lib/kafka/data 70 | Filesystem Size Used Avail Use% Mounted on 71 | /dev/nvme1n1 974M 735M 223M 77% /var/lib/kafka/data 72 | Filesystem Size Used Avail Use% Mounted on 73 | /dev/nvme1n1 974M 735M 223M 77% /var/lib/kafka/data 74 | Filesystem Size Used Avail Use% Mounted on 75 | /dev/nvme1n1 974M 735M 223M 77% /var/lib/kafka/data 76 | ``` 77 | 78 | ## Online Kafka volume recovery with expansion support 79 | 80 | > [!WARNING] 81 | > Don't use Minikube, as it uses hostpath volumes that do not enforce storage capacity. 82 | 83 | For the sake of this example, we deploy the Kafka cluster reducing the volume size. 84 | 85 | ```sh 86 | $ sed -E 's/size: .*/size: "1Gi"/g' sessions/001/install.yaml | kubectl create -f - 87 | kafkanodepool.kafka.strimzi.io/broker created 88 | kafkanodepool.kafka.strimzi.io/controller created 89 | kafka.kafka.strimzi.io/my-cluster created 90 | kafkatopic.kafka.strimzi.io/my-topic created 91 | 92 | $ kubectl get pv | grep my-cluster-broker 93 | pvc-568b390e-d8a3-4efa-a528-dbd0934e18e8 1Gi RWO Delete Bound test/data-my-cluster-broker-11 gp3-csi 4m57s 94 | pvc-875bbcc9-5f86-442e-9e05-f2b8852c83ce 1Gi RWO Delete Bound test/data-my-cluster-broker-10 gp3-csi 4m57s 95 | pvc-c328aab2-8948-4791-88df-a488e9fd9faa 1Gi RWO Delete Bound test/data-my-cluster-broker-12 gp3-csi 4m57s 96 | ``` 97 | 98 | When the cluster is ready, we break it by sending 3.3 GiB of data to a topic, which exceeds the cluster capacity. 99 | 100 | ```sh 101 | $ kubectl-kafka bin/kafka-producer-perf-test.sh --topic my-topic --record-size 1000 --num-records 3300000 \ 102 | --throughput -1 --producer-props acks=all bootstrap.servers=my-cluster-kafka-bootstrap:9092 103 | 22513 records sent, 4486.4 records/sec (4.28 MB/sec), 2544.8 ms avg latency, 4258.0 ms max latency. 104 | 39104 records sent, 7820.8 records/sec (7.46 MB/sec), 4756.2 ms avg latency, 6197.0 ms max latency. 105 | 52928 records sent, 10585.6 records/sec (10.10 MB/sec), 3318.4 ms avg latency, 4669.0 ms max latency. 106 | ... 107 | [2024-10-12 12:04:09,916] WARN [Producer clientId=perf-producer-client] Connection to node 5 (my-cluster-broker-10.my-cluster-kafka-brokers.test.svc/10.130.0.31:9092) could not be established. Node may not be available. (org.apache.kafka.clients.NetworkClient) 108 | [2024-10-12 12:04:09,920] WARN [Producer clientId=perf-producer-client] Connection to node 7 (my-cluster-broker-12.my-cluster-kafka-brokers.test.svc/10.129.0.28:9092) could not be established. Node may not be available. (org.apache.kafka.clients.NetworkClient) 109 | [2024-10-12 12:04:09,931] WARN [Producer clientId=perf-producer-client] Connection to node 6 (my-cluster-broker-11.my-cluster-kafka-brokers.test.svc/10.131.0.18:9092) could not be established. Node may not be available. 
(org.apache.kafka.clients.NetworkClient) 110 | ^C 111 | 112 | $ kubectl get po | grep my-cluster-broker 113 | my-cluster-broker-10 0/1 CrashLoopBackOff 8 (70s ago) 27m 114 | my-cluster-broker-11 0/1 CrashLoopBackOff 8 (84s ago) 25m 115 | my-cluster-broker-12 0/1 CrashLoopBackOff 8 (87s ago) 26m 116 | 117 | $ kubectl logs my-cluster-broker-10| grep "No space left on device" | tail -n1 118 | Caused by: java.io.IOException: No space left on device 119 | ``` 120 | 121 | Even if not all pods failed, we still need to increase the volume size of all brokers because the storage configuration is shared. 122 | If volume expansion is supported on the storage class, you can simply increase the storage size in the Kafka resource, and the operator will take care of it. 123 | This operation may take some time to complete, depending on the size of the volume and the available resources in the cluster. 124 | 125 | > [!WARNING] 126 | > The expansion is not always feasible in cloud deployments, for example with a standard block size of 4KB an AWS EBS volume can support only up to 16TB. 127 | 128 | ```sh 129 | [[ $(kubectl get sc $(kubectl get pv | grep data-my-cluster-broker-10| awk '{print $7}') -o yaml | yq .allowVolumeExpansion) == "true" ]] \ 130 | && kubectl patch knp broker --type merge -p ' 131 | spec: 132 | storage: 133 | size: 10Gi' 134 | kafkanodepool.kafka.strimzi.io/broker patched 135 | 136 | $ kubectl logs $(kubectl get po | grep cluster-operator | awk '{print $1}') | grep "Resizing" 137 | 2024-10-12 12:10:08 INFO PvcReconciler:137 - Reconciliation #1(watch) Kafka(test/my-cluster): Resizing PVC data-my-cluster-broker-10 from 1 to 10Gi. 138 | 2024-10-12 12:10:08 INFO PvcReconciler:137 - Reconciliation #1(watch) Kafka(test/my-cluster): Resizing PVC data-my-cluster-broker-11 from 1 to 10Gi. 139 | 2024-10-12 12:10:08 INFO PvcReconciler:137 - Reconciliation #1(watch) Kafka(test/my-cluster): Resizing PVC data-my-cluster-broker-12 from 1 to 10Gi. 140 | 141 | $ kubectl get po | grep my-cluster-broker 142 | my-cluster-broker-10 1/1 Running 0 13m 143 | my-cluster-broker-11 1/1 Running 0 13m 144 | my-cluster-broker-12 1/1 Running 0 13m 145 | 146 | $ kubectl get pv | grep my-cluster-broker 147 | pvc-568b390e-d8a3-4efa-a528-dbd0934e18e8 10Gi RWO Delete Bound test/data-my-cluster-broker-11 gp3-csi 14m 148 | pvc-875bbcc9-5f86-442e-9e05-f2b8852c83ce 10Gi RWO Delete Bound test/data-my-cluster-broker-10 gp3-csi 14m 149 | pvc-c328aab2-8948-4791-88df-a488e9fd9faa 10Gi RWO Delete Bound test/data-my-cluster-broker-12 gp3-csi 14m 150 | ``` 151 | 152 | ## Offline Kafka volume recovery with no expansion support (expert level) 153 | 154 | > [!WARNING] 155 | > Don't use Minikube, as it uses hostpath volumes that do not enforce storage capacity. 156 | 157 | For the sake of this example, we deploy the Kafka cluster reducing the volume size. 
158 | 
159 | ```sh
160 | $ sed -E 's/size: .*/size: "1Gi"/g' sessions/001/install.yaml | kubectl create -f -
161 | kafkanodepool.kafka.strimzi.io/broker created
162 | kafkanodepool.kafka.strimzi.io/controller created
163 | kafka.kafka.strimzi.io/my-cluster created
164 | kafkatopic.kafka.strimzi.io/my-topic created
165 | 
166 | $ kubectl wait --timeout=120s --for=condition=ready k my-cluster; \
167 |   KAFKA_PODS="$(kubectl get po | grep my-cluster-broker | awk '{print $1}')" \
168 |   VOLUME_CLASS="$(kubectl get pv | grep my-cluster-broker | head -n1 | awk '{print $7}')" \
169 |   CLUSTER_ID="$(kubectl get k my-cluster -o yaml | yq .status.clusterId)"
170 | NEW_VOLUME_SIZE="10Gi"
171 | 
172 | $ kubectl get pv | grep my-cluster-broker
173 | pvc-6efa4986-a8f8-42d3-ae80-0229d262cf81 1Gi RWO Delete Bound test/data-my-cluster-broker-12 gp3-csi 66s
174 | pvc-d76d68c6-52e9-4a9f-a20f-3b052ea49c55 1Gi RWO Delete Bound test/data-my-cluster-broker-11 gp3-csi 66s
175 | pvc-fe5ccdb3-b550-467e-b6e0-f4d3ece79ed0 1Gi RWO Delete Bound test/data-my-cluster-broker-10 gp3-csi 66s
176 | ```
177 | 
178 | When the cluster is ready, we break it by sending 3.3 GiB of data to a topic, which exceeds the cluster capacity.
179 | 
180 | ```sh
181 | $ kubectl-kafka bin/kafka-producer-perf-test.sh --topic my-topic --record-size 1000 --num-records 3300000 \
182 |   --throughput -1 --producer-props acks=all bootstrap.servers=my-cluster-kafka-bootstrap:9092
183 | 15521 records sent, 3104.2 records/sec (2.96 MB/sec), 2627.4 ms avg latency, 4363.0 ms max latency.
184 | 36192 records sent, 7222.5 records/sec (6.89 MB/sec), 5360.9 ms avg latency, 6964.0 ms max latency.
185 | 43728 records sent, 8745.6 records/sec (8.34 MB/sec), 4132.9 ms avg latency, 5104.0 ms max latency.
186 | ...
187 | [2024-10-16 16:06:47,718] WARN [Producer clientId=perf-producer-client] Connection to node 5 (my-cluster-broker-10.my-cluster-kafka-brokers.test.svc/10.130.0.17:9092) could not be established. Node may not be available. (org.apache.kafka.clients.NetworkClient)
188 | [2024-10-16 16:06:47,718] WARN [Producer clientId=perf-producer-client] Connection to node 7 (my-cluster-broker-12.my-cluster-kafka-brokers.test.svc/10.131.0.24:9092) could not be established. Node may not be available. (org.apache.kafka.clients.NetworkClient)
189 | [2024-10-16 16:06:47,718] WARN [Producer clientId=perf-producer-client] Connection to node 6 (my-cluster-broker-11.my-cluster-kafka-brokers.test.svc/10.129.0.14:9092) could not be established. Node may not be available. (org.apache.kafka.clients.NetworkClient)
190 | ^C
191 | 
192 | $ kubectl get po | grep my-cluster-broker
193 | my-cluster-broker-10 0/1 CrashLoopBackOff 2 (12s ago) 3m41s
194 | my-cluster-broker-11 0/1 CrashLoopBackOff 2 (11s ago) 3m41s
195 | my-cluster-broker-12 0/1 CrashLoopBackOff 2 (14s ago) 3m41s
196 | 
197 | $ kubectl logs $(kubectl get po | grep my-cluster-broker | head -n1 | awk '{print $1}') | grep "No space left on device" | tail -n1
198 | Caused by: java.io.IOException: No space left on device
199 | ```
200 | 
201 | Even if not all pods have failed, we still need to increase the volume size of all brokers because the storage configuration is shared.
202 | This procedure works offline because copying data while it is being modified can cause tricky problems, especially if transactions are enabled.
203 | 
204 | > [!WARNING]
205 | > Before deleting the Kafka cluster, make sure that the `deleteClaim` storage configuration is set to `false` in the KafkaNodePool resource.
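If `deleteClaim` turns out to be `true`, it can be switched off before touching the cluster. A minimal sketch, assuming the broker pool is the only one with data you care about; the operator applies the change on the next reconciliation:

```sh
# Optional pre-step: disable claim deletion so the PVCs survive the cluster deletion.
$ kubectl patch knp broker --type merge -p '
spec:
  storage:
    deleteClaim: false'
```

The check and the actual cluster deletion follow.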
206 | 207 | ```sh 208 | $ [[ $(kubectl get knp broker -o yaml | yq .spec.storage.deleteClaim) == "false" ]] \ 209 | && kubectl delete knp broker controller && kubectl delete k my-cluster 210 | kafkanodepool.kafka.strimzi.io "controller" deleted 211 | kafkanodepool.kafka.strimzi.io "broker" deleted 212 | kafka.kafka.strimzi.io "my-cluster" deleted 213 | ``` 214 | 215 | Create new and bigger volumes for our brokers. 216 | In this case, volumes are created automatically, but you may need to create them manually. 217 | They will be bound only when the first consumer (pod) will be created. 218 | 219 | ```sh 220 | $ for pod in $KAFKA_PODS; do 221 | echo "apiVersion: v1 222 | kind: PersistentVolumeClaim 223 | metadata: 224 | name: data-$pod-new 225 | labels: 226 | strimzi.io/name: my-cluster-kafka 227 | strimzi.io/pool-name: broker 228 | spec: 229 | accessModes: 230 | - ReadWriteOnce 231 | storageClassName: $VOLUME_CLASS 232 | resources: 233 | requests: 234 | storage: $NEW_VOLUME_SIZE" | kubectl create -f- 235 | done 236 | persistentvolumeclaim/data-my-cluster-broker-10-new created 237 | persistentvolumeclaim/data-my-cluster-broker-11-new created 238 | persistentvolumeclaim/data-my-cluster-broker-12-new created 239 | 240 | $ kubectl get pvc | grep my-cluster-broker 241 | data-my-cluster-broker-10 Bound pvc-fe5ccdb3-b550-467e-b6e0-f4d3ece79ed0 1Gi RWO gp3-csi 8m25s 242 | data-my-cluster-broker-10-new Pending gp3-csi 15s 243 | data-my-cluster-broker-11 Bound pvc-d76d68c6-52e9-4a9f-a20f-3b052ea49c55 1Gi RWO gp3-csi 8m25s 244 | data-my-cluster-broker-11-new Pending gp3-csi 15s 245 | data-my-cluster-broker-12 Bound pvc-6efa4986-a8f8-42d3-ae80-0229d262cf81 1Gi RWO gp3-csi 8m25s 246 | data-my-cluster-broker-12-new Pending gp3-csi 14s 247 | ``` 248 | 249 | Using a maintenance pod, copy all broker data from the old volumes to the new volumes. 250 | 251 | > [!NOTE] 252 | > The following command may take some time, depending on the amount of data to copy. 
253 | 254 | ```sh 255 | $ for pod in $KAFKA_PODS; do 256 | kubectl run kubectl-patch-$pod -itq --rm --restart "Never" --image "foo" --overrides "{ 257 | \"spec\": { 258 | \"containers\": [ 259 | { 260 | \"name\": \"busybox\", 261 | \"image\": \"busybox\", 262 | \"imagePullPolicy\": \"IfNotPresent\", 263 | \"command\": [\"/bin/sh\", \"-c\", \"cp -auvR /old/* /new\"], 264 | \"volumeMounts\": [ 265 | {\"name\": \"old\", \"mountPath\": \"/old\"}, 266 | {\"name\": \"new\", \"mountPath\": \"/new\"} 267 | ] 268 | } 269 | ], 270 | \"volumes\": [ 271 | {\"name\": \"old\", \"persistentVolumeClaim\": {\"claimName\": \"data-$pod\"}}, 272 | {\"name\": \"new\", \"persistentVolumeClaim\": {\"claimName\": \"data-$pod-new\"}} 273 | ] 274 | } 275 | }" 276 | done 277 | '/old/kafka-log10/.lock' -> '/new/kafka-log10/.lock' 278 | '/old/kafka-log10/bootstrap.checkpoint' -> '/new/kafka-log10/bootstrap.checkpoint' 279 | '/old/kafka-log10/recovery-point-offset-checkpoint' -> '/new/kafka-log10/recovery-point-offset-checkpoint' 280 | '/old/kafka-log10/meta.properties' -> '/new/kafka-log10/meta.properties' 281 | '/old/kafka-log10/__cluster_metadata-0/00000000000000000256.snapshot' -> '/new/kafka-log10/__cluster_metadata-0/00000000000000000256.snapshot' 282 | '/old/kafka-log10/__cluster_metadata-0/partition.metadata' -> '/new/kafka-log10/__cluster_metadata-0/partition.metadata' 283 | '/old/kafka-log10/__cluster_metadata-0/00000000000000000000.log' -> '/new/kafka-log10/__cluster_metadata-0/00000000000000000000.log' 284 | '/old/kafka-log10/__cluster_metadata-0/00000000000000000000.index' -> '/new/kafka-log10/__cluster_metadata-0/00000000000000000000.index' 285 | '/old/kafka-log10/__cluster_metadata-0/00000000000000000000.timeindex' -> '/new/kafka-log10/__cluster_metadata-0/00000000000000000000.timeindex' 286 | '/old/kafka-log10/__cluster_metadata-0/leader-epoch-checkpoint.tmp' -> '/new/kafka-log10/__cluster_metadata-0/leader-epoch-checkpoint.tmp' 287 | '/old/kafka-log10/__cluster_metadata-0/leader-epoch-checkpoint' -> '/new/kafka-log10/__cluster_metadata-0/leader-epoch-checkpoint' 288 | ... 289 | 290 | $ kubectl get pv | grep my-cluster-broker 291 | pvc-327097ee-094b-4725-afb9-1077b42f8504 10Gi RWO Delete Bound test/data-my-cluster-broker-11-new gp3-csi 106s 292 | pvc-5f306a61-0d84-4cbb-b1b4-8e05728f0397 10Gi RWO Delete Bound test/data-my-cluster-broker-10-new gp3-csi 2m6s 293 | pvc-6efa4986-a8f8-42d3-ae80-0229d262cf81 1Gi RWO Delete Bound test/data-my-cluster-broker-12 gp3-csi 24m 294 | pvc-777daab8-91e0-4560-8c12-e22318ffd9df 10Gi RWO Delete Bound test/data-my-cluster-broker-12-new gp3-csi 84s 295 | pvc-d76d68c6-52e9-4a9f-a20f-3b052ea49c55 1Gi RWO Delete Bound test/data-my-cluster-broker-11 gp3-csi 24m 296 | pvc-fe5ccdb3-b550-467e-b6e0-f4d3ece79ed0 1Gi RWO Delete Bound test/data-my-cluster-broker-10 gp3-csi 24m 297 | ``` 298 | 299 | > [!WARNING] 300 | > Set the persistent volume reclaim policy as Retain to avoid losing data when deleting broker PVCs. 
301 | 302 | ```sh 303 | $ for pv in $(kubectl get pv | grep my-cluster-broker | awk '{print $1}'); do 304 | kubectl patch pv $pv --type merge -p ' 305 | spec: 306 | persistentVolumeReclaimPolicy: Retain' 307 | done 308 | persistentvolume/pvc-6efa4986-a8f8-42d3-ae80-0229d262cf81 patched 309 | persistentvolume/pvc-d76d68c6-52e9-4a9f-a20f-3b052ea49c55 patched 310 | persistentvolume/pvc-fe5ccdb3-b550-467e-b6e0-f4d3ece79ed0 patched 311 | persistentvolume/pvc-777daab8-91e0-4560-8c12-e22318ffd9df patched 312 | persistentvolume/pvc-327097ee-094b-4725-afb9-1077b42f8504 patched 313 | persistentvolume/pvc-5f306a61-0d84-4cbb-b1b4-8e05728f0397 patched 314 | 315 | $ kubectl get pv | grep my-cluster-broker 316 | pvc-13e660ba-6a21-4bad-876b-cabab93ce38b 1Gi RWO Retain Bound test/data-my-cluster-broker-11 gp3-csi 14m 317 | pvc-2522a5ad-5275-4459-83f0-149d8cd007f3 10Gi RWO Retain Bound test/data-my-cluster-broker-11-new gp3-csi 79s 318 | pvc-26590b0f-c1ba-4069-9c24-f731287a7ed3 10Gi RWO Retain Bound test/data-my-cluster-broker-10-new gp3-csi 100s 319 | pvc-35fed9c0-f12f-4012-899a-759add4cef4e 10Gi RWO Retain Bound test/data-my-cluster-broker-12-new gp3-csi 57s 320 | pvc-aed21c6a-3b78-4a18-8e44-596285652b9d 1Gi RWO Retain Bound test/data-my-cluster-broker-10 gp3-csi 14m 321 | pvc-d7b08cd6-8199-4cbf-9193-98f0f6a3a29d 1Gi RWO Retain Bound test/data-my-cluster-broker-12 gp3-csi 14m 322 | ``` 323 | 324 | Now, delete all Kafka PVCs and PV claim references, just before creating the new PVCs with the new storage size. 325 | We have to use the same resource name that the operator expects, so that the new volumes will be bound on cluster startup. 326 | 327 | ```sh 328 | $ for pod in $KAFKA_PODS; do 329 | PVC_NAMES="$(kubectl get pvc | grep data-$pod | awk '{print $1}')" 330 | PV_NAMES="$(kubectl get pv | grep data-$pod | awk '{print $1}')" 331 | NEW_PV_NAME="$(kubectl get pv | grep data-$pod-new | awk '{print $1}')" 332 | kubectl delete pvc $PVC_NAMES 333 | kubectl patch pv $PV_NAMES --type json -p '[{"op":"remove","path":"/spec/claimRef"}]' 334 | echo "apiVersion: v1 335 | kind: PersistentVolumeClaim 336 | metadata: 337 | name: data-$pod 338 | labels: 339 | strimzi.io/name: my-cluster-kafka 340 | strimzi.io/pool-name: broker 341 | spec: 342 | accessModes: 343 | - ReadWriteOnce 344 | storageClassName: $VOLUME_CLASS 345 | volumeName: $NEW_PV_NAME 346 | resources: 347 | requests: 348 | storage: $NEW_VOLUME_SIZE" | kubectl create -f - 349 | done 350 | persistentvolumeclaim "data-my-cluster-broker-10" deleted 351 | persistentvolumeclaim "data-my-cluster-broker-10-new" deleted 352 | persistentvolume/pvc-26590b0f-c1ba-4069-9c24-f731287a7ed3 patched 353 | persistentvolume/pvc-aed21c6a-3b78-4a18-8e44-596285652b9d patched 354 | persistentvolumeclaim/data-my-cluster-broker-10created 355 | persistentvolumeclaim "data-my-cluster-broker-11" deleted 356 | persistentvolumeclaim "data-my-cluster-broker-11-new" deleted 357 | persistentvolume/pvc-13e660ba-6a21-4bad-876b-cabab93ce38b patched 358 | persistentvolume/pvc-2522a5ad-5275-4459-83f0-149d8cd007f3 patched 359 | persistentvolumeclaim/data-my-cluster-broker-11created 360 | persistentvolumeclaim "data-my-cluster-broker-12" deleted 361 | persistentvolumeclaim "data-my-cluster-broker-12-new" deleted 362 | persistentvolume/pvc-35fed9c0-f12f-4012-899a-759add4cef4e patched 363 | persistentvolume/pvc-d7b08cd6-8199-4cbf-9193-98f0f6a3a29d patched 364 | persistentvolumeclaim/data-my-cluster-broker-12created 365 | 366 | $ kubectl get pvc | grep my-cluster-broker 367 | 
data-my-cluster-broker-10 Bound pvc-26590b0f-c1ba-4069-9c24-f731287a7ed3 10Gi RWO gp3-csi 25s 368 | data-my-cluster-broker-11 Bound pvc-2522a5ad-5275-4459-83f0-149d8cd007f3 10Gi RWO gp3-csi 21s 369 | data-my-cluster-broker-12 Bound pvc-35fed9c0-f12f-4012-899a-759add4cef4e 10Gi RWO gp3-csi 17 370 | ``` 371 | 372 | Deploy the Kafka cluster with our brand new volumes, wait for the cluster to be ready, and try to consume some data. 373 | 374 | > [!IMPORTANT] 375 | > We adjust the storage size in Kafka custom resource, and set the previous `clusterId` in the Kafka CR status. 376 | > To speed up log recovery and partition synchronization, we can also tune recovery threads and replica fetchers. 377 | 378 | ```sh 379 | $ cat sessions/001/install/001-broker-pool.yaml \ 380 | | yq ".spec.storage.size = \"10Gi\"" | kubectl create -f - \ 381 | && cat sessions/001/install/002-my-cluster.yaml \ 382 | | yq ".metadata.annotations.\"strimzi.io/pause-reconciliation\" = \"true\"" \ 383 | | yq ".spec.kafka.config.\"num.recovery.threads.per.data.dir\" = 5" \ 384 | | yq ".spec.kafka.config.\"num.replica.fetchers\" = 5" | kubectl create -f - \ 385 | && kubectl create -f sessions/001/install 2>/dev/null 386 | kafkanodepool.kafka.strimzi.io/broker created 387 | kafka.kafka.strimzi.io/my-cluster created 388 | kafkanodepool.kafka.strimzi.io/controller created 389 | 390 | $ kubectl patch k my-cluster --subresource status --type merge -p " 391 | status: 392 | clusterId: \"$CLUSTER_ID\"" 393 | kafka.kafka.strimzi.io/my-cluster patched 394 | 395 | $ kubectl annotate k my-cluster strimzi.io/pause-reconciliation=false --overwrite 396 | kafka.kafka.strimzi.io/my-cluster annotated 397 | 398 | $ kubectl get po | grep my-cluster-broker 399 | my-cluster-broker-10 1/1 Running 0 4m34s 400 | my-cluster-broker-11 1/1 Running 0 4m34s 401 | my-cluster-broker-12 1/1 Running 0 4m33s 402 | 403 | $ kubectl-kafka bin/kafka-console-consumer.sh --bootstrap-server my-cluster-kafka-bootstrap:9092 \ 404 | --topic my-topic --from-beginning --max-messages 3 405 | XVFTWDJKAIYRBIKZRFOEZNWURGQHGPDMOZYAEBTFLNCXMVOJOCPCXZLUZJKPTIFQVRHWKHBMTMHFHJGAIXNWURPJOKMXRAWLHMUNNWVYSNPIMZXJDKSLVMLJYZFJCQOIQXNFLYYYTEFK... 406 | FVABXPFDUNYNYMNVYWZDVZLGZASDYATOWNFMRODUPWCUVVIZFRLZNDOSQWZVNGMGEYHDVAWZDQLXBAIZGFDUOKGGHDBTLOJLMLPXTPXXZZQXFIVTAZOHHGWJBUSMPKIPCMOAJVSLUYGJ... 407 | OAPJJFCTIWBLZMWUVMWRSGJQMXVLATYRECKCHDEIHYOMLCLKAULDWNSRIXKVWSNHLJUADUZNUMCJQYASBCSJWHIKXLATGMGNENPSSVIUAWSRRABUBXFZZRKOGOFGTBVIWTWFUWHEEMGF... 408 | Processed a total of 3 messages 409 | ``` 410 | 411 | Finally, we delete the old volumes to reclaim some space. 
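Because the reclaim policy is now `Retain`, deleting a `PersistentVolume` object only removes it from the Kubernetes API; depending on the provisioner, the backing disk may still need to be cleaned up in the storage backend. Before the cleanup below, a quick sketch to double-check which volumes are actually unbound:

```sh
# The old 1Gi volumes should be Available with no claim, the new 10Gi ones Bound to the broker PVCs.
$ kubectl get pv -o custom-columns=NAME:.metadata.name,CAPACITY:.spec.capacity.storage,STATUS:.status.phase,RECLAIM:.spec.persistentVolumeReclaimPolicy,CLAIM:.spec.claimRef.name
```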
412 | 413 | ```sh 414 | $ kubectl delete pv $(kubectl get pv | grep Available | awk '{print $1}') 415 | persistentvolume "pvc-2522a5ad-5275-4459-83f0-149d8cd007f3" deleted 416 | persistentvolume "pvc-26590b0f-c1ba-4069-9c24-f731287a7ed3" deleted 417 | persistentvolume "pvc-35fed9c0-f12f-4012-899a-759add4cef4e" deleted 418 | 419 | $ kubectl get pv | grep my-cluster-broker 420 | pvc-2522a5ad-5275-4459-83f0-149d8cd007f3 10Gi RWO Retain Bound test/data-my-cluster-broker-11-new gp3-csi 79s 421 | pvc-26590b0f-c1ba-4069-9c24-f731287a7ed3 10Gi RWO Retain Bound test/data-my-cluster-broker-10-new gp3-csi 100s 422 | pvc-35fed9c0-f12f-4012-899a-759add4cef4e 10Gi RWO Retain Bound test/data-my-cluster-broker-12-new gp3-csi 57s 423 | ``` 424 | -------------------------------------------------------------------------------- /sessions/009/README.md: -------------------------------------------------------------------------------- 1 | ## Scaling up the cluster with the reassign tool 2 | 3 | First, use [this session](/sessions/001) to deploy a Kafka cluster on Kubernetes. 4 | 5 | Then, we send some data. 6 | 7 | ```sh 8 | $ kubectl-kafka bin/kafka-producer-perf-test.sh --topic my-topic --record-size 100 --num-records 1000000 \ 9 | --throughput -1 --producer-props acks=1 bootstrap.servers=my-cluster-kafka-bootstrap:9092 10 | 1000000 records sent, 233699.462491 records/sec (22.29 MB/sec), 866.05 ms avg latency, 1652.00 ms max latency, 827 ms 50th, 1500 ms 95th, 1595 ms 99th, 1614 ms 99.9th. 11 | ``` 12 | 13 | When the cluster is ready, we want to scale it up and put some load on the new broker, which otherwise will sit idle waiting for new topic creation. 14 | Thanks to the Cluster Operator, we can scale the cluster up by simply raising the number of broker replicas in the Kafka custom resource (CR). 15 | 16 | ```sh 17 | $ kubectl patch knp broker --type merge -p ' 18 | spec: 19 | replicas: 4' 20 | kafkanodepool.kafka.strimzi.io/broker patched 21 | 22 | $ kubectl get po -l app.kubernetes.io/name=broker 23 | NAME READY STATUS RESTARTS AGE 24 | my-cluster-broker-10 1/1 Running 0 2m8s 25 | my-cluster-broker-11 1/1 Running 0 2m8s 26 | my-cluster-broker-12 1/1 Running 0 2m8s 27 | my-cluster-broker-13 1/1 Running 0 30s 28 | ``` 29 | 30 | One option is to use the `kafka-reassign-partitions.sh` tool to move existing data. 31 | We only have one topic here, but you may have hundreds of them, where some of them are busier than others. 32 | You would need a custom procedure to figure out which replica changes can be done in order to improve the balance, also considering available disk space and preferred replicas. 33 | The result of this procedure would be a `reassign.json` file describing the desired partition state for each topic that we can pass to the tool. 
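If you only need a starting point, the tool can also propose an assignment with its `--generate` mode. It spreads the listed topics across the given brokers without considering load, so the proposal usually still needs manual editing, but it saves some typing. A sketch, run from a broker or the kafka-tools pod (file paths are arbitrary):

```sh
# Describe which topics to move, then ask the tool for a candidate assignment over brokers 10-13.
$ echo '{"version": 1, "topics": [{"topic": "my-topic"}]}' > /tmp/topics.json
$ /opt/kafka/bin/kafka-reassign-partitions.sh --bootstrap-server my-cluster-kafka-bootstrap:9092 \
  --topics-to-move-json-file /tmp/topics.json --broker-list "10,11,12,13" --generate
```

In this session we instead write the target assignment by hand, as shown below.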
34 | 
35 | ```sh
36 | $ kubectl exec -it my-cluster-broker-10 -- bash
37 | [kafka@my-cluster-broker-10 kafka]$ /opt/kafka/bin/kafka-topics.sh --bootstrap-server my-cluster-kafka-bootstrap:9092 --topic my-topic --describe
38 | Topic: my-topic TopicId: XbszKNVQSSKTPB3sGvRaGg PartitionCount: 3 ReplicationFactor: 3 Configs: min.insync.replicas=2,message.format.version=3.0-IV1
39 | Topic: my-topic Partition: 0 Leader: 10 Replicas: 10,12,11 Isr: 10,12,11
40 | Topic: my-topic Partition: 1 Leader: 12 Replicas: 12,11,10 Isr: 12,11,10
41 | Topic: my-topic Partition: 2 Leader: 11 Replicas: 11,10,12 Isr: 11,10,12
42 | 
43 | [kafka@my-cluster-broker-10 kafka]$ cat <<EOF >/tmp/reassign.json
44 | {
45 |   "version": 1,
46 |   "partitions": [
47 |     {"topic": "my-topic", "partition": 0, "replicas": [13, 12, 11]},
48 |     {"topic": "my-topic", "partition": 1, "replicas": [12, 11, 13]},
49 |     {"topic": "my-topic", "partition": 2, "replicas": [11, 13, 12]}
50 |   ]
51 | }
52 | EOF
53 | 
54 | [kafka@my-cluster-broker-10 kafka]$ /opt/kafka/bin/kafka-reassign-partitions.sh --bootstrap-server my-cluster-kafka-bootstrap:9092 \
55 |   --reassignment-json-file /tmp/reassign.json --throttle 10000000 --execute
56 | Current partition replica assignment
57 | 
58 | {"version":1,"partitions":[{"topic":"my-topic","partition":0,"replicas":[10,12,11],"log_dirs":["any","any","any"]},{"topic":"my-topic","partition":1,"replicas":[12,11,10],"log_dirs":["any","any","any"]},{"topic":"my-topic","partition":2,"replicas":[11,10,12],"log_dirs":["any","any","any"]}]}
59 | 
60 | Save this to use as the --reassignment-json-file option during rollback
61 | Warning: You must run --verify periodically, until the reassignment completes, to ensure the throttle is removed.
62 | The inter-broker throttle limit was set to 10000000 B/s
63 | Successfully started partition reassignments for my-topic-0,my-topic-1,my-topic-2
64 | ```
65 | 
66 | To limit the impact on the cluster while moving partitions between brokers, we use the `--throttle` option with a limit of 10 MB/s.
67 | 
68 | > [!IMPORTANT]
69 | > The `--throttle` option also applies throttling to the normal replication traffic between brokers.
70 | > We need to find the right balance to ensure that we can move data in a reasonable amount of time without slowing down replication too much.
71 | > Don't forget to call `--verify` at the end to disable replication throttling, which otherwise will continue to affect the cluster.
72 | 
73 | We can start from a safe throttle value and then use the `kafka.server:type=FetcherLagMetrics,name=ConsumerLag,clientId=([-.\w]+),topic=([-.\w]+),partition=([0-9]+)` metric to observe how far the followers are lagging behind the leader for a given partition.
74 | If this lag is growing or the reassignment is taking too much time, we can run the command again with the `--additional` option to increase the throttle value.
75 | 
76 | After the reassignment is started, we use the `--verify` option to check the status of the reassignment process and disable the replication throttling.
77 | When the process is done, we can check that the new replica assignment has been applied.
78 | 
79 | ```sh
80 | [kafka@my-cluster-broker-10 kafka]$ /opt/kafka/bin/kafka-reassign-partitions.sh --bootstrap-server my-cluster-kafka-bootstrap:9092 \
81 |   --reassignment-json-file /tmp/reassign.json --verify
82 | Status of partition reassignment:
83 | Reassignment of partition my-topic-0 is completed.
84 | Reassignment of partition my-topic-1 is completed.
85 | Reassignment of partition my-topic-2 is completed.
86 | 87 | Clearing broker-level throttles on brokers 10,11,12,13 88 | Clearing topic-level throttles on topic my-topic 89 | 90 | [kafka@my-cluster-broker-10 kafka]$ /opt/kafka/bin/kafka-topics.sh --bootstrap-server my-cluster-kafka-bootstrap:9092 --topic my-topic --describe 91 | Topic: my-topic TopicId: XbszKNVQSSKTPB3sGvRaGg PartitionCount: 3 ReplicationFactor: 3 Configs: min.insync.replicas=2,message.format.version=3.0-IV1 92 | Topic: my-topic Partition: 0 Leader: 13 Replicas: 13,12,11 Isr: 12,11,13 93 | Topic: my-topic Partition: 1 Leader: 12 Replicas: 12,11,13 Isr: 12,11,13 94 | Topic: my-topic Partition: 2 Leader: 11 Replicas: 11,13,12 Isr: 11,12,13 95 | 96 | [kafka@my-cluster-broker-10 kafka]$ exit 97 | exit 98 | ``` 99 | 100 | ## Scaling up the cluster with Cruise Control 101 | 102 | First, use [this session](/sessions/001) to deploy a Kafka cluster on Kubernetes. 103 | 104 | When the cluster is ready, we send some data and check how partitions are distributed between the brokers. 105 | 106 | ```sh 107 | $ kubectl-kafka bin/kafka-producer-perf-test.sh --topic my-topic --record-size 100 --num-records 10000000 \ 108 | --throughput -1 --producer-props acks=1 bootstrap.servers=my-cluster-kafka-bootstrap:9092 109 | ... 110 | 10000000 records sent, 435085.3 records/sec (41.49 MB/sec), 521.45 ms avg latency, 9808.00 ms max latency, 258 ms 50th, 1399 ms 95th, 9636 ms 99th, 9781 ms 99.9th. 111 | 112 | $ kubectl-kafka bin/kafka-topics.sh --bootstrap-server my-cluster-kafka-bootstrap:9092 --describe --topic my-topic 113 | Topic: my-topic TopicId: w7uEJVDXSm22zscX2-9AYA PartitionCount: 3 ReplicationFactor: 3 Configs: min.insync.replicas=2,retention.bytes=1073741824 114 | Topic: my-topic Partition: 0 Leader: 11 Replicas: 11,12,10 Isr: 11,12,10 Elr: LastKnownElr: 115 | Topic: my-topic Partition: 1 Leader: 12 Replicas: 12,10,11 Isr: 12,11,10 Elr: LastKnownElr: 116 | Topic: my-topic Partition: 2 Leader: 11 Replicas: 10,11,12 Isr: 11,12,10 Elr: LastKnownElr: 117 | ``` 118 | 119 | Then, we deploy Cruise Control with the auto-rebalancing feature enabled. 120 | 121 | > [!NOTE] 122 | > The auto-rebalancing feature will automatically generate and execute KafkaRebalance resources on cluster scale up and down. 123 | > Each mode can be customized by adding custom KafkaRebalance templates. 124 | 125 | The Cluster Operator will trigger a rolling update of the brokers to add the metrics reporter, and then it will deploy Cruise Control. 126 | 127 | ```sh 128 | $ kubectl patch k my-cluster --type merge -p ' 129 | spec: 130 | cruiseControl: 131 | autoRebalance: 132 | - mode: add-brokers 133 | - mode: remove-brokers' 134 | kafka.kafka.strimzi.io/my-cluster patched 135 | ``` 136 | 137 | Wait some time for Cruise Control to build its internal workload model, and then scale up the Kafka cluster adding a new broker. 138 | 139 | ```sh 140 | $ kubectl patch knp broker --type merge -p ' 141 | spec: 142 | replicas: 4' 143 | kafkanodepool.kafka.strimzi.io/broker patched 144 | ``` 145 | 146 | Follow the KafkaRebalance execution from command line. 147 | 148 | ```sh 149 | $ kubectl get kr -w 150 | NAME CLUSTER TEMPLATE STATUS 151 | my-cluster-auto-rebalancing-add-brokers my-cluster PendingProposal 152 | my-cluster-auto-rebalancing-add-brokers my-cluster ProposalReady 153 | my-cluster-auto-rebalancing-add-brokers my-cluster Rebalancing 154 | my-cluster-auto-rebalancing-add-brokers my-cluster Ready 155 | ``` 156 | 157 | When KafkaRebalance is ready, we can see that the new broker now contains existing replicas. 
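Before looking at the partition layout, the KafkaRebalance status also records a summary of what Cruise Control executed. A sketch; the exact field names can vary between Strimzi versions:

```sh
# Print the optimization summary (replica/leader movements, data to move, and so on).
$ kubectl get kr my-cluster-auto-rebalancing-add-brokers -o yaml | yq .status.optimizationResult
```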
158 | 159 | ```sh 160 | $ kubectl-kafka bin/kafka-topics.sh --bootstrap-server my-cluster-kafka-bootstrap:9092 --describe --topic my-topic 161 | Topic: my-topic TopicId: w7uEJVDXSm22zscX2-9AYA PartitionCount: 3 ReplicationFactor: 3 Configs: min.insync.replicas=2,retention.bytes=1073741824 162 | Topic: my-topic Partition: 0 Leader: 11 Replicas: 11,12,10 Isr: 10,11,12 Elr: LastKnownElr: 163 | Topic: my-topic Partition: 1 Leader: 12 Replicas: 12,10,11 Isr: 10,11,12 Elr: LastKnownElr: 164 | Topic: my-topic Partition: 2 Leader: 10 Replicas: 10,11,13 Isr: 10,11,13 Elr: LastKnownElr: 165 | ``` 166 | -------------------------------------------------------------------------------- /sessions/010/README.md: -------------------------------------------------------------------------------- 1 | ## Run transactional applications 2 | 3 | First, use [this session](/sessions/001) to deploy a Kafka cluster on Kubernetes. 4 | 5 | Then, run a transactional application example (read-process-write). 6 | 7 | ```sh 8 | $ kubectl create -f install.yaml 9 | kafkatopic.kafka.strimzi.io/input-topic created 10 | kafkatopic.kafka.strimzi.io/output-topic created 11 | statefulset.apps/kafka-txn created 12 | 13 | $ kubectl get po -l app=kafka-txn 14 | NAME READY STATUS RESTARTS AGE 15 | kafka-txn-0 1/1 Running 0 9m56s 16 | kafka-txn-1 1/1 Running 0 9m53s 17 | kafka-txn-2 1/1 Running 0 9m50s 18 | ``` 19 | 20 | When the application is running, we send one sentence to the input topic and check the result from the output topic. 21 | 22 | ```sh 23 | $ kubectl-kafka bin/kafka-console-producer.sh --bootstrap-server my-cluster-kafka-bootstrap:9092 --topic input-topic 24 | >this is a test 25 | >^C 26 | 27 | $ kubectl-kafka bin/kafka-console-consumer.sh --bootstrap-server my-cluster-kafka-bootstrap:9092 --topic output-topic --from-beginning 28 | tset a si siht 29 | ^CProcessed a total of 1 messages 30 | ``` 31 | 32 | After that, we can take a look at partition content. 33 | Our output topic has one partition, but what are the `__consumer_offsets` and `__transaction_state` coordinating partitions? 34 | We can pass the `group.id` and `transactional.id` to the following function define in `init.sh` to find out. 35 | 36 | ```sh 37 | $ kafka-cp my-group 38 | 12 39 | 40 | $ kafka-cp kafka-txn-0 41 | 30 42 | ``` 43 | 44 | We now check what's happening inside all the partitions involved in this transaction. 45 | In `output-topic-0`, we see that the data batch is transactional (`isTransactional`) and contains the PID and epoch. 46 | This batch is followed by a control batch (`isControl`), which contains a single end transaction marker record (`endTxnMarker`). 47 | In `__consumer_offsets-12`, the consumer group's offset commit batch is followed by a similar control batch. 
48 | 49 | ```sh 50 | $ kubectl exec my-cluster-broker-10 -- bin/kafka-dump-log.sh --deep-iteration --print-data-log \ 51 | --files /var/lib/kafka/data/kafka-log10/output-topic-0/00000000000000000000.log 52 | Dumping /var/lib/kafka/data/kafka-log10/output-topic-0/00000000000000000000.log 53 | Log starting offset: 0 54 | baseOffset: 0 lastOffset: 0 count: 1 baseSequence: 0 lastSequence: 0 producerId: 1 producerEpoch: 0 partitionLeaderEpoch: 0 isTransactional: true isControl: false deleteHorizonMs: OptionalLong.empty position: 0 CreateTime: 1742739702864 size: 82 magic: 2 compresscodec: none crc: 758896000 isvalid: true 55 | | offset: 0 CreateTime: 1742739702864 keySize: -1 valueSize: 14 sequence: 0 headerKeys: [] payload: tset a si siht 56 | baseOffset: 1 lastOffset: 1 count: 1 baseSequence: -1 lastSequence: -1 producerId: 1 producerEpoch: 0 partitionLeaderEpoch: 0 isTransactional: true isControl: true deleteHorizonMs: OptionalLong.empty position: 82 CreateTime: 1742739703234 size: 78 magic: 2 compresscodec: none crc: 2557578104 isvalid: true 57 | | offset: 1 CreateTime: 1742739703234 keySize: 4 valueSize: 6 sequence: -1 headerKeys: [] endTxnMarker: COMMIT coordinatorEpoch: 0 58 | 59 | $ kubectl exec my-cluster-broker-10 -- bin/kafka-dump-log.sh --deep-iteration --print-data-log --offsets-decoder \ 60 | --files /var/lib/kafka/data/kafka-log10/__consumer_offsets-12/00000000000000000000.log 61 | Dumping /var/lib/kafka/data/kafka-log10/__consumer_offsets-12/00000000000000000000.log 62 | Log starting offset: 0 63 | ... 64 | baseOffset: 7 lastOffset: 7 count: 1 baseSequence: 0 lastSequence: 0 producerId: 1 producerEpoch: 0 partitionLeaderEpoch: 0 isTransactional: true isControl: false deleteHorizonMs: OptionalLong.empty position: 1974 CreateTime: 1742739703027 size: 121 magic: 2 compresscodec: none crc: 4292816145 isvalid: true 65 | | offset: 7 CreateTime: 1742739703027 keySize: 29 valueSize: 24 sequence: 0 headerKeys: [] key: {"type":"1","data":{"group":"my-group","topic":"input-topic","partition":0}} payload: {"version":"3","data":{"offset":1,"leaderEpoch":-1,"metadata":"","commitTimestamp":1742739702993}} 66 | baseOffset: 8 lastOffset: 8 count: 1 baseSequence: -1 lastSequence: -1 producerId: 1 producerEpoch: 0 partitionLeaderEpoch: 0 isTransactional: true isControl: true deleteHorizonMs: OptionalLong.empty position: 2095 CreateTime: 1742739703213 size: 78 magic: 2 compresscodec: none crc: 1231080676 isvalid: true 67 | | offset: 8 CreateTime: 1742739703213 keySize: 4 valueSize: 6 sequence: -1 headerKeys: [] endTxnMarker: COMMIT coordinatorEpoch: 0 68 | ... 69 | ``` 70 | 71 | That was straightforward, but how is the transaction state managed by the coordinator? 72 | In `__transaction_state-20` record payloads, we can see all transaction state changes keyed by TID `kafka-txn-0` (we also have PID+epoch). 73 | The transaction starts in the `Empty` state, then we have two `Ongoing` state changes (one for each partition registration). 74 | Then, when the commit is called, we have `PrepareCommit` state change, which means the broker is now committed to the transaction. 75 | This happens in the last batch, where the state is changed to `CompleteCommit`, terminating the transaction. 
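The full decoder output below shows every batch. To see just the state progression at a glance, we can filter the record payloads; this reuses the same dump command, assuming the coordinating partition 30 computed earlier:

```sh
# Extract only the state field from each transaction metadata record.
$ kubectl exec my-cluster-broker-10 -- bin/kafka-dump-log.sh --deep-iteration --print-data-log --transaction-log-decoder \
    --files /var/lib/kafka/data/kafka-log10/__transaction_state-30/00000000000000000000.log | grep -o "state=[A-Za-z]*"
state=Empty
state=Ongoing
state=Ongoing
state=PrepareCommit
state=CompleteCommit
```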
76 | 77 | ```sh 78 | $ kubectl exec my-cluster-broker-10 -- bin/kafka-dump-log.sh --deep-iteration --print-data-log --transaction-log-decoder \ 79 | --files /var/lib/kafka/data/kafka-log10/__transaction_state-30/00000000000000000000.log 80 | Dumping /var/lib/kafka/data/kafka-log10/__transaction_state-30/00000000000000000000.log 81 | Log starting offset: 0 82 | baseOffset: 0 lastOffset: 0 count: 1 baseSequence: -1 lastSequence: -1 producerId: -1 producerEpoch: -1 partitionLeaderEpoch: 0 isTransactional: false isControl: false deleteHorizonMs: OptionalLong.empty position: 0 CreateTime: 1742739549438 size: 120 magic: 2 compresscodec: none crc: 3663501755 isvalid: true 83 | | offset: 0 CreateTime: 1742739549438 keySize: 15 valueSize: 37 sequence: -1 headerKeys: [] key: transaction_metadata::transactionalId=kafka-txn-0 payload: producerId:1,producerEpoch:0,state=Empty,partitions=[],txnLastUpdateTimestamp=1742739549435,txnTimeoutMs=60000 84 | baseOffset: 1 lastOffset: 1 count: 1 baseSequence: -1 lastSequence: -1 producerId: -1 producerEpoch: -1 partitionLeaderEpoch: 0 isTransactional: false isControl: false deleteHorizonMs: OptionalLong.empty position: 120 CreateTime: 1742739702876 size: 143 magic: 2 compresscodec: none crc: 563111626 isvalid: true 85 | | offset: 1 CreateTime: 1742739702876 keySize: 15 valueSize: 59 sequence: -1 headerKeys: [] key: transaction_metadata::transactionalId=kafka-txn-0 payload: producerId:1,producerEpoch:0,state=Ongoing,partitions=[output-topic-0],txnLastUpdateTimestamp=1742739702876,txnTimeoutMs=60000 86 | baseOffset: 2 lastOffset: 2 count: 1 baseSequence: -1 lastSequence: -1 producerId: -1 producerEpoch: -1 partitionLeaderEpoch: 0 isTransactional: false isControl: false deleteHorizonMs: OptionalLong.empty position: 263 CreateTime: 1742739702882 size: 172 magic: 2 compresscodec: none crc: 1296972565 isvalid: true 87 | | offset: 2 CreateTime: 1742739702882 keySize: 15 valueSize: 87 sequence: -1 headerKeys: [] key: transaction_metadata::transactionalId=kafka-txn-0 payload: producerId:1,producerEpoch:0,state=Ongoing,partitions=[output-topic-0,__consumer_offsets-12],txnLastUpdateTimestamp=1742739702882,txnTimeoutMs=60000 88 | baseOffset: 3 lastOffset: 3 count: 1 baseSequence: -1 lastSequence: -1 producerId: -1 producerEpoch: -1 partitionLeaderEpoch: 0 isTransactional: false isControl: false deleteHorizonMs: OptionalLong.empty position: 435 CreateTime: 1742739703134 size: 172 magic: 2 compresscodec: none crc: 598474139 isvalid: true 89 | | offset: 3 CreateTime: 1742739703134 keySize: 15 valueSize: 87 sequence: -1 headerKeys: [] key: transaction_metadata::transactionalId=kafka-txn-0 payload: producerId:1,producerEpoch:0,state=PrepareCommit,partitions=[output-topic-0,__consumer_offsets-12],txnLastUpdateTimestamp=1742739703132,txnTimeoutMs=60000 90 | baseOffset: 4 lastOffset: 4 count: 1 baseSequence: -1 lastSequence: -1 producerId: -1 producerEpoch: -1 partitionLeaderEpoch: 0 isTransactional: false isControl: false deleteHorizonMs: OptionalLong.empty position: 607 CreateTime: 1742739703240 size: 120 magic: 2 compresscodec: none crc: 4205821491 isvalid: true 91 | | offset: 4 CreateTime: 1742739703240 keySize: 15 valueSize: 37 sequence: -1 headerKeys: [] key: transaction_metadata::transactionalId=kafka-txn-0 payload: producerId:1,producerEpoch:0,state=CompleteCommit,partitions=[],txnLastUpdateTimestamp=1742739703142,txnTimeoutMs=60000 92 | ``` 93 | 94 | ## Transaction rollback 95 | 96 | When there is a hanging transaction the LSO is stuck, which means that transactional 
consumers of this partition can't make any progress (CURRENT-OFFSET==LSO). 97 | 98 | ```sh 99 | # application log 100 | [Consumer clientId=my-client, groupId=my-group] The following partitions still have unstable offsets which are not cleared on the broker side: [__consumer_offsets-27], 101 | this could be either transactional offsets waiting for completion, or normal offsets waiting for replication after appending to local log 102 | 103 | # consumer lag grows 104 | $ kubectl-kafka bin/kafka-consumer-groups.sh --bootstrap-server my-cluster-kafka-bootstrap:9092 --describe --group my-group 105 | GROUP TOPIC PARTITION CURRENT-OFFSET LOG-END-OFFSET LAG CONSUMER-ID HOST CLIENT-ID 106 | my-group __consumer_offsets-27 9 913095344 913097449 2105 my-client-0 /10.60.172.97 my-client 107 | ``` 108 | 109 | If the partition is part of a compacted topic like `__consumer_offsets`, compaction is also blocked, causing unbounded partition growth. 110 | The last cleaned offset never changes. 111 | 112 | ```sh 113 | $ kubectl exec -it my-cluster-broker-10 -- bash 114 | 115 | [kafka@my-cluster-broker-10 kafka]$ grep "__consumer_offsets 27" /var/lib/kafka/data/kafka-log10/cleaner-offset-checkpoint 116 | __consumer_offsets 27 913095344 117 | 118 | [kafka@my-cluster-broker-10 kafka]$ exit 119 | exit 120 | ``` 121 | 122 | In Kafka 3+ there is an official command line tool that you can use to identify and rollback hanging transactions. 123 | 124 | > [!IMPORTANT] 125 | > The `CLUSTER_ACTION` operation type is required when authorization is enabled. 126 | 127 | ```sh 128 | $ kubectl-kafka bin/kafka-transactions.sh --bootstrap-server my-cluster-kafka-bootstrap:9092 find-hanging --broker 10 129 | Topic Partition ProducerId ProducerEpoch StartOffset LastTimestamp Duration(s) 130 | __consumer_offsets 27 171100 1 913095344 2022-06-06T03:16:47Z 209793 131 | 132 | $ kubectl-kafka bin/kafka-transactions.sh --bootstrap-server my-cluster-kafka-bootstrap:9092 abort \ 133 | --topic __consumer_offsets --partition 27 --start-offset 913095344 134 | ``` 135 | -------------------------------------------------------------------------------- /sessions/010/install.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kafka.strimzi.io/v1beta2 2 | kind: KafkaTopic 3 | metadata: 4 | name: input-topic 5 | labels: 6 | strimzi.io/cluster: my-cluster 7 | spec: 8 | partitions: 1 9 | replicas: 3 10 | config: 11 | retention.ms: 7200000 12 | segment.bytes: 1073741824 13 | --- 14 | apiVersion: kafka.strimzi.io/v1beta2 15 | kind: KafkaTopic 16 | metadata: 17 | name: output-topic 18 | labels: 19 | strimzi.io/cluster: my-cluster 20 | spec: 21 | partitions: 1 22 | replicas: 3 23 | config: 24 | retention.ms: 7200000 25 | segment.bytes: 1073741824 26 | --- 27 | # using sts because we need a stable identity (pod names) 28 | apiVersion: apps/v1 29 | kind: StatefulSet 30 | metadata: 31 | name: kafka-txn 32 | spec: 33 | replicas: 3 34 | serviceName: kafka-txn 35 | selector: 36 | matchLabels: 37 | app: kafka-txn 38 | template: 39 | metadata: 40 | labels: 41 | app: kafka-txn 42 | spec: 43 | containers: 44 | - name: kafka-txn 45 | image: ghcr.io/fvaleri/kafka-txn:latest 46 | imagePullPolicy: Always 47 | env: 48 | - name: BOOTSTRAP_SERVERS 49 | value: "my-cluster-kafka-bootstrap:9092" 50 | - name: GROUP_ID 51 | value: "my-group" 52 | - name: INSTANCE_ID 53 | valueFrom: 54 | fieldRef: 55 | fieldPath: metadata.name 56 | - name: INPUT_TOPIC 57 | value: "input-topic" 58 | - name: OUTPUT_TOPIC 59 | value: 
"output-topic" 60 | --------------------------------------------------------------------------------