├── .gitignore ├── README.md ├── fetch.sh ├── pom.xml ├── step1 ├── README.md └── docker-compose.yml ├── step10 ├── README.md └── docker-compose.yml ├── step11 ├── README.md ├── debezium-connector-mysql-0.7.3-plugin.tar.gz ├── debezium-connector-mysql │ ├── CHANGELOG.md │ ├── CONTRIBUTE.md │ ├── COPYRIGHT.txt │ ├── LICENSE.txt │ ├── README.md │ ├── debezium-connector-mysql-0.7.3.jar │ ├── debezium-core-0.7.3.jar │ ├── mysql-binlog-connector-java-0.13.0.jar │ └── mysql-connector-java-5.1.40.jar ├── docker-compose.yml ├── mysql-init.sql └── mysql.cnf ├── step12 ├── README.md └── docker-compose.yml ├── step13 ├── README.md ├── docker-compose.yml └── minio.png ├── step14 ├── README.md └── docker-compose.yml ├── step15 ├── README.md ├── create-streams.png ├── docker-compose.yml ├── ksql-ui.png ├── live_topic_data.png ├── streams-field.png ├── streams-result.png └── streams-topic.png ├── step16 ├── README.md └── docker-compose.yml ├── step17 ├── README.md ├── docker-compose.yml └── mysql-init.sql ├── step18 ├── README.md ├── docker-compose.yml └── streamjoins │ ├── Dockerfile │ ├── pom.xml │ └── src │ └── main │ ├── java │ └── com │ │ └── github │ │ └── framiere │ │ ├── Domain.java │ │ ├── JsonSerde.java │ │ ├── JsonSerializer.java │ │ ├── RandomProducer.java │ │ ├── Runner.java │ │ └── SimpleJoinStream.java │ └── resources │ └── log4j.properties ├── step19 ├── README.md ├── cdcdiff │ ├── Dockerfile │ ├── pom.xml │ └── src │ │ ├── main │ │ ├── java │ │ │ └── com │ │ │ │ └── github │ │ │ │ └── framiere │ │ │ │ ├── Application.java │ │ │ │ └── CdcChange.java │ │ └── resources │ │ │ └── log4j.properties │ │ └── test │ │ └── java │ │ └── com │ │ └── github │ │ └── framiere │ │ └── CdcChangeTest.java ├── docker-compose-no-control-center.png ├── docker-compose-novolumne.png ├── docker-compose-volumes.png ├── docker-compose.yml ├── ksql-json-to-influxdb.conf ├── mysql-init.sql └── operationtomysql │ ├── Dockerfile │ ├── pom.xml │ └── src │ └── main │ ├── java │ └── com │ │ └── github │ │ └── framiere │ │ ├── AddressRepository.java │ │ ├── Application.java │ │ ├── Domain.java │ │ ├── JsonSerde.java │ │ ├── JsonSerializer.java │ │ ├── MemberRepository.java │ │ └── TeamRepository.java │ └── resources │ ├── application.properties │ └── log4j.properties ├── step2 ├── README.md └── docker-compose.yml ├── step3 ├── README.md ├── consumer │ ├── Dockerfile │ ├── pom.xml │ └── src │ │ └── main │ │ └── java │ │ └── com │ │ └── github │ │ └── framiere │ │ └── SimpleConsumer.java ├── docker-compose.yml ├── pom.xml └── producer │ ├── Dockerfile │ ├── pom.xml │ └── src │ └── main │ └── java │ └── com │ └── github │ └── framiere │ └── SimpleProducer.java ├── step4 ├── README.md ├── docker-compose.yml └── telegraf.conf ├── step5 ├── README.md ├── docker-compose.yml └── telegraf.conf ├── step6 ├── README.md ├── docker-compose.yml ├── pom.xml ├── streams │ ├── Dockerfile │ ├── pom.xml │ └── src │ │ ├── main │ │ ├── java │ │ │ └── com │ │ │ │ └── github │ │ │ │ └── framiere │ │ │ │ └── SimpleStream.java │ │ └── resources │ │ │ └── log4j.properties │ │ └── test │ │ └── java │ │ └── com │ │ └── github │ │ └── framiere │ │ └── SimpleStreamTest.java └── telegraf.conf ├── step7 ├── README.md ├── docker-compose.yml ├── expose-volume.yml ├── images │ ├── jmc.png │ └── mbean-info.png ├── telegraf-inputs │ ├── consumer.conf │ ├── docker.conf │ ├── kafka.conf │ ├── producer.conf │ ├── streams.conf │ └── zookeeper.conf └── telegraf.conf ├── step8 ├── README.md ├── docker-compose.yml ├── grafana-setup │ ├── Dockerfile │ ├── README.MD 
│ ├── alert-channels │ │ ├── email.json │ │ └── pager-duty.json │ ├── dashboards │ │ ├── README.MD │ │ ├── cpu.json │ │ ├── docker.json │ │ ├── jvm.json │ │ ├── kafka-broker-produce.json │ │ ├── kafka-broker.json │ │ ├── kafka-consumer.json │ │ ├── kafka-producer.json │ │ ├── kafka-streams.json │ │ └── kafka-topics.json │ ├── datasources │ │ ├── elasticsearch.json │ │ └── influxdb.json │ └── startup.sh └── telegraf-kafka-to-influxdb.conf └── step9 ├── README.md ├── docker-compose.yml ├── mysql-connector-java-5.1.45-bin.jar └── mysql-init.sql /.gitignore: -------------------------------------------------------------------------------- 1 | **/target/ 2 | **/*.iml 3 | **/.idea/ 4 | **/.settings/ 5 | **/.classpath 6 | **/.project 7 | **/.vscode 8 | **/.DS_Store 9 | .cache 10 | .factorypath 11 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Please checkout these awesome references 2 | 1. http://developer.confluent.io/ 3 | 1. https://kafka-tutorials.confluent.io/ 4 | 5 | And if you want to learn another way, just follow these steps. 6 | 7 | Make docker and maven do their thing once for all by running `./fetch.sh` 8 | 9 | Then jump in the Kafka Story! 10 | 11 | 1. [One zookeeper, one kafka broker](step1/) 12 | 1. [One zookeeper, many kafka brokers](step2/) 13 | 1. [Java consumer, java producer](step3/) 14 | 1. [Let's add data with telegraf](step4/) 15 | 1. [Let's setup better defaults](step5/) 16 | 1. [Enter kafka stream](step6/) 17 | 1. [Capture JMX metrics](step7/) 18 | 1. [Grafana](step8/) 19 | 1. [Kafka Connect](step9/) 20 | 1. [Kafka Connect and Schema Registry](step10/) 21 | 1. [Change Data Capture](step11/) 22 | 1. [Change Data Capture and Schema Registry](step12/) 23 | 1. [Change Data Capture and Schema Registry and export to S3](step13/) 24 | 1. [Ksql](step14/) 25 | 1. [Ksql server and UI](step15/) 26 | 1. [Change Data Capture, Schema Registry and Ksql](step16/) 27 | 1. [Change Data Capture, JSON, Ksql and join](step17/) 28 | 1. [Random producer and Complex joins](step18/) 29 | 1. [Sync random producer and mysql, capture CDC diff and push it to telegraf](step19/) 30 | 31 | Don't like Docker ? Please download Confluent platform here: https://www.confluent.io/download/ 32 | 33 | Also, please take a look at 34 | 1. https://github.com/confluentinc/cp-demo 35 | 1. https://github.com/confluentinc/demo-scene 36 | 1. https://github.com/confluentinc/examples 37 | 1. https://github.com/confluentinc/kafka-streams-examples 38 | 1. https://www.confluent.io/stream-processing-cookbook/ 39 | -------------------------------------------------------------------------------- /fetch.sh: -------------------------------------------------------------------------------- 1 | find . -name "docker-compose.yml" -exec docker-compose -f {} pull \; 2 | find . 
-name "docker-compose.yml" -exec docker-compose -f {} build \; 3 | mvn install 4 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | com.github.framiere 6 | kafka-story 7 | 1.0.0 8 | pom 9 | 10 | install 11 | 12 | 13 | step3 14 | step6 15 | step18/streamjoins 16 | step19/operationtomysql 17 | step19/cdcdiff 18 | 19 | 20 | -------------------------------------------------------------------------------- /step1/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '2' 2 | services: 3 | zookeeper: 4 | image: confluentinc/cp-zookeeper:5.3.1 5 | hostname: zookeeper 6 | ports: 7 | - "2181:2181" 8 | environment: 9 | ZOOKEEPER_CLIENT_PORT: 2181 10 | 11 | kafka: 12 | image: confluentinc/cp-kafka:5.3.1 13 | hostname: kafka 14 | depends_on: 15 | - zookeeper 16 | ports: 17 | - "9092:9092" 18 | environment: 19 | KAFKA_BROKER_ID: 1 20 | KAFKA_ZOOKEEPER_CONNECT: 'zookeeper:2181' 21 | KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://localhost:9092 22 | KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1 23 | -------------------------------------------------------------------------------- /step10/README.md: -------------------------------------------------------------------------------- 1 | # Objective 2 | 3 | Getting Kafka Connect and Schema registry setup 4 | 5 | 6 | # Kafka Connect 7 | 8 | Let's post it 9 | 10 | ``` 11 | $ docker-compose exec connect curl -s -XPOST -H "Content-Type: application/json; charset=UTF-8" http://localhost:8083/connectors/ -d ' 12 | { 13 | "name": "my-mysql-connector", 14 | "config": { 15 | "connector.class":"io.confluent.connect.jdbc.JdbcSourceConnector", 16 | "tasks.max":"10", 17 | "connection.url":"jdbc:mysql://mysql:3306/db?user=user&password=password&useSSL=false", 18 | "table.whitelist":"application", 19 | "mode":"timestamp+incrementing", 20 | "timestamp.column.name":"last_modified", 21 | "incrementing.column.name":"id", 22 | "topic.prefix":"mysql-", 23 | "key.ignore": true, 24 | "key.converter.schema.registry.url": "http://schema-registry:8082", 25 | "value.converter": "io.confluent.connect.avro.AvroConverter", 26 | "value.converter.schema.registry.url": "http://schema-registry:8082", 27 | "schema.ignore": true 28 | 29 | } 30 | } 31 | ' | jq . 32 | ``` 33 | 34 | 35 | Let's see its status 36 | 37 | ``` 38 | $ docker-compose exec connect curl -s localhost:8083/connectors/my-mysql-connector/status | jq . 
39 | { 40 | "name": "my-mysql-connector", 41 | "connector": { 42 | "state": "RUNNING", 43 | "worker_id": "connect:8083" 44 | }, 45 | "tasks": [ 46 | { 47 | "state": "RUNNING", 48 | "id": 0, 49 | "worker_id": "connect:8083" 50 | } 51 | ], 52 | "type": "source" 53 | } 54 | 55 | ``` 56 | 57 | 58 | We have our data 59 | 60 | ``` 61 | $ docker-compose exec kafka-1 kafka-topics --zookeeper zookeeper:2181 --list 62 | __consumer_offsets 63 | connect-config 64 | connect-offsets 65 | connect-status 66 | mysql-application 67 | ``` 68 | 69 | and 70 | 71 | ``` 72 | $ docker-compose exec kafka-1 kafka-topics --zookeeper zookeeper:2181 --topic mysql-application --describe 73 | Topic:mysql-application PartitionCount:1 ReplicationFactor:3 Configs: 74 | Topic: mysql-application Partition: 0 Leader: 1 Replicas: 1,2,3 Isr: 1,2,3 75 | ``` 76 | 77 | Let's see the data 78 | 79 | ``` 80 | $ docker-compose exec schema-registry kafka-avro-console-consumer -bootstrap-server kafka-1:9092,kafka-2:9092,kafka-3:9092 --topic mysql-application --from-beginning --property schema.registry.url=http://localhost:8082 81 | [2018-02-13 19:09:49,930] INFO ConsumerConfig values: 82 | auto.commit.interval.ms = 5000 83 | auto.offset.reset = earliest 84 | bootstrap.servers = [kafka-1:9092, kafka-2:9092, kafka-3:9092] 85 | 86 | ... 87 | {"id":1,"name":"kafka","team_email":"kafka@apache.org","last_modified":1518544661000} 88 | ... 89 | ``` 90 | 91 | So much better with a Schema Registry! 92 | 93 | Let's add another element in the application 94 | 95 | ``` 96 | $ docker-compose exec mysql mysql --user=root --password=password --database=db -e " 97 | INSERT INTO application ( \ 98 | id, \ 99 | name, \ 100 | team_email, \ 101 | last_modified \ 102 | ) VALUES ( \ 103 | 2, \ 104 | 'another', \ 105 | 'another@apache.org', \ 106 | NOW() \ 107 | ); " 108 | ``` 109 | 110 | ``` 111 | $ docker-compose exec mysql bash -c "mysql --user=root --password=password --database=db -e 'select * from application'" 112 | mysql: [Warning] Using a password on the command line interface can be insecure. 113 | +----+---------+--------------------+---------------------+ 114 | | id | name | team_email | last_modified | 115 | +----+---------+--------------------+---------------------+ 116 | | 1 | kafka | kafka@apache.org | 2018-02-25 11:25:23 | 117 | | 2 | another | another@apache.org | 2018-02-25 11:31:10 | 118 | +----+---------+--------------------+---------------------+ 119 | ``` 120 | 121 | Let's verify that we have them in our topic 122 | 123 | ``` 124 | $ docker-compose exec schema-registry kafka-avro-console-consumer -bootstrap-server kafka-1:9092,kafka-2:9092,kafka-3:9092 --topic mysql-application --from-beginning --property schema.registry.url=http://localhost:8082 --property print.key=true 125 | [2018-02-13 19:09:49,930] INFO ConsumerConfig values: 126 | auto.commit.interval.ms = 5000 127 | auto.offset.reset = earliest 128 | bootstrap.servers = [kafka-1:9092, kafka-2:9092, kafka-3:9092] 129 | ... 130 | {"id":1,"name":"kafka","team_email":"kafka@apache.org","last_modified":1519557923000} 131 | {"id":2,"name":"another","team_email":"another@apache.org","last_modified":1519558270000} 132 | ``` 133 | 134 | What about update ? 
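Updates are picked up because the connector runs in `timestamp+incrementing` mode: on each poll it looks for rows whose `last_modified` timestamp has advanced, using the incrementing `id` column to break ties, and republishes them. The polling query is roughly the following (an illustrative sketch, not the connector's exact SQL):

```
SELECT *
FROM application
WHERE last_modified > :last_seen_timestamp
   OR (last_modified = :last_seen_timestamp AND id > :last_seen_id)
ORDER BY last_modified, id;
```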
135 | 136 | ``` 137 | $ docker-compose exec mysql mysql --user=root --password=password --database=db -e "UPDATE application set name='another2', last_modified = NOW() where id = '2'" 138 | ``` 139 | 140 | 141 | ``` 142 | $ docker-compose exec mysql bash -c "mysql --user=root --password=password --database=db -e 'select * from application'" 143 | mysql: [Warning] Using a password on the command line interface can be insecure. 144 | +----+---------+--------------------+---------------------+ 145 | | id | name | team_email | last_modified | 146 | +----+----------+--------------------+---------------------+ 147 | | 1 | kafka | kafka@apache.org | 2018-02-25 11:25:23 | 148 | | 2 | another2 | another@apache.org | 2018-02-25 11:36:10 | 149 | +----+----------+--------------------+---------------------+ 150 | ``` 151 | 152 | Let's verify that we the update reflected in the topic 153 | 154 | ``` 155 | $ docker-compose exec schema-registry kafka-avro-console-consumer -bootstrap-server kafka-1:9092,kafka-2:9092,kafka-3:9092 --topic mysql-application --from-beginning --property schema.registry.url=http://localhost:8082 156 | [2018-02-13 19:09:49,930] INFO ConsumerConfig values: 157 | auto.commit.interval.ms = 5000 158 | auto.offset.reset = earliest 159 | bootstrap.servers = [kafka-1:9092, kafka-2:9092, kafka-3:9092] 160 | ... 161 | {"id":1,"name":"kafka","team_email":"kafka@apache.org","last_modified":1519557923000} 162 | {"id":2,"name":"another","team_email":"another@apache.org","last_modified":1519558270000} 163 | {"id":2,"name":"another2","team_email":"another@apache.org","last_modified":1519568679000} 164 | ``` 165 | 166 | What about deletion ? 167 | 168 | ``` 169 | $ docker-compose exec mysql mysql --user=root --password=password --database=db -e "DELETE FROM application where id = '2'" 170 | ``` 171 | 172 | ``` 173 | $ docker-compose exec schema-registry kafka-avro-console-consumer -bootstrap-server kafka-1:9092,kafka-2:9092,kafka-3:9092 --topic mysql-application --from-beginning --property schema.registry.url=http://localhost:8082 174 | [2018-02-13 19:09:49,930] INFO ConsumerConfig values: 175 | auto.commit.interval.ms = 5000 176 | auto.offset.reset = earliest 177 | bootstrap.servers = [kafka-1:9092, kafka-2:9092, kafka-3:9092] 178 | ... 179 | {"id":1,"name":"kafka","team_email":"kafka@apache.org","last_modified":1519557923000} 180 | {"id":2,"name":"another","team_email":"another@apache.org","last_modified":1519558270000} 181 | {"id":2,"name":"another2","team_email":"another@apache.org","last_modified":1519568679000} 182 | ``` 183 | 184 | Nope, no new event ! With this method, either you load all data using `batch` or you need to use soft-delete to support deletion. 185 | 186 | See https://docs.confluent.io/current/connect/connect-jdbc/docs/source_config_options.html#mode 187 | 188 | Enter Change data capture. 
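If you have to stay on JDBC polling, the soft-delete workaround mentioned above could look like this. It is only a sketch: the `deleted` flag column is an assumption and is not part of this demo's schema. Instead of removing the row you flag it and bump `last_modified`, so the change is polled like any other update and downstream consumers filter on the flag:

```
$ docker-compose exec mysql mysql --user=root --password=password --database=db -e "ALTER TABLE application ADD COLUMN deleted TINYINT(1) NOT NULL DEFAULT 0"
$ docker-compose exec mysql mysql --user=root --password=password --database=db -e "UPDATE application SET deleted=1, last_modified=NOW() WHERE id=2"
```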
189 | 190 | -------------------------------------------------------------------------------- /step10/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.4' 2 | services: 3 | zookeeper: 4 | image: confluentinc/cp-zookeeper:5.3.1 5 | hostname: zookeeper 6 | environment: 7 | ZOOKEEPER_CLIENT_PORT: 2181 8 | healthcheck: 9 | test: ["CMD", "bash", "-c", "echo ruok | nc localhost 2181 | grep imok"] 10 | start_period: 30s 11 | 12 | kafka-1: 13 | image: confluentinc/cp-kafka:5.3.1 14 | hostname: kafka-1 15 | depends_on: 16 | - zookeeper 17 | environment: 18 | KAFKA_BROKER_ID: 1 19 | KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181 20 | KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka-1:9092 21 | KAFKA_DEFAULT_REPLICATION_FACTOR: 3 22 | healthcheck: 23 | test: ["CMD", "nc", "127.0.0.1", "9092"] 24 | start_period: 30s 25 | 26 | kafka-2: 27 | image: confluentinc/cp-kafka:5.3.1 28 | hostname: kafka-2 29 | depends_on: 30 | - zookeeper 31 | environment: 32 | KAFKA_BROKER_ID: 2 33 | KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181 34 | KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka-2:9092 35 | KAFKA_DEFAULT_REPLICATION_FACTOR: 3 36 | healthcheck: 37 | test: ["CMD", "nc", "127.0.0.1", "9092"] 38 | start_period: 30s 39 | 40 | kafka-3: 41 | image: confluentinc/cp-kafka:5.3.1 42 | hostname: kafka-3 43 | depends_on: 44 | - zookeeper 45 | environment: 46 | KAFKA_BROKER_ID: 3 47 | KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181 48 | KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka-3:9092 49 | KAFKA_DEFAULT_REPLICATION_FACTOR: 3 50 | healthcheck: 51 | test: ["CMD", "nc", "127.0.0.1", "9092"] 52 | start_period: 30s 53 | 54 | mysql: 55 | image: mysql:5.7 56 | volumes: 57 | - ../step9/mysql-init.sql:/docker-entrypoint-initdb.d/mysql-init.sql 58 | environment: 59 | MYSQL_ROOT_PASSWORD: password 60 | MYSQL_DATABASE: db 61 | MYSQL_USER: user 62 | MYSQL_PASSWORD: password 63 | 64 | schema-registry: 65 | image: confluentinc/cp-schema-registry:5.3.1 66 | ports: 67 | - "8082:8082" 68 | depends_on: 69 | - zookeeper 70 | - kafka-1 71 | - kafka-2 72 | - kafka-3 73 | environment: 74 | SCHEMA_REGISTRY_HOST_NAME: schema-registry 75 | SCHEMA_REGISTRY_LISTENERS: "http://0.0.0.0:8082" 76 | SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS: "PLAINTEXT://kafka-1:9092,PLAINTEXT://kafka-2:9092,PLAINTEXT://kafka-3:9092" 77 | 78 | connect: 79 | image: confluentinc/cp-kafka-connect:5.3.1 80 | hostname: connect 81 | restart: always 82 | ports: 83 | - "8083:8083" 84 | depends_on: 85 | - zookeeper 86 | - kafka-1 87 | - kafka-2 88 | - kafka-3 89 | - schema-registry 90 | - mysql 91 | environment: 92 | CONNECT_BOOTSTRAP_SERVERS: "kafka-1:9092,kafka-2:9092,kafka-3:9092" 93 | CONNECT_GROUP_ID: "connect" 94 | CONNECT_CONFIG_STORAGE_TOPIC: connect-config 95 | CONNECT_OFFSET_STORAGE_TOPIC: connect-offsets 96 | CONNECT_STATUS_STORAGE_TOPIC: connect-status 97 | CONNECT_REPLICATION_FACTOR: 2 98 | CONNECT_KEY_CONVERTER: "org.apache.kafka.connect.storage.StringConverter" 99 | CONNECT_VALUE_CONVERTER: "org.apache.kafka.connect.json.JsonConverter" 100 | CONNECT_INTERNAL_KEY_CONVERTER: "org.apache.kafka.connect.json.JsonConverter" 101 | CONNECT_INTERNAL_VALUE_CONVERTER: "org.apache.kafka.connect.json.JsonConverter" 102 | CONNECT_REST_ADVERTISED_HOST_NAME: "connect" 103 | CONNECT_PLUGIN_PATH: "/usr/share/java" 104 | CONNECT_LOG4J_LOGGERS: org.reflections=ERROR 105 | volumes: 106 | - ../step9/mysql-connector-java-5.1.45-bin.jar:/usr/share/java/kafka-connect-jdbc/mysql-connector-java-5.1.45-bin.jar 107 | healthcheck: 108 | test: 
["CMD", "nc", "127.0.0.1", "8083"] 109 | start_period: 30s 110 | -------------------------------------------------------------------------------- /step11/debezium-connector-mysql-0.7.3-plugin.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/framiere/a-kafka-story/7cb5f0e6323bab10b44386b0aeab9a6ee00bfead/step11/debezium-connector-mysql-0.7.3-plugin.tar.gz -------------------------------------------------------------------------------- /step11/debezium-connector-mysql/COPYRIGHT.txt: -------------------------------------------------------------------------------- 1 | Aaron Rosenberg 2 | Akshath Patkar 3 | Andras Istvan Nagy 4 | Andrey Pustovetov 5 | Attila Szucs 6 | Barry LaFond 7 | Ben Williams 8 | Brandon Maguire 9 | Chris Riccomini 10 | Christian Posta 11 | David Chen 12 | David Leibovic 13 | David Szabo 14 | Denis Mikhaylov 15 | Dennis Persson 16 | Duncan Sands 17 | Emrul Islam 18 | Eric S. Kreiseir 19 | Ewen Cheslack-Postava 20 | Gunnar Morling 21 | Henryk Konsek 22 | Horia Chiorean 23 | Jiri Pechanec 24 | Jure Kajzer 25 | MaoXiang Pan 26 | Mario Mueller 27 | Matteo Capitanio 28 | Omar Al-Safi 29 | Liu Hanlin 30 | Peter Goransson 31 | Prannoy Mittal 32 | Raf Liwoch 33 | Ramesh Reddy 34 | Randall Hauch 35 | Sairam Polavarapu 36 | Sanne Grinovero 37 | Satyajit Vegesna 38 | Scofield Xu 39 | Sherafudheen PM 40 | Stanley Shyiko 41 | Steven Siahetiong 42 | Tom Bentley 43 | Willie Cheong 44 | -------------------------------------------------------------------------------- /step11/debezium-connector-mysql/debezium-connector-mysql-0.7.3.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/framiere/a-kafka-story/7cb5f0e6323bab10b44386b0aeab9a6ee00bfead/step11/debezium-connector-mysql/debezium-connector-mysql-0.7.3.jar -------------------------------------------------------------------------------- /step11/debezium-connector-mysql/debezium-core-0.7.3.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/framiere/a-kafka-story/7cb5f0e6323bab10b44386b0aeab9a6ee00bfead/step11/debezium-connector-mysql/debezium-core-0.7.3.jar -------------------------------------------------------------------------------- /step11/debezium-connector-mysql/mysql-binlog-connector-java-0.13.0.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/framiere/a-kafka-story/7cb5f0e6323bab10b44386b0aeab9a6ee00bfead/step11/debezium-connector-mysql/mysql-binlog-connector-java-0.13.0.jar -------------------------------------------------------------------------------- /step11/debezium-connector-mysql/mysql-connector-java-5.1.40.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/framiere/a-kafka-story/7cb5f0e6323bab10b44386b0aeab9a6ee00bfead/step11/debezium-connector-mysql/mysql-connector-java-5.1.40.jar -------------------------------------------------------------------------------- /step11/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.4' 2 | services: 3 | zookeeper: 4 | image: confluentinc/cp-zookeeper:5.3.1 5 | hostname: zookeeper 6 | environment: 7 | ZOOKEEPER_CLIENT_PORT: 2181 8 | healthcheck: 9 | test: ["CMD", "bash", "-c", "echo ruok | nc localhost 2181 | grep imok"] 10 | start_period: 30s 11 | 12 | kafka-1: 13 | image: 
confluentinc/cp-kafka:5.3.1 14 | hostname: kafka-1 15 | depends_on: 16 | - zookeeper 17 | environment: 18 | KAFKA_BROKER_ID: 1 19 | KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181 20 | KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka-1:9092 21 | KAFKA_DEFAULT_REPLICATION_FACTOR: 3 22 | healthcheck: 23 | test: ["CMD", "nc", "127.0.0.1", "9092"] 24 | start_period: 30s 25 | 26 | kafka-2: 27 | image: confluentinc/cp-kafka:5.3.1 28 | hostname: kafka-2 29 | depends_on: 30 | - zookeeper 31 | environment: 32 | KAFKA_BROKER_ID: 2 33 | KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181 34 | KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka-2:9092 35 | KAFKA_DEFAULT_REPLICATION_FACTOR: 3 36 | healthcheck: 37 | test: ["CMD", "nc", "127.0.0.1", "9092"] 38 | start_period: 30s 39 | 40 | kafka-3: 41 | image: confluentinc/cp-kafka:5.3.1 42 | hostname: kafka-3 43 | depends_on: 44 | - zookeeper 45 | environment: 46 | KAFKA_BROKER_ID: 3 47 | KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181 48 | KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka-3:9092 49 | KAFKA_DEFAULT_REPLICATION_FACTOR: 3 50 | healthcheck: 51 | test: ["CMD", "nc", "127.0.0.1", "9092"] 52 | start_period: 30s 53 | 54 | mysql: 55 | image: mysql:5.7 56 | volumes: 57 | - ./mysql.cnf:/etc/mysql/conf.d/custom.cnf 58 | - ./mysql-init.sql:/docker-entrypoint-initdb.d/mysql-init.sql 59 | environment: 60 | MYSQL_ROOT_PASSWORD: password 61 | MYSQL_DATABASE: db 62 | MYSQL_USER: user 63 | MYSQL_PASSWORD: password 64 | MYSQL_ROOT_HOST: 172.% # Allow docker containers to connect to mysql 65 | 66 | schema-registry: 67 | image: confluentinc/cp-schema-registry:5.3.1 68 | ports: 69 | - "8082:8082" 70 | depends_on: 71 | - zookeeper 72 | - kafka-1 73 | - kafka-2 74 | - kafka-3 75 | environment: 76 | SCHEMA_REGISTRY_HOST_NAME: schema-registry 77 | SCHEMA_REGISTRY_LISTENERS: "http://0.0.0.0:8082" 78 | SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS: "PLAINTEXT://kafka-1:9092,PLAINTEXT://kafka-2:9092,PLAINTEXT://kafka-3:9092" 79 | 80 | connect: 81 | image: confluentinc/cp-kafka-connect:5.3.1 82 | hostname: connect 83 | restart: always 84 | ports: 85 | - "8083:8083" 86 | depends_on: 87 | - zookeeper 88 | - kafka-1 89 | - kafka-2 90 | - kafka-3 91 | - schema-registry 92 | - mysql 93 | environment: 94 | CONNECT_BOOTSTRAP_SERVERS: "kafka-1:9092,kafka-2:9092,kafka-3:9092" 95 | CONNECT_GROUP_ID: "connect" 96 | CONNECT_CONFIG_STORAGE_TOPIC: connect-config 97 | CONNECT_OFFSET_STORAGE_TOPIC: connect-offsets 98 | CONNECT_STATUS_STORAGE_TOPIC: connect-status 99 | CONNECT_REPLICATION_FACTOR: 2 100 | CONNECT_KEY_CONVERTER: "org.apache.kafka.connect.storage.StringConverter" 101 | CONNECT_VALUE_CONVERTER: "org.apache.kafka.connect.json.JsonConverter" 102 | CONNECT_INTERNAL_KEY_CONVERTER: org.apache.kafka.connect.json.JsonConverter 103 | CONNECT_INTERNAL_VALUE_CONVERTER: org.apache.kafka.connect.json.JsonConverter 104 | CONNECT_KEY_CONVERTER_SCHEMA_REGISTRY_URL: http://schema-registry:8081 105 | CONNECT_VALUE_CONVERTER_SCHEMA_REGISTRY_URL: http://schema-registry:8081 106 | CONNECT_REST_ADVERTISED_HOST_NAME: "connect" 107 | CONNECT_PLUGIN_PATH: "/usr/share/java" 108 | CONNECT_LOG4J_LOGGERS: org.reflections=ERROR 109 | volumes: 110 | - ../step9/mysql-connector-java-5.1.45-bin.jar:/usr/share/java/kafka-connect-jdbc/mysql-connector-java-5.1.45-bin.jar 111 | - ./debezium-connector-mysql:/usr/share/java/debezium-connector-mysql 112 | healthcheck: 113 | test: ["CMD", "nc", "127.0.0.1", "8083"] 114 | start_period: 30s 115 | -------------------------------------------------------------------------------- /step11/mysql-init.sql: 
-------------------------------------------------------------------------------- 1 | GRANT REPLICATION SLAVE, REPLICATION CLIENT ON *.* TO 'replicator' IDENTIFIED BY 'replpass'; 2 | 3 | GRANT SELECT, RELOAD, SHOW DATABASES, REPLICATION SLAVE, REPLICATION CLIENT ON *.* TO 'debezium' IDENTIFIED BY 'dbz'; 4 | 5 | 6 | CREATE DATABASE mydb; 7 | 8 | GRANT ALL PRIVILEGES ON mydb.* TO 'user'@'%'; 9 | 10 | USE mydb; 11 | 12 | CREATE TABLE team ( 13 | id INT NOT NULL PRIMARY KEY AUTO_INCREMENT, 14 | name VARCHAR(255) NOT NULL, 15 | email VARCHAR(255) NOT NULL, 16 | last_modified DATETIME NOT NULL 17 | ); 18 | 19 | 20 | INSERT INTO team ( 21 | id, 22 | name, 23 | email, 24 | last_modified 25 | ) VALUES ( 26 | 1, 27 | 'kafka', 28 | 'kafka@apache.org', 29 | NOW() 30 | ); 31 | 32 | ALTER TABLE team AUTO_INCREMENT = 101; 33 | 34 | -------------------------------------------------------------------------------- /step11/mysql.cnf: -------------------------------------------------------------------------------- 1 | # For advice on how to change settings please see 2 | # http://dev.mysql.com/doc/refman/5.7/en/server-configuration-defaults.html 3 | 4 | [mysqld] 5 | # 6 | # Remove leading # and set to the amount of RAM for the most important data 7 | # cache in MySQL. Start at 70% of total RAM for dedicated server, else 10%. 8 | # innodb_buffer_pool_size = 128M 9 | # 10 | # Remove leading # to turn on a very important data integrity option: logging 11 | # changes to the binary log between backups. 12 | # log_bin 13 | # 14 | # Remove leading # to set options mainly useful for reporting servers. 15 | # The server defaults are faster for transactions and fast SELECTs. 16 | # Adjust sizes as needed, experiment to find the optimal values. 17 | # join_buffer_size = 128M 18 | # sort_buffer_size = 2M 19 | # read_rnd_buffer_size = 2M 20 | skip-host-cache 21 | skip-name-resolve 22 | #datadir=/var/lib/mysql 23 | #socket=/var/lib/mysql/mysql.sock 24 | #secure-file-priv=/var/lib/mysql-files 25 | user=mysql 26 | 27 | # Disabling symbolic-links is recommended to prevent assorted security risks 28 | symbolic-links=0 29 | 30 | #log-error=/var/log/mysqld.log 31 | #pid-file=/var/run/mysqld/mysqld.pid 32 | 33 | # ---------------------------------------------- 34 | # Enable the binlog for replication & CDC 35 | # ---------------------------------------------- 36 | 37 | # Enable binary replication log and set the prefix, expiration, and log format. 38 | # The prefix is arbitrary, expiration can be short for integration tests but would 39 | # be longer on a production system. Row-level info is required for ingest to work. 
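# (Debezium reads individual row images from this binlog, so 'row' format is a hard requirement;
# 'statement' or 'mixed' logging would not expose the before/after values it needs.)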
40 | # Server ID is required, but this will vary on production systems 41 | server-id = 223344 42 | log_bin = mysql-bin 43 | expire_logs_days = 1 44 | binlog_format = row -------------------------------------------------------------------------------- /step12/README.md: -------------------------------------------------------------------------------- 1 | # Objective 2 | 3 | Getting Change data capture ready with mysql and debezium with Avro 4 | 5 | 6 | Let's add add Kafka Connect configuration 7 | 8 | ``` 9 | CONNECT_KEY_CONVERTER: io.confluent.connect.avro.AvroConverter 10 | CONNECT_VALUE_CONVERTER: io.confluent.connect.avro.AvroConverter 11 | CONNECT_INTERNAL_KEY_CONVERTER: org.apache.kafka.connect.json.JsonConverter 12 | CONNECT_INTERNAL_VALUE_CONVERTER: org.apache.kafka.connect.json.JsonConverter 13 | CONNECT_KEY_CONVERTER_SCHEMA_REGISTRY_URL: http://schema-registry:8082 14 | CONNECT_VALUE_CONVERTER_SCHEMA_REGISTRY_URL: http://schema-registry:8082 15 | ``` 16 | 17 | ``` 18 | $ docker-compose exec connect curl -s -XPOST -H "Content-Type: application/json; charset=UTF-8" http://localhost:8083/connectors/ -d ' 19 | { 20 | "name": "debezium-connector", 21 | "config": { 22 | "connector.class": "io.debezium.connector.mysql.MySqlConnector", 23 | "tasks.max": "1", 24 | "database.hostname": "mysql", 25 | "database.port": "3306", 26 | "database.user": "debezium", 27 | "database.password": "dbz", 28 | "database.server.id": "223344", 29 | "database.server.name": "dbserver1", 30 | "database.whitelist": "mydb", 31 | "database.history.kafka.bootstrap.servers": "kafka-1:9092,kafka-2:9092,kafka-3:9092", 32 | "database.history.kafka.topic": "schema-changes.mydb" 33 | } 34 | }' 35 | ``` 36 | 37 | Let's see its status 38 | 39 | ```sh 40 | $ docker-compose exec connect curl -s localhost:8083/connectors/debezium-connector/status | jq . 41 | { 42 | "name": "debezium-connector", 43 | "connector": { 44 | "state": "RUNNING", 45 | "worker_id": "connect:8083" 46 | }, 47 | "tasks": [ 48 | { 49 | "state": "RUNNING", 50 | "id": 0, 51 | "worker_id": "connect:8083" 52 | } 53 | ], 54 | "type": "source" 55 | } 56 | ``` 57 | 58 | Let's see if we have our topic 59 | 60 | ```sh 61 | $ docker-compose exec kafka-1 kafka-topics --zookeeper zookeeper:2181 --list 62 | __consumer_offsets 63 | _schemas 64 | connect-config 65 | connect-offsets 66 | connect-status 67 | dbserver1 68 | dbserver1.mydb.team 69 | schema-changes.mydb 70 | ``` 71 | 72 | Let's dig into out team topic 73 | 74 | ``` 75 | $ docker-compose exec kafka-1 kafka-console-consumer --bootstrap-server kafka-1:9092 --topic dbserver1.mydb.team --from-beginning 76 | 77 | kafka kafka@apache.org����X 78 | 0.7.3dbserver1 mysql-bin.000003mydteamc����X 79 | ``` 80 | 81 | Let's use the avro consumers 82 | 83 | ``` 84 | $ docker-compose exec schema-registry kafka-avro-console-consumer -bootstrap-server kafka-1:9092,kafka-2:9092,kafka-3:9092 --topic dbserver1.mydb.team --from-beginning --property schema.registry.url=http://localhost:8082 85 | {"before":null,"after":{"dbserver1.mydb.team.Value":{"id":1,"name":"kafka","email":"kafka@apache.org","last_modified":1519584693000}},"source":{"version":{"string":"0.7.3"},"name":"dbserver1","server_id":0,"ts_sec":0,"gtid":null,"file":"mysql-bin.000003","pos":154,"row":0,"snapshot":{"boolean":true},"thread":null,"db":{"string":"mydb"},"table":{"string":"team"}},"op":"c","ts_ms":{"long":1519584821699}} 86 | ``` 87 | 88 | Perfect! 
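Since the connector now writes Avro through the Schema Registry, you can also inspect what it registered. With the default subject naming strategy there should be one subject per topic key and value, for example `dbserver1.mydb.team-value`; the exact list depends on what the connector has produced so far:

```
$ docker-compose exec connect curl -s http://schema-registry:8082/subjects | jq .
$ docker-compose exec connect curl -s http://schema-registry:8082/subjects/dbserver1.mydb.team-value/versions/latest | jq .
```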
89 | 90 | -------------------------------------------------------------------------------- /step12/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.4' 2 | services: 3 | zookeeper: 4 | image: confluentinc/cp-zookeeper:5.3.1 5 | hostname: zookeeper 6 | environment: 7 | ZOOKEEPER_CLIENT_PORT: 2181 8 | healthcheck: 9 | test: ["CMD", "bash", "-c", "echo ruok | nc localhost 2181 | grep imok"] 10 | start_period: 30s 11 | 12 | kafka-1: 13 | image: confluentinc/cp-kafka:5.3.1 14 | hostname: kafka-1 15 | depends_on: 16 | - zookeeper 17 | environment: 18 | KAFKA_BROKER_ID: 1 19 | KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181 20 | KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka-1:9092 21 | KAFKA_DEFAULT_REPLICATION_FACTOR: 3 22 | healthcheck: 23 | test: ["CMD", "nc", "127.0.0.1", "9092"] 24 | start_period: 30s 25 | 26 | kafka-2: 27 | image: confluentinc/cp-kafka:5.3.1 28 | hostname: kafka-2 29 | depends_on: 30 | - zookeeper 31 | environment: 32 | KAFKA_BROKER_ID: 2 33 | KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181 34 | KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka-2:9092 35 | KAFKA_DEFAULT_REPLICATION_FACTOR: 3 36 | healthcheck: 37 | test: ["CMD", "nc", "127.0.0.1", "9092"] 38 | start_period: 30s 39 | 40 | kafka-3: 41 | image: confluentinc/cp-kafka:5.3.1 42 | hostname: kafka-3 43 | depends_on: 44 | - zookeeper 45 | environment: 46 | KAFKA_BROKER_ID: 3 47 | KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181 48 | KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka-3:9092 49 | KAFKA_DEFAULT_REPLICATION_FACTOR: 3 50 | healthcheck: 51 | test: ["CMD", "nc", "127.0.0.1", "9092"] 52 | start_period: 30s 53 | 54 | mysql: 55 | image: mysql:5.7 56 | volumes: 57 | - ../step11/mysql.cnf:/etc/mysql/conf.d/custom.cnf 58 | - ../step11/mysql-init.sql:/docker-entrypoint-initdb.d/mysql-init.sql 59 | environment: 60 | MYSQL_ROOT_PASSWORD: password 61 | MYSQL_DATABASE: db 62 | MYSQL_USER: user 63 | MYSQL_PASSWORD: password 64 | MYSQL_ROOT_HOST: 172.% # Allow docker containers to connect to mysql 65 | 66 | schema-registry: 67 | image: confluentinc/cp-schema-registry:5.3.1 68 | ports: 69 | - "8082:8082" 70 | depends_on: 71 | - zookeeper 72 | - kafka-1 73 | - kafka-2 74 | - kafka-3 75 | environment: 76 | SCHEMA_REGISTRY_HOST_NAME: schema-registry 77 | SCHEMA_REGISTRY_LISTENERS: "http://0.0.0.0:8082" 78 | SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS: "PLAINTEXT://kafka-1:9092,PLAINTEXT://kafka-2:9092,PLAINTEXT://kafka-3:9092" 79 | 80 | connect: 81 | image: confluentinc/cp-kafka-connect:5.3.1 82 | hostname: connect 83 | restart: always 84 | ports: 85 | - "8083:8083" 86 | depends_on: 87 | - zookeeper 88 | - kafka-1 89 | - kafka-2 90 | - kafka-3 91 | - schema-registry 92 | - mysql 93 | environment: 94 | CONNECT_BOOTSTRAP_SERVERS: "kafka-1:9092,kafka-2:9092,kafka-3:9092" 95 | CONNECT_GROUP_ID: "connect" 96 | CONNECT_CONFIG_STORAGE_TOPIC: connect-config 97 | CONNECT_OFFSET_STORAGE_TOPIC: connect-offsets 98 | CONNECT_STATUS_STORAGE_TOPIC: connect-status 99 | CONNECT_REPLICATION_FACTOR: 2 100 | CONNECT_KEY_CONVERTER: io.confluent.connect.avro.AvroConverter 101 | CONNECT_VALUE_CONVERTER: io.confluent.connect.avro.AvroConverter 102 | CONNECT_INTERNAL_KEY_CONVERTER: org.apache.kafka.connect.json.JsonConverter 103 | CONNECT_INTERNAL_VALUE_CONVERTER: org.apache.kafka.connect.json.JsonConverter 104 | CONNECT_KEY_CONVERTER_SCHEMA_REGISTRY_URL: http://schema-registry:8082 105 | CONNECT_VALUE_CONVERTER_SCHEMA_REGISTRY_URL: http://schema-registry:8082 106 | CONNECT_REST_ADVERTISED_HOST_NAME: "connect" 107 
| CONNECT_PLUGIN_PATH: "/usr/share/java" 108 | CONNECT_LOG4J_LOGGERS: org.reflections=ERROR 109 | volumes: 110 | - ../step9/mysql-connector-java-5.1.45-bin.jar:/usr/share/java/kafka-connect-jdbc/mysql-connector-java-5.1.45-bin.jar 111 | - ../step11/debezium-connector-mysql:/usr/share/java/debezium-connector-mysql 112 | healthcheck: 113 | test: ["CMD", "nc", "127.0.0.1", "8083"] 114 | start_period: 30s 115 | -------------------------------------------------------------------------------- /step13/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.4' 2 | services: 3 | zookeeper: 4 | image: confluentinc/cp-zookeeper:5.3.1 5 | hostname: zookeeper 6 | environment: 7 | ZOOKEEPER_CLIENT_PORT: 2181 8 | healthcheck: 9 | test: ["CMD", "bash", "-c", "echo ruok | nc localhost 2181 | grep imok"] 10 | start_period: 30s 11 | 12 | kafka-1: 13 | image: confluentinc/cp-kafka:5.3.1 14 | hostname: kafka-1 15 | depends_on: 16 | - zookeeper 17 | environment: 18 | KAFKA_BROKER_ID: 1 19 | KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181 20 | KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka-1:9092 21 | KAFKA_DEFAULT_REPLICATION_FACTOR: 3 22 | healthcheck: 23 | test: ["CMD", "nc", "127.0.0.1", "9092"] 24 | start_period: 30s 25 | 26 | kafka-2: 27 | image: confluentinc/cp-kafka:5.3.1 28 | hostname: kafka-2 29 | depends_on: 30 | - zookeeper 31 | environment: 32 | KAFKA_BROKER_ID: 2 33 | KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181 34 | KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka-2:9092 35 | KAFKA_DEFAULT_REPLICATION_FACTOR: 3 36 | healthcheck: 37 | test: ["CMD", "nc", "127.0.0.1", "9092"] 38 | start_period: 30s 39 | 40 | kafka-3: 41 | image: confluentinc/cp-kafka:5.3.1 42 | hostname: kafka-3 43 | depends_on: 44 | - zookeeper 45 | environment: 46 | KAFKA_BROKER_ID: 3 47 | KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181 48 | KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka-3:9092 49 | KAFKA_DEFAULT_REPLICATION_FACTOR: 3 50 | healthcheck: 51 | test: ["CMD", "nc", "127.0.0.1", "9092"] 52 | start_period: 30s 53 | 54 | mysql: 55 | image: mysql:5.7 56 | volumes: 57 | - ../step11/mysql.cnf:/etc/mysql/conf.d/custom.cnf 58 | - ../step11/mysql-init.sql:/docker-entrypoint-initdb.d/mysql-init.sql 59 | environment: 60 | MYSQL_ROOT_PASSWORD: password 61 | MYSQL_DATABASE: db 62 | MYSQL_USER: user 63 | MYSQL_PASSWORD: password 64 | MYSQL_ROOT_HOST: 172.% # Allow docker containers to connect to mysql 65 | 66 | schema-registry: 67 | image: confluentinc/cp-schema-registry:5.3.1 68 | ports: 69 | - "8082:8082" 70 | depends_on: 71 | - zookeeper 72 | - kafka-1 73 | - kafka-2 74 | - kafka-3 75 | environment: 76 | SCHEMA_REGISTRY_HOST_NAME: schema-registry 77 | SCHEMA_REGISTRY_LISTENERS: "http://0.0.0.0:8082" 78 | SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS: "PLAINTEXT://kafka-1:9092,PLAINTEXT://kafka-2:9092,PLAINTEXT://kafka-3:9092" 79 | 80 | minio: 81 | image: minio/minio 82 | ports: 83 | - "9000:9000" 84 | environment: 85 | MINIO_ACCESS_KEY: AKIAIOSFODNN7EXAMPLE 86 | MINIO_SECRET_KEY: wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY 87 | command: server /data 88 | 89 | create-buckets: 90 | image: minio/mc 91 | depends_on: 92 | - minio 93 | entrypoint: > 94 | /bin/sh -c " 95 | /usr/bin/mc config host add myminio http://minio:9000 AKIAIOSFODNN7EXAMPLE wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY; 96 | /usr/bin/mc mb myminio/cdc; 97 | exit 0; 98 | " 99 | 100 | list-buckets: 101 | image: minio/mc 102 | depends_on: 103 | - minio 104 | entrypoint: > 105 | /bin/sh -c " 106 | /usr/bin/mc config host add myminio 
http://minio:9000 AKIAIOSFODNN7EXAMPLE wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY; 107 | /usr/bin/mc ls -r myminio/cdc; 108 | exit 0; 109 | " 110 | 111 | connect: 112 | image: confluentinc/cp-kafka-connect:5.3.1 113 | hostname: connect 114 | restart: always 115 | ports: 116 | - "8083:8083" 117 | depends_on: 118 | - zookeeper 119 | - kafka-1 120 | - kafka-2 121 | - kafka-3 122 | - schema-registry 123 | - mysql 124 | - minio 125 | environment: 126 | CONNECT_BOOTSTRAP_SERVERS: "kafka-1:9092,kafka-2:9092,kafka-3:9092" 127 | CONNECT_GROUP_ID: "connect" 128 | CONNECT_CONFIG_STORAGE_TOPIC: connect-config 129 | CONNECT_OFFSET_STORAGE_TOPIC: connect-offsets 130 | CONNECT_STATUS_STORAGE_TOPIC: connect-status 131 | CONNECT_REPLICATION_FACTOR: 2 132 | CONNECT_KEY_CONVERTER: io.confluent.connect.avro.AvroConverter 133 | CONNECT_VALUE_CONVERTER: io.confluent.connect.avro.AvroConverter 134 | CONNECT_INTERNAL_KEY_CONVERTER: org.apache.kafka.connect.json.JsonConverter 135 | CONNECT_INTERNAL_VALUE_CONVERTER: org.apache.kafka.connect.json.JsonConverter 136 | CONNECT_KEY_CONVERTER_SCHEMA_REGISTRY_URL: http://schema-registry:8082 137 | CONNECT_VALUE_CONVERTER_SCHEMA_REGISTRY_URL: http://schema-registry:8082 138 | CONNECT_REST_ADVERTISED_HOST_NAME: "connect" 139 | CONNECT_PLUGIN_PATH: "/usr/share/java" 140 | CONNECT_LOG4J_LOGGERS: org.reflections=ERROR 141 | AWS_ACCESS_KEY_ID: AKIAIOSFODNN7EXAMPLE 142 | AWS_SECRET_ACCESS_KEY: wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY 143 | volumes: 144 | - ../step9/mysql-connector-java-5.1.45-bin.jar:/usr/share/java/kafka-connect-jdbc/mysql-connector-java-5.1.45-bin.jar 145 | - ../step11/debezium-connector-mysql:/usr/share/java/debezium-connector-mysql 146 | healthcheck: 147 | test: ["CMD", "nc", "127.0.0.1", "8083"] 148 | start_period: 30s 149 | -------------------------------------------------------------------------------- /step13/minio.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/framiere/a-kafka-story/7cb5f0e6323bab10b44386b0aeab9a6ee00bfead/step13/minio.png -------------------------------------------------------------------------------- /step14/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.4' 2 | services: 3 | zookeeper: 4 | image: confluentinc/cp-zookeeper:5.3.1 5 | hostname: zookeeper 6 | environment: 7 | ZOOKEEPER_CLIENT_PORT: 2181 8 | healthcheck: 9 | test: ["CMD", "bash", "-c", "echo ruok | nc localhost 2181 | grep imok"] 10 | start_period: 30s 11 | 12 | kafka-1: 13 | image: confluentinc/cp-kafka:5.3.1 14 | hostname: kafka-1 15 | depends_on: 16 | - zookeeper 17 | environment: 18 | KAFKA_BROKER_ID: 1 19 | KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181 20 | KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka-1:9092 21 | KAFKA_DEFAULT_REPLICATION_FACTOR: 3 22 | healthcheck: 23 | test: ["CMD", "nc", "127.0.0.1", "9092"] 24 | start_period: 30s 25 | 26 | kafka-2: 27 | image: confluentinc/cp-kafka:5.3.1 28 | hostname: kafka-2 29 | depends_on: 30 | - zookeeper 31 | environment: 32 | KAFKA_BROKER_ID: 2 33 | KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181 34 | KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka-2:9092 35 | KAFKA_DEFAULT_REPLICATION_FACTOR: 3 36 | healthcheck: 37 | test: ["CMD", "nc", "127.0.0.1", "9092"] 38 | start_period: 30s 39 | 40 | kafka-3: 41 | image: confluentinc/cp-kafka:5.3.1 42 | hostname: kafka-3 43 | depends_on: 44 | - zookeeper 45 | environment: 46 | KAFKA_BROKER_ID: 3 47 | KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181 48 | 
KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka-3:9092 49 | KAFKA_DEFAULT_REPLICATION_FACTOR: 3 50 | healthcheck: 51 | test: ["CMD", "nc", "127.0.0.1", "9092"] 52 | start_period: 30s 53 | 54 | ksql: 55 | image: confluentinc/cp-ksql-server:5.2.1 56 | hostname: ksql-cli 57 | depends_on: 58 | - kafka-1 59 | - kafka-2 60 | - kafka-3 61 | environment: 62 | KSQL_BOOTSTRAP_SERVERS: kafka-1:9092 63 | KSQL_LISTENERS: http://0.0.0.0:8088 64 | -------------------------------------------------------------------------------- /step15/README.md: -------------------------------------------------------------------------------- 1 | # Objective 2 | 3 | Create Streams, query streams from UI 4 | 5 | Access the Control Center UI at http://localhost:9021 6 | 7 | On the left menu click on KSQL 8 | ![ksql](./ksql-ui.png "ksql") 9 | 10 | Click `streams` tab, then on `Add a stream` Button 11 | 12 | ![create-streams](./create-streams.png "Create Streams")` 13 | 14 | Select the source topic for the stream 15 | 16 | ![streams-topic](./streams-topic.png "Choose topic") 17 | 18 | Configure the Stream 19 | 20 | **IMPORTANT: Don't forget to fill `How are your message encoded?``** 21 | 22 | Note that Control Center discovered all the field ! 23 | 24 | ![streams-field](./streams-field.png "Configure stream") 25 | 26 | Let's query the result ! 27 | 28 | ![run-query](./streams-result.png "Run a query") -------------------------------------------------------------------------------- /step15/create-streams.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/framiere/a-kafka-story/7cb5f0e6323bab10b44386b0aeab9a6ee00bfead/step15/create-streams.png -------------------------------------------------------------------------------- /step15/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.4' 2 | services: 3 | zookeeper: 4 | image: confluentinc/cp-zookeeper:5.3.1 5 | hostname: zookeeper 6 | environment: 7 | ZOOKEEPER_CLIENT_PORT: 2181 8 | healthcheck: 9 | test: ["CMD", "bash", "-c", "echo ruok | nc localhost 2181 | grep imok"] 10 | start_period: 30s 11 | 12 | kafka-1: 13 | image: confluentinc/cp-kafka:5.3.1 14 | hostname: kafka-1 15 | depends_on: 16 | - zookeeper 17 | environment: 18 | KAFKA_BROKER_ID: 1 19 | KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181 20 | KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka-1:9092 21 | KAFKA_DEFAULT_REPLICATION_FACTOR: 3 22 | healthcheck: 23 | test: ["CMD", "nc", "127.0.0.1", "9092"] 24 | start_period: 30s 25 | 26 | kafka-2: 27 | image: confluentinc/cp-kafka:5.3.1 28 | hostname: kafka-2 29 | depends_on: 30 | - zookeeper 31 | environment: 32 | KAFKA_BROKER_ID: 2 33 | KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181 34 | KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka-2:9092 35 | KAFKA_DEFAULT_REPLICATION_FACTOR: 3 36 | healthcheck: 37 | test: ["CMD", "nc", "127.0.0.1", "9092"] 38 | start_period: 30s 39 | 40 | kafka-3: 41 | image: confluentinc/cp-kafka:5.3.1 42 | hostname: kafka-3 43 | depends_on: 44 | - zookeeper 45 | environment: 46 | KAFKA_BROKER_ID: 3 47 | KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181 48 | KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka-3:9092 49 | KAFKA_DEFAULT_REPLICATION_FACTOR: 3 50 | healthcheck: 51 | test: ["CMD", "nc", "127.0.0.1", "9092"] 52 | start_period: 30s 53 | 54 | ksql: 55 | image: confluentinc/cp-ksql-server:5.3.1 56 | hostname: ksql-cli 57 | depends_on: 58 | - kafka-1 59 | - kafka-2 60 | - kafka-3 61 | ports: 62 | - "8088:8088" 63 | environment: 64 | 
KSQL_BOOTSTRAP_SERVERS: kafka-1:9092 65 | KSQL_LISTENERS: http://0.0.0.0:8088 66 | 67 | control-center: 68 | image: confluentinc/cp-enterprise-control-center:5.3.1 69 | container_name: control-center 70 | restart: always 71 | depends_on: 72 | - zookeeper 73 | - kafka-1 74 | - kafka-2 75 | - kafka-3 76 | - ksql 77 | ports: 78 | - "9021:9021" 79 | environment: 80 | CONTROL_CENTER_BOOTSTRAP_SERVERS: "kafka-1:9091,kafka-2:9092" 81 | CONTROL_CENTER_ZOOKEEPER_CONNECT: "zookeeper:2181" 82 | CONTROL_CENTER_KSQL_URL: "http://ksql:8088" 83 | CONTROL_CENTER_REST_LISTENERS: "http://0.0.0.0:9021" 84 | CONTROL_CENTER_KSQL_ADVERTISED_URL: "http://localhost:8088" 85 | CONTROL_CENTER_REPLICATION_FACTOR: 3 86 | 87 | -------------------------------------------------------------------------------- /step15/ksql-ui.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/framiere/a-kafka-story/7cb5f0e6323bab10b44386b0aeab9a6ee00bfead/step15/ksql-ui.png -------------------------------------------------------------------------------- /step15/live_topic_data.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/framiere/a-kafka-story/7cb5f0e6323bab10b44386b0aeab9a6ee00bfead/step15/live_topic_data.png -------------------------------------------------------------------------------- /step15/streams-field.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/framiere/a-kafka-story/7cb5f0e6323bab10b44386b0aeab9a6ee00bfead/step15/streams-field.png -------------------------------------------------------------------------------- /step15/streams-result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/framiere/a-kafka-story/7cb5f0e6323bab10b44386b0aeab9a6ee00bfead/step15/streams-result.png -------------------------------------------------------------------------------- /step15/streams-topic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/framiere/a-kafka-story/7cb5f0e6323bab10b44386b0aeab9a6ee00bfead/step15/streams-topic.png -------------------------------------------------------------------------------- /step16/README.md: -------------------------------------------------------------------------------- 1 | # Objective 2 | 3 | Let's add a transform to get only the modified data http://kafka.apache.org/documentation/#connect_transforms 4 | 5 | ```sh 6 | docker-compose exec connect curl -s -XPOST -H "Content-Type: application/json; charset=UTF-8" http://localhost:8083/connectors/ -d ' 7 | { 8 | "name": "debezium-connector", 9 | "config": { 10 | "connector.class": "io.debezium.connector.mysql.MySqlConnector", 11 | "tasks.max": "1", 12 | "database.hostname": "mysql", 13 | "database.port": "3306", 14 | "database.user": "debezium", 15 | "database.password": "dbz", 16 | "database.server.id": "223344", 17 | "database.server.name": "dbserver1", 18 | "database.whitelist": "mydb", 19 | "database.history.kafka.bootstrap.servers": "kafka-1:9092,kafka-2:9092,kafka-3:9092", 20 | "database.history.kafka.topic": "schema-changes.mydb", 21 | "include.schema.changes": "true" , 22 | "transforms": "unwrap", 23 | "transforms.unwrap.type": "io.debezium.transforms.UnwrapFromEnvelope", 24 | "transforms.unwrap.drop.tombstones":"false" 25 | } 26 | }' 27 | ``` 28 | 29 | Let's do database manipulation 30 | 31 | ``` 32 | 
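# First look at the current rows, then run one INSERT, one UPDATE and one DELETE,
# so that Debezium emits a create, an update and a delete event for the team table
# (plus a tombstone, since the unwrap transform is configured with drop.tombstones=false).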
docker-compose exec mysql mysql --user=root --password=password --database=mydb -e "select * from team" 33 | docker-compose exec mysql mysql --user=root --password=password --database=mydb -e " 34 | INSERT INTO team ( \ 35 | name, \ 36 | email, \ 37 | last_modified \ 38 | ) VALUES ( \ 39 | 'another', \ 40 | 'another@apache.org', \ 41 | NOW() \ 42 | ); " 43 | docker-compose exec mysql mysql --user=root --password=password --database=mydb -e "UPDATE team set name='another name', last_modified = NOW() where id = '2'" 44 | docker-compose exec mysql mysql --user=root --password=password --database=mydb -e "DELETE FROM team WHERE id = 2" 45 | ``` 46 | 47 | Use ksql 48 | 49 | ``` 50 | $ docker-compose exec ksql ksql 51 | ksql> SET 'auto.offset.reset' = 'earliest'; 52 | Successfully changed local property 'auto.offset.reset' from 'null' to 'earliest' 53 | ksql> CREATE STREAM team WITH (KAFKA_TOPIC='dbserver1.mydb.team', VALUE_FORMAT='AVRO'); 54 | 55 | Message 56 | ---------------------------- 57 | Stream created and running 58 | --------------------------- 59 | ksql> SELECT * FROM team; 60 | 1519925080993 | | 1 | kafka | kafka@apache.org | 1519925014000 61 | 1519925111125 | | 2 | another | another@apache.org | 1519925110000 62 | 1519925120123 | | 2 | another name | another@apache.org | 1519925119000 63 | ``` 64 | 65 | Let's see the real underlying data to detect the deletion 66 | 67 | ```sh 68 | $ docker-compose exec schema-registry kafka-avro-console-consumer \ 69 | --bootstrap-server kafka-1:9092,kafka-2:9092,kafka-3:9092 \ 70 | --topic dbserver1.mydb.team \ 71 | --from-beginning \ 72 | --property schema.registry.url=http://localhost:8082 73 | {"id":1,"name":"kafka","email":"kafka@apache.org","last_modified":1519925014000} 74 | {"id":2,"name":"another","email":"another@apache.org","last_modified":1519925110000} 75 | {"id":2,"name":"another name","email":"another@apache.org","last_modified":1519925119000} 76 | null 77 | ``` 78 | 79 | We have a null, this is a tombstone. 80 | 81 | Let's see with the key 82 | 83 | ```sh 84 | $ docker-compose exec schema-registry kafka-avro-console-consumer \ 85 | --bootstrap-server kafka-1:9092,kafka-2:9092,kafka-3:9092 \ 86 | --topic dbserver1.mydb.team \ 87 | --from-beginning \ 88 | --property schema.registry.url=http://localhost:8082 \ 89 | --property print.key=true 90 | {"id":1} {"id":1,"name":"kafka","email":"kafka@apache.org","last_modified":1519925014000} 91 | {"id":2} {"id":2,"name":"another","email":"another@apache.org","last_modified":1519925110000} 92 | {"id":2} {"id":2,"name":"another name","email":"another@apache.org","last_modified":1519925119000} 93 | {"id":2} null 94 | ``` 95 | 96 | We have detected the deletion, we can make this topic a compacted topic then. 97 | 98 | Let's see the topic as of now 99 | 100 | ``` 101 | $ docker-compose exec kafka-1 kafka-topics --zookeeper zookeeper:2181 --describe --topic dbserver1.mydb.team 102 | Topic:dbserver1.mydb.team PartitionCount:1 ReplicationFactor:3 Configs: 103 | Topic: dbserver1.mydb.team Partition: 0 Leader: 3 Replicas: 3,1,2 Isr: 3,1,2 104 | ``` 105 | 106 | Let's add the compact policy 107 | 108 | ``` 109 | $ docker-compose exec kafka-1 kafka-configs \ 110 | --zookeeper zookeeper:2181 \ 111 | --entity-type topics \ 112 | --entity-name dbserver1.mydb.team \ 113 | --alter \ 114 | --add-config cleanup.policy=compact 115 | Completed Updating config for entity: topic 'dbserver1.mydb.team'. 
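# Optionally verify that the new policy is in place; this should now report cleanup.policy=compact:
$ docker-compose exec kafka-1 kafka-configs \
    --zookeeper zookeeper:2181 \
    --entity-type topics \
    --entity-name dbserver1.mydb.team \
    --describe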
116 | ``` 117 | 118 | 119 | -------------------------------------------------------------------------------- /step16/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.4' 2 | services: 3 | zookeeper: 4 | image: confluentinc/cp-zookeeper:5.3.1 5 | hostname: zookeeper 6 | environment: 7 | ZOOKEEPER_CLIENT_PORT: 2181 8 | healthcheck: 9 | test: ["CMD", "bash", "-c", "echo ruok | nc localhost 2181 | grep imok"] 10 | start_period: 30s 11 | 12 | kafka-1: 13 | image: confluentinc/cp-kafka:5.3.1 14 | hostname: kafka-1 15 | depends_on: 16 | - zookeeper 17 | environment: 18 | KAFKA_BROKER_ID: 1 19 | KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181 20 | KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka-1:9092 21 | KAFKA_DEFAULT_REPLICATION_FACTOR: 3 22 | healthcheck: 23 | test: ["CMD", "nc", "127.0.0.1", "9092"] 24 | start_period: 30s 25 | 26 | kafka-2: 27 | image: confluentinc/cp-kafka:5.3.1 28 | hostname: kafka-2 29 | depends_on: 30 | - zookeeper 31 | environment: 32 | KAFKA_BROKER_ID: 2 33 | KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181 34 | KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka-2:9092 35 | KAFKA_DEFAULT_REPLICATION_FACTOR: 3 36 | healthcheck: 37 | test: ["CMD", "nc", "127.0.0.1", "9092"] 38 | start_period: 30s 39 | 40 | kafka-3: 41 | image: confluentinc/cp-kafka:5.3.1 42 | hostname: kafka-3 43 | depends_on: 44 | - zookeeper 45 | environment: 46 | KAFKA_BROKER_ID: 3 47 | KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181 48 | KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka-3:9092 49 | KAFKA_DEFAULT_REPLICATION_FACTOR: 3 50 | healthcheck: 51 | test: ["CMD", "nc", "127.0.0.1", "9092"] 52 | start_period: 30s 53 | 54 | mysql: 55 | image: mysql:5.7 56 | volumes: 57 | - ../step11/mysql.cnf:/etc/mysql/conf.d/custom.cnf 58 | - ../step11/mysql-init.sql:/docker-entrypoint-initdb.d/mysql-init.sql 59 | environment: 60 | MYSQL_ROOT_PASSWORD: password 61 | MYSQL_DATABASE: db 62 | MYSQL_USER: user 63 | MYSQL_PASSWORD: password 64 | MYSQL_ROOT_HOST: 172.% # Allow docker containers to connect to mysql 65 | 66 | schema-registry: 67 | image: confluentinc/cp-schema-registry:5.3.1 68 | ports: 69 | - "8082:8082" 70 | depends_on: 71 | - zookeeper 72 | - kafka-1 73 | - kafka-2 74 | - kafka-3 75 | environment: 76 | SCHEMA_REGISTRY_HOST_NAME: schema-registry 77 | SCHEMA_REGISTRY_LISTENERS: "http://0.0.0.0:8082" 78 | SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS: "PLAINTEXT://kafka-1:9092,PLAINTEXT://kafka-2:9092,PLAINTEXT://kafka-3:9092" 79 | 80 | connect: 81 | image: confluentinc/cp-kafka-connect:5.3.1 82 | hostname: connect 83 | restart: always 84 | ports: 85 | - "8083:8083" 86 | depends_on: 87 | - zookeeper 88 | - kafka-1 89 | - kafka-2 90 | - kafka-3 91 | - schema-registry 92 | - mysql 93 | environment: 94 | CONNECT_BOOTSTRAP_SERVERS: "kafka-1:9092,kafka-2:9092,kafka-3:9092" 95 | CONNECT_GROUP_ID: "connect" 96 | CONNECT_CONFIG_STORAGE_TOPIC: connect-config 97 | CONNECT_OFFSET_STORAGE_TOPIC: connect-offsets 98 | CONNECT_STATUS_STORAGE_TOPIC: connect-status 99 | CONNECT_REPLICATION_FACTOR: 2 100 | CONNECT_KEY_CONVERTER: io.confluent.connect.avro.AvroConverter 101 | CONNECT_VALUE_CONVERTER: io.confluent.connect.avro.AvroConverter 102 | CONNECT_INTERNAL_KEY_CONVERTER: org.apache.kafka.connect.json.JsonConverter 103 | CONNECT_INTERNAL_VALUE_CONVERTER: org.apache.kafka.connect.json.JsonConverter 104 | CONNECT_KEY_CONVERTER_SCHEMA_REGISTRY_URL: http://schema-registry:8082 105 | CONNECT_VALUE_CONVERTER_SCHEMA_REGISTRY_URL: http://schema-registry:8082 106 | 
CONNECT_REST_ADVERTISED_HOST_NAME: "connect" 107 | CONNECT_PLUGIN_PATH: "/usr/share/java" 108 | CONNECT_LOG4J_LOGGERS: org.reflections=ERROR 109 | volumes: 110 | - ../step9/mysql-connector-java-5.1.45-bin.jar:/usr/share/java/kafka-connect-jdbc/mysql-connector-java-5.1.45-bin.jar 111 | - ../step11/debezium-connector-mysql:/usr/share/java/debezium-connector-mysql 112 | healthcheck: 113 | test: ["CMD", "nc", "127.0.0.1", "8083"] 114 | start_period: 30s 115 | 116 | ksql: 117 | image: confluentinc/cp-ksql-server:5.3.1 118 | hostname: ksql-cli 119 | depends_on: 120 | - kafka-1 121 | - kafka-2 122 | - kafka-3 123 | - schema-registry 124 | environment: 125 | KSQL_BOOTSTRAP_SERVERS: kafka-1:9092 126 | KSQL_LISTENERS: http://0.0.0.0:8088 127 | KSQL_KSQL_SCHEMA_REGISTRY_URL: http://schema-registry:8082 -------------------------------------------------------------------------------- /step17/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.4' 2 | services: 3 | zookeeper: 4 | image: confluentinc/cp-zookeeper:5.3.1 5 | hostname: zookeeper 6 | environment: 7 | ZOOKEEPER_CLIENT_PORT: 2181 8 | healthcheck: 9 | test: ["CMD", "bash", "-c", "echo ruok | nc localhost 2181 | grep imok"] 10 | start_period: 30s 11 | 12 | kafka-1: 13 | image: confluentinc/cp-kafka:5.3.1 14 | hostname: kafka-1 15 | depends_on: 16 | - zookeeper 17 | environment: 18 | KAFKA_BROKER_ID: 1 19 | KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181 20 | KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka-1:9092 21 | KAFKA_DEFAULT_REPLICATION_FACTOR: 3 22 | healthcheck: 23 | test: ["CMD", "nc", "127.0.0.1", "9092"] 24 | start_period: 30s 25 | 26 | kafka-2: 27 | image: confluentinc/cp-kafka:5.3.1 28 | hostname: kafka-2 29 | depends_on: 30 | - zookeeper 31 | environment: 32 | KAFKA_BROKER_ID: 2 33 | KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181 34 | KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka-2:9092 35 | KAFKA_DEFAULT_REPLICATION_FACTOR: 3 36 | healthcheck: 37 | test: ["CMD", "nc", "127.0.0.1", "9092"] 38 | start_period: 30s 39 | 40 | kafka-3: 41 | image: confluentinc/cp-kafka:5.3.1 42 | hostname: kafka-3 43 | depends_on: 44 | - zookeeper 45 | environment: 46 | KAFKA_BROKER_ID: 3 47 | KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181 48 | KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka-3:9092 49 | KAFKA_DEFAULT_REPLICATION_FACTOR: 3 50 | healthcheck: 51 | test: ["CMD", "nc", "127.0.0.1", "9092"] 52 | start_period: 30s 53 | 54 | mysql: 55 | image: mysql:5.7 56 | volumes: 57 | - ../step11/mysql.cnf:/etc/mysql/conf.d/custom.cnf 58 | - ./mysql-init.sql:/docker-entrypoint-initdb.d/mysql-init.sql 59 | environment: 60 | MYSQL_ROOT_PASSWORD: password 61 | MYSQL_DATABASE: db 62 | MYSQL_USER: user 63 | MYSQL_PASSWORD: password 64 | MYSQL_ROOT_HOST: 172.% # Allow docker containers to connect to mysql 65 | 66 | connect: 67 | image: confluentinc/cp-kafka-connect:5.3.1 68 | hostname: connect 69 | restart: always 70 | ports: 71 | - "8083:8083" 72 | depends_on: 73 | - zookeeper 74 | - kafka-1 75 | - kafka-2 76 | - kafka-3 77 | - mysql 78 | environment: 79 | CONNECT_BOOTSTRAP_SERVERS: "kafka-1:9092,kafka-2:9092,kafka-3:9092" 80 | CONNECT_GROUP_ID: "connect" 81 | CONNECT_CONFIG_STORAGE_TOPIC: connect-config 82 | CONNECT_OFFSET_STORAGE_TOPIC: connect-offsets 83 | CONNECT_STATUS_STORAGE_TOPIC: connect-status 84 | CONNECT_REPLICATION_FACTOR: 2 85 | CONNECT_KEY_CONVERTER: org.apache.kafka.connect.json.JsonConverter 86 | CONNECT_VALUE_CONVERTER: org.apache.kafka.connect.json.JsonConverter 87 | CONNECT_INTERNAL_KEY_CONVERTER: 
org.apache.kafka.connect.json.JsonConverter 88 | CONNECT_INTERNAL_VALUE_CONVERTER: org.apache.kafka.connect.json.JsonConverter 89 | CONNECT_REST_ADVERTISED_HOST_NAME: "connect" 90 | CONNECT_PLUGIN_PATH: "/usr/share/java" 91 | CONNECT_LOG4J_LOGGERS: org.reflections=ERROR 92 | volumes: 93 | - ../step9/mysql-connector-java-5.1.45-bin.jar:/usr/share/java/kafka-connect-jdbc/mysql-connector-java-5.1.45-bin.jar 94 | - ../step11/debezium-connector-mysql:/usr/share/java/debezium-connector-mysql 95 | healthcheck: 96 | test: ["CMD", "nc", "127.0.0.1", "8083"] 97 | start_period: 30s 98 | 99 | ksql: 100 | image: confluentinc/cp-ksql-server:5.3.1 101 | hostname: ksql-cli 102 | depends_on: 103 | - kafka-1 104 | - kafka-2 105 | - kafka-3 106 | environment: 107 | KSQL_BOOTSTRAP_SERVERS: kafka-1:9092 108 | KSQL_LISTENERS: http://0.0.0.0:8088 -------------------------------------------------------------------------------- /step17/mysql-init.sql: -------------------------------------------------------------------------------- 1 | GRANT REPLICATION SLAVE, REPLICATION CLIENT ON *.* TO 'replicator' IDENTIFIED BY 'replpass'; 2 | 3 | GRANT SELECT, RELOAD, SHOW DATABASES, REPLICATION SLAVE, REPLICATION CLIENT ON *.* TO 'debezium' IDENTIFIED BY 'dbz'; 4 | 5 | 6 | CREATE DATABASE mydb; 7 | 8 | GRANT ALL PRIVILEGES ON mydb.* TO 'user'@'%'; 9 | 10 | USE mydb; 11 | 12 | CREATE TABLE team ( 13 | id INT NOT NULL PRIMARY KEY AUTO_INCREMENT, 14 | name VARCHAR(255) NOT NULL, 15 | last_modified DATETIME NOT NULL 16 | ); 17 | 18 | INSERT INTO team ( 19 | id, 20 | name, 21 | last_modified 22 | ) VALUES ( 23 | '1', 24 | 'kafka', 25 | NOW() 26 | ); 27 | 28 | CREATE TABLE member ( 29 | id INT NOT NULL PRIMARY KEY AUTO_INCREMENT, 30 | name VARCHAR(255) NOT NULL, 31 | team_id INT NOT NULL, 32 | FOREIGN KEY (team_id) REFERENCES team(id) 33 | ); 34 | 35 | INSERT INTO member ( 36 | id, 37 | name, 38 | team_id 39 | ) VALUES ( 40 | '1', 41 | 'jun rao', 42 | 1 43 | ); 44 | 45 | -------------------------------------------------------------------------------- /step18/README.md: -------------------------------------------------------------------------------- 1 | # Objective 2 | 3 | Joins 4 | 5 | # Docker 6 | 7 | ``` 8 | $ docker-compose up -d 9 | $ docker-compose logs -f producer 10 | producer_1 | CHANGE_COUNTRY Domain.Address(id=38, streetName=Serenity Points, streetAddress=024 Macejkovic Knoll, city=Townestad, state=New Jersey, country=Bulgaria) 11 | producer_1 | TEAM_NAME_CHANGE Domain.Team(id=9, name=Pennsylvania vampires) 12 | producer_1 | NEW_MEMBER Domain.Member(id=78, firstname=Percy, lastname=Glover, gender=MALE, maritalStatus=MARRIED, teamId=24, age=45, role=QA) 13 | producer_1 | NEW_MEMBER Domain.Address(id=78, streetName=Cicero Loaf, streetAddress=37934 Mayer Wall, city=Port Vito, state=Iowa, country=Bosnia and Herzegovina) 14 | producer_1 | CHANGE_COUNTRY Domain.Address(id=42, streetName=Wilbert Fields, streetAddress=33501 Veronica Canyon, city=North Lincolnmouth, state=Connecticut, country=Puerto Rico) 15 | producer_1 | CHANGE_ROLE Domain.Member(id=68, firstname=Adella, lastname=Hessel, gender=MALE, maritalStatus=DIVORCED, teamId=15, age=10, role=DEVELOPER) 16 | producer_1 | NEW_MEMBER Domain.Member(id=80, firstname=Hazle, lastname=Herzog, gender=FEMALE, maritalStatus=SINGLE, teamId=24, age=22, role=QA) 17 | producer_1 | NEW_MEMBER Domain.Address(id=80, streetName=Hartmann Shoals, streetAddress=7054 Ratke Curve, city=New Theresa, state=Colorado, country=Sudan) 18 | producer_1 | TEAM_NAME_CHANGE Domain.Team(id=10, name=Nebraska 
zebras) 19 | producer_1 | NEW_MEMBER Domain.Member(id=82, firstname=Ernest, lastname=Legros, gender=THIRD, maritalStatus=WIDOWED, teamId=19, age=5, role=MANAGER) 20 | producer_1 | NEW_MEMBER Domain.Address(id=82, streetName=Zander Creek, streetAddress=721 Spencer Lakes, city=Mannshire, state=Iowa, country=Netherlands) 21 | $ docker-compose exec kafka-1 kafka-console-consumer \ 22 | --bootstrap-server localhost:9092 \ 23 | --topic Team \ 24 | --property print.key=true \ 25 | --key-deserializer org.apache.kafka.common.serialization.IntegerDeserializer \ 26 | --from-beginning 27 | 1 {"id":1,"name":"Michigan gooses"} 28 | 6 {"id":6,"name":"Tennessee banshees"} 29 | 3 null 30 | 1 {"id":1,"name":"Delaware worshipers"} 31 | 10 {"id":10,"name":"Connecticut conspirators"} 32 | 6 {"id":6,"name":"New Jersey rabbits"} 33 | 10 {"id":10,"name":"Florida oracles"} 34 | 7 null 35 | 10 {"id":10,"name":"Louisiana sons"} 36 | 1 null 37 | docker-compose exec kafka-1 kafka-console-consumer \ 38 | --bootstrap-server localhost:9092 \ 39 | --topic Aggregate \ 40 | --property print.key=true \ 41 | --key-deserializer org.apache.kafka.common.serialization.IntegerDeserializer \ 42 | --from-beginning 43 | $ docker-compose exec kafka-1 kafka-topics \ 44 | --zookeeper zookeeper:2181 \ 45 | --list 46 | Address 47 | Aggregate 48 | Member 49 | Team 50 | __consumer_offsets 51 | simple-join-stream-KSTREAM-JOINOTHER-0000000009-store-changelog 52 | simple-join-stream-KSTREAM-JOINTHIS-0000000008-store-changelog 53 | simple-join-stream-KSTREAM-OUTEROTHER-0000000014-store-changelog 54 | simple-join-stream-KSTREAM-OUTERTHIS-0000000013-store-changelog 55 | $ docker-compose exec kafka-1 kafka-console-consumer \ 56 | --bootstrap-server localhost:9092 \ 57 | --topic Aggregate \ 58 | --property print.key=true \ 59 | --key-deserializer org.apache.kafka.common.serialization.IntegerDeserializer \ 60 | --from-beginning 61 | 5 {"member":{"id":5,"firstname":"Katelin","lastname":"Donnelly","gender":"FEMALE","maritalStatus":"SINGLE","teamId":8,"age":22,"role":"MANAGER"},"address":{"id":5,"streetName":"Upton Square","streetAddress":"97265 Yost Springs","city":"West Majorton","state":"Rhode Island","country":"USA"},"team":{"id":5,"name":"Missouri dwarves"}} 62 | 5 {"member":{"id":5,"firstname":"Katelin","lastname":"Donnelly","gender":"FEMALE","maritalStatus":"SINGLE","teamId":8,"age":22,"role":"MANAGER"},"address":{"id":5,"streetName":"Upton Square","streetAddress":"97265 Yost Springs","city":"West Majorton","state":"Rhode Island","country":"USA"},"team":{"id":5,"name":"Rhode Island goblins"}} 63 | 5 {"member":{"id":5,"firstname":"Katelin","lastname":"Donnelly","gender":"FEMALE","maritalStatus":"SINGLE","teamId":8,"age":22,"role":"MANAGER"},"address":{"id":5,"streetName":"Upton Square","streetAddress":"97265 Yost Springs","city":"West Majorton","state":"Rhode Island","country":"USA"},"team":{"id":5,"name":"Hawaii spirits"}} 64 | 20 {"member":null,"address":null,"team":{"id":20,"name":"Louisiana ghosts"}} 65 | $ docker-compose logs -f joinstreamer 66 | joinstreamer_1 | 5:Domain.Aggregate(member=Domain.Member(id=5, firstname=Katelin, lastname=Von, gender=FEMALE, maritalStatus=DIVORCED, teamId=8, age=21, role=MANAGER), address=Domain.Address(id=5, streetName=Crawford Street, streetAddress=34122 Claudie Squares, city=South Jenatown, state=Rhode Island, country=USA), team=Domain.Team(id=5, name=New Jersey horses)) 67 | joinstreamer_1 | 5:Domain.Aggregate(member=Domain.Member(id=5, firstname=Katelin, lastname=Donnelly, gender=FEMALE, 
maritalStatus=SINGLE, teamId=8, age=22, role=MANAGER), address=Domain.Address(id=5, streetName=Myrna Knolls, streetAddress=45080 Lessie Crest, city=South Jenatown, state=Rhode Island, country=USA), team=Domain.Team(id=5, name=New Jersey horses)) 68 | joinstreamer_1 | 5:Domain.Aggregate(member=Domain.Member(id=5, firstname=Katelin, lastname=Donnelly, gender=FEMALE, maritalStatus=SINGLE, teamId=8, age=22, role=MANAGER), address=Domain.Address(id=5, streetName=Crawford Street, streetAddress=34122 Claudie Squares, city=South Jenatown, state=Rhode Island, country=USA), team=Domain.Team(id=5, name=New Jersey horses)) 69 | joinstreamer_1 | 5:Domain.Aggregate(member=Domain.Member(id=5, firstname=Katelin, lastname=Donnelly, gender=FEMALE, maritalStatus=SINGLE, teamId=8, age=21, role=MANAGER), address=Domain.Address(id=5, streetName=Upton Square, streetAddress=97265 Yost Springs, city=West Majorton, state=Rhode Island, country=USA), team=Domain.Team(id=5, name=New Jersey horses)) 70 | joinstreamer_1 | 5:Domain.Aggregate(member=Domain.Member(id=5, firstname=Rebeka, lastname=Donnelly, gender=THIRD, maritalStatus=SINGLE, teamId=8, age=21, role=MANAGER), address=Domain.Address(id=5, streetName=Upton Square, streetAddress=97265 Yost Springs, city=West Majorton, state=Rhode Island, country=USA), team=Domain.Team(id=5, name=New Jersey horses)) 71 | joinstreamer_1 | 5:Domain.Aggregate(member=Domain.Member(id=5, firstname=Katelin, lastname=Von, gender=FEMALE, maritalStatus=DIVORCED, teamId=8, age=21, role=MANAGER), address=Domain.Address(id=5, streetName=Upton Square, streetAddress 72 | ``` 73 | -------------------------------------------------------------------------------- /step18/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '2' 2 | services: 3 | zookeeper: 4 | image: confluentinc/cp-zookeeper:5.3.1 5 | hostname: zookeeper 6 | environment: 7 | ZOOKEEPER_CLIENT_PORT: 2181 8 | 9 | kafka-1: 10 | image: confluentinc/cp-kafka:5.3.1 11 | hostname: kafka-1 12 | depends_on: 13 | - zookeeper 14 | environment: 15 | KAFKA_BROKER_ID: 1 16 | KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181 17 | KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka-1:9092 18 | 19 | kafka-2: 20 | image: confluentinc/cp-kafka:5.3.1 21 | hostname: kafka-2 22 | depends_on: 23 | - zookeeper 24 | environment: 25 | KAFKA_BROKER_ID: 2 26 | KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181 27 | KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka-2:9092 28 | 29 | kafka-3: 30 | image: confluentinc/cp-kafka:5.3.1 31 | hostname: kafka-3 32 | depends_on: 33 | - zookeeper 34 | environment: 35 | KAFKA_BROKER_ID: 3 36 | KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181 37 | KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka-3:9092 38 | 39 | producer: 40 | build: streamjoins/ 41 | environment: 42 | ACTION: producer 43 | depends_on: 44 | - kafka-1 45 | - kafka-2 46 | - kafka-3 47 | 48 | joinstreamer: 49 | build: streamjoins/ 50 | environment: 51 | ACTION: streamer 52 | depends_on: 53 | - producer 54 | -------------------------------------------------------------------------------- /step18/streamjoins/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM maven:3.6.1-jdk-8 as mavenBuild 2 | COPY pom.xml pom.xml 3 | COPY src src 4 | RUN ["mvn", "install"] 5 | 6 | FROM confluentinc/cp-base:5.3.1 7 | COPY --from=mavenBuild ./target/*.jar ./ 8 | ENV ACTION "producer" 9 | ENV BROKER_LIST "kafka-1:9092,kafka-2:9092,kafka-3:9092" 10 | ENV JAVA_OPTS "" 11 | CMD [ "bash", "-c", "cub kafka-ready -b 
${BROKER_LIST} 3 60 && java ${JAVA_OPTS} -jar *.jar ${ACTION} ${BROKER_LIST}" ] 12 | -------------------------------------------------------------------------------- /step18/streamjoins/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | com.github.framiere 6 | simple-streams-join 7 | 1.0.0 8 | jar 9 | 10 | 1.8 11 | 1.8 12 | UTF-8 13 | UTF-8 14 | 15 | 16 | 17 | org.apache.kafka 18 | kafka-clients 19 | 2.3.0 20 | 21 | 22 | org.apache.kafka 23 | kafka-streams 24 | 2.3.0 25 | 26 | 27 | org.projectlombok 28 | lombok 29 | 1.16.20 30 | compile 31 | 32 | 33 | com.github.javafaker 34 | javafaker 35 | 0.14 36 | 37 | 38 | 39 | junit 40 | junit 41 | 4.13.1 42 | test 43 | 44 | 45 | com.github.charithe 46 | kafka-junit 47 | 4.1.6 48 | test 49 | 50 | 51 | org.assertj 52 | assertj-core 53 | 3.8.0 54 | test 55 | 56 | 57 | 58 | 59 | 60 | org.apache.maven.plugins 61 | maven-assembly-plugin 62 | 63 | 64 | install 65 | 66 | single 67 | 68 | 69 | false 70 | 71 | 72 | com.github.framiere.Runner 73 | 74 | 75 | 76 | jar-with-dependencies 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | -------------------------------------------------------------------------------- /step18/streamjoins/src/main/java/com/github/framiere/Domain.java: -------------------------------------------------------------------------------- 1 | package com.github.framiere; 2 | 3 | import com.github.javafaker.Faker; 4 | import lombok.AllArgsConstructor; 5 | import lombok.Data; 6 | import lombok.EqualsAndHashCode; 7 | import lombok.NoArgsConstructor; 8 | import lombok.experimental.Wither; 9 | import org.apache.kafka.clients.producer.Producer; 10 | import org.apache.kafka.clients.producer.ProducerRecord; 11 | 12 | import java.security.SecureRandom; 13 | 14 | import static com.github.framiere.Domain.OperationMode.*; 15 | 16 | public class Domain { 17 | private static final Faker faker = new Faker(); 18 | private static final SecureRandom random = new SecureRandom(); 19 | 20 | public interface HasId { 21 | int getId(); 22 | } 23 | 24 | @Data 25 | @EqualsAndHashCode(of = "id") 26 | @NoArgsConstructor 27 | @AllArgsConstructor 28 | @Wither 29 | public static class Member implements HasId { 30 | public int id; 31 | public String firstname; 32 | public String lastname; 33 | public Gender gender; 34 | public String phone; 35 | public MaritalStatus maritalStatus; 36 | public int teamId; 37 | public int age; 38 | public Role role; 39 | 40 | public Member withTeam(Team team) { 41 | return withTeamId(team.id); 42 | } 43 | } 44 | 45 | @Data 46 | @EqualsAndHashCode(of = "id") 47 | @NoArgsConstructor 48 | @AllArgsConstructor 49 | @Wither 50 | public static class Address implements HasId { 51 | public int id; 52 | public String streetName; 53 | public String streetAddress; 54 | public String city; 55 | public String state; 56 | public String country; 57 | 58 | public Address changeAddress() { 59 | return withStreetName(faker.address().streetName()) 60 | .withStreetAddress(faker.address().streetAddress()); 61 | } 62 | 63 | public Address changeCity() { 64 | return changeAddress() 65 | .withCity(faker.address().city()); 66 | } 67 | 68 | public Address changeState() { 69 | return changeCity() 70 | .withState(faker.address().state()); 71 | } 72 | 73 | public Address changePhone() { 74 | return withCountry(faker.phoneNumber().phoneNumber()); 75 | } 76 | 77 | public Address changeCountry() { 78 | return changeState() 79 | .changePhone() 80 | .withCountry(faker.address().country()); 81 | } 82 | } 83 | 84 | public 
enum MaritalStatus { 85 | MARRIED, 86 | SINGLE, 87 | DIVORCED, 88 | WIDOWED 89 | } 90 | 91 | public enum Role { 92 | DEVELOPER, 93 | QA, 94 | ARCHITECT, 95 | MANAGER 96 | } 97 | 98 | public enum Gender { 99 | MALE, 100 | FEMALE, 101 | THIRD 102 | } 103 | 104 | @Data 105 | @EqualsAndHashCode(of = "id") 106 | @NoArgsConstructor 107 | @AllArgsConstructor 108 | @Wither 109 | public static class Team implements HasId { 110 | public int id; 111 | public String name; 112 | 113 | public Team changeName() { 114 | return withName(faker.team().name()); 115 | } 116 | } 117 | 118 | @Data 119 | @AllArgsConstructor 120 | @NoArgsConstructor 121 | @Wither 122 | @EqualsAndHashCode(of = "id") 123 | public static class Aggregate { 124 | public Member member; 125 | public Address address; 126 | public Team team; 127 | } 128 | 129 | @AllArgsConstructor 130 | enum Operation { 131 | NEW_TEAM(8, INSERT), 132 | NEW_MEMBER(15, INSERT), 133 | TEAM_NAME_CHANGE(20, UPDATE), 134 | DELETE_TEAM(3, DELETE), 135 | DELETE_MEMBER(4, DELETE), 136 | NEW_MARITAL_STATUS(5, UPDATE), 137 | CHANGE_PHONE(2, UPDATE), 138 | CHANGE_ADDRESS_IN_TOWN(5, UPDATE), 139 | CHANGE_CITY(4, UPDATE), 140 | CHANGE_COUNTRY(1, UPDATE), 141 | CHANGE_GENDER(1, UPDATE), 142 | CHANGE_TEAM(5, UPDATE), 143 | CHANGE_ROLE(11, UPDATE), 144 | ANNIVERSARY(2, UPDATE), 145 | NO_OP(100, NONE); 146 | int chanceOfHappening; 147 | OperationMode operationMode; 148 | 149 | boolean fire() { 150 | return random.nextInt(100) <= chanceOfHappening; 151 | } 152 | 153 | public void call(Producer producer, HasId object) { 154 | System.out.println(this + " " + object); 155 | producer.send(new ProducerRecord<>(object.getClass().getSimpleName(), object.getId(), operationMode == DELETE ? null : object)); 156 | producer.send(new ProducerRecord<>(RandomProducerAction.class.getSimpleName(), new RandomProducerAction(this, object.getClass().getSimpleName(), object))); 157 | } 158 | } 159 | 160 | enum OperationMode { 161 | INSERT, 162 | DELETE, 163 | UPDATE, 164 | NONE 165 | } 166 | 167 | @AllArgsConstructor 168 | private static class RandomProducerAction { 169 | public Operation operation; 170 | public String clazz; 171 | public HasId object; 172 | } 173 | } 174 | -------------------------------------------------------------------------------- /step18/streamjoins/src/main/java/com/github/framiere/JsonSerde.java: -------------------------------------------------------------------------------- 1 | package com.github.framiere; 2 | 3 | import com.fasterxml.jackson.databind.ObjectMapper; 4 | import org.apache.kafka.common.serialization.Deserializer; 5 | import org.apache.kafka.common.serialization.Serde; 6 | import org.apache.kafka.common.serialization.Serializer; 7 | 8 | import java.io.IOException; 9 | import java.util.Map; 10 | 11 | public class JsonSerde implements Serde { 12 | private final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); 13 | 14 | private final Class clazz; 15 | 16 | public JsonSerde(Class clazz) { 17 | this.clazz = clazz; 18 | } 19 | 20 | @Override 21 | public void configure(Map configs, boolean isKey) { 22 | } 23 | 24 | @Override 25 | public void close() { 26 | } 27 | 28 | @Override 29 | public Serializer serializer() { 30 | return new JsonSerializer<>(); 31 | } 32 | 33 | @Override 34 | public Deserializer deserializer() { 35 | return new Deserializer() { 36 | @Override 37 | public void configure(Map configs, boolean isKey) { 38 | 39 | } 40 | 41 | @Override 42 | public T deserialize(String topic, byte[] data) { 43 | try { 44 | return data == null ? 
null : (T) OBJECT_MAPPER.readValue(data, clazz); 45 | } catch (IOException e) { 46 | throw new RuntimeException(e); 47 | } 48 | } 49 | 50 | @Override 51 | public void close() { 52 | 53 | } 54 | }; 55 | } 56 | } -------------------------------------------------------------------------------- /step18/streamjoins/src/main/java/com/github/framiere/JsonSerializer.java: -------------------------------------------------------------------------------- 1 | package com.github.framiere; 2 | 3 | 4 | import com.fasterxml.jackson.core.JsonProcessingException; 5 | import com.fasterxml.jackson.databind.ObjectMapper; 6 | import org.apache.kafka.common.serialization.Serializer; 7 | 8 | import java.util.Map; 9 | 10 | public class JsonSerializer implements Serializer { 11 | private final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); 12 | 13 | @Override 14 | public void configure(Map configs, boolean isKey) { 15 | } 16 | 17 | @Override 18 | public byte[] serialize(String topic, T data) { 19 | try { 20 | return data == null ? null : OBJECT_MAPPER.writeValueAsString(data).getBytes(); 21 | } catch (JsonProcessingException e) { 22 | throw new RuntimeException(e); 23 | } 24 | } 25 | 26 | @Override 27 | public void close() { 28 | } 29 | } -------------------------------------------------------------------------------- /step18/streamjoins/src/main/java/com/github/framiere/Runner.java: -------------------------------------------------------------------------------- 1 | package com.github.framiere; 2 | 3 | public class Runner { 4 | public static void main(String[] args) throws Exception { 5 | if (args.length == 0 || args.length > 2) { 6 | System.err.println("required args: [producer|streamer] boostrapservers"); 7 | System.err.println("ex: producer localhost:9092"); 8 | } 9 | String type = args[0]; 10 | switch (type) { 11 | case "producer": 12 | new RandomProducer().produce(args.length == 2 ? args[1] : "localhost:9092"); 13 | break; 14 | case "streamer": 15 | new SimpleJoinStream().stream(args.length == 2 ? args[1] : "localhost:9092"); 16 | break; 17 | default: 18 | throw new IllegalArgumentException(type + "is not supported"); 19 | } 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /step18/streamjoins/src/main/java/com/github/framiere/SimpleJoinStream.java: -------------------------------------------------------------------------------- 1 | package com.github.framiere; 2 | 3 | import org.apache.kafka.clients.admin.AdminClient; 4 | import org.apache.kafka.clients.consumer.ConsumerConfig; 5 | import org.apache.kafka.clients.producer.ProducerConfig; 6 | import org.apache.kafka.common.serialization.Serdes; 7 | import org.apache.kafka.streams.*; 8 | import org.apache.kafka.streams.kstream.*; 9 | import org.apache.kafka.streams.processor.WallclockTimestampExtractor; 10 | 11 | import java.time.Duration; 12 | import java.util.Properties; 13 | import java.util.Set; 14 | import java.util.concurrent.TimeUnit; 15 | 16 | import static com.github.framiere.Domain.*; 17 | import static java.util.concurrent.TimeUnit.HOURS; 18 | import static java.util.concurrent.TimeUnit.SECONDS; 19 | 20 | public class SimpleJoinStream { 21 | private static long windowRetentionTimeMs = HOURS.toMillis(4); 22 | 23 | public static void main(String[] args) throws Exception { 24 | new SimpleJoinStream().stream(args.length == 1 ? 
args[0] : "localhost:9092"); 25 | } 26 | 27 | public void stream(String bootstrapServers) throws Exception { 28 | waitForTopics(bootstrapServers); 29 | 30 | // Serializer, Deserializer 31 | JsonSerde memberSerde = new JsonSerde<>(Member.class); 32 | JsonSerde
addressSerde = new JsonSerde<>(Address.class); 33 | JsonSerde teamSerde = new JsonSerde<>(Team.class); 34 | JsonSerde aggregateSerde = new JsonSerde<>(Aggregate.class); 35 | 36 | StreamsBuilder builder = new StreamsBuilder(); 37 | 38 | // Streams the 3 domain model topics 39 | KStream members = builder.stream(Member.class.getSimpleName(), Consumed.with(Serdes.Integer(), memberSerde)); 40 | KStream addresses = builder.stream(Address.class.getSimpleName(), Consumed.with(Serdes.Integer(), addressSerde)); 41 | KStream teams = builder.stream(Team.class.getSimpleName(), Consumed.with(Serdes.Integer(), teamSerde)); 42 | 43 | // SELECT * FROM members m 44 | // INNER JOIN address a ON (m.id = a.id) 45 | // LEFT OUTER JOIN team t on (m.id = t.id) 46 | // WHERE m.age > 18 47 | // AND a.country = "USA" 48 | members 49 | .filter((key, member) -> member != null && member.age > 18) 50 | .join( 51 | addresses.filter((key, address) -> address != null && "USA".equals(address.country)), 52 | (member, address) -> new Aggregate().withMember(member).withAddress(address), 53 | JoinWindows.of(Duration.ofSeconds(30)).grace(Duration.ofMillis(windowRetentionTimeMs)), 54 | Joined.with(Serdes.Integer(), memberSerde, addressSerde)) 55 | .outerJoin( 56 | teams, 57 | (aggregate, team) -> (aggregate == null ? new Aggregate() : aggregate).withTeam(team), 58 | JoinWindows.of(Duration.ofSeconds(50)).grace(Duration.ofMillis(windowRetentionTimeMs)), 59 | Joined.with(Serdes.Integer(), aggregateSerde, teamSerde)) 60 | .peek((k, aggregate) -> System.out.println(aggregate)) 61 | .to(Aggregate.class.getSimpleName(), Produced.with(Serdes.Integer(), aggregateSerde)); 62 | 63 | Topology build = builder.build(); 64 | 65 | System.out.println(build.describe()); 66 | 67 | KafkaStreams kafkaStreams = new KafkaStreams(build, buildProducerProperties(bootstrapServers)); 68 | kafkaStreams.cleanUp(); 69 | kafkaStreams.start(); 70 | } 71 | 72 | private void waitForTopics(String bootstrapServers) throws Exception { 73 | while (true) { 74 | TimeUnit.SECONDS.sleep(5); 75 | Properties properties = new Properties(); 76 | properties.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers); 77 | AdminClient adminClient = AdminClient.create(properties); 78 | Set topics = adminClient.listTopics().names().get(); 79 | if (topics.contains(Member.class.getSimpleName())) { 80 | return; 81 | } 82 | System.out.println("Waiting for data"); 83 | } 84 | } 85 | 86 | private Properties buildProducerProperties(String bootstrapServers) { 87 | Properties properties = new Properties(); 88 | properties.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers); 89 | properties.put(StreamsConfig.NUM_STREAM_THREADS_CONFIG, 3); 90 | properties.put(StreamsConfig.APPLICATION_ID_CONFIG, "simple-join-stream"); 91 | properties.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 5 * 1000); 92 | properties.put(StreamsConfig.CACHE_MAX_BYTES_BUFFERING_CONFIG, 0); 93 | properties.put(StreamsConfig.DEFAULT_TIMESTAMP_EXTRACTOR_CLASS_CONFIG, WallclockTimestampExtractor.class); 94 | properties.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest"); 95 | properties.put(ProducerConfig.BATCH_SIZE_CONFIG, "1"); 96 | properties.put(ProducerConfig.COMPRESSION_TYPE_CONFIG, "snappy"); 97 | return properties; 98 | } 99 | } -------------------------------------------------------------------------------- /step18/streamjoins/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | log4j.rootLogger=INFO, stdout 2 | 3 | 
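# Console appender so that `docker-compose logs -f producer` / `-f joinstreamer` show the stream output.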
log4j.appender.stdout=org.apache.log4j.ConsoleAppender 4 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 5 | log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n 6 | -------------------------------------------------------------------------------- /step19/README.md: -------------------------------------------------------------------------------- 1 | # Objective 2 | Run the full pipeline: random operations are applied to MySQL, Debezium captures the changes, Kafka Connect pushes them to S3 (minio), and KSQL, Telegraf, InfluxDB and Grafana turn the activity into live metrics. 3 | # Enable CDC 4 | ```sh 5 | $ docker-compose exec connect curl -s -XPOST -H "Content-Type: application/json; charset=UTF-8" http://localhost:8083/connectors/ -d ' 6 | { 7 | "name": "debezium-connector", 8 | "config": { 9 | "connector.class": "io.debezium.connector.mysql.MySqlConnector", 10 | "tasks.max": "1", 11 | "database.hostname": "mysql", 12 | "database.port": "3306", 13 | "database.user": "debezium", 14 | "database.password": "dbz", 15 | "database.server.id": "1", 16 | "database.server.name": "dbserver1", 17 | "database.whitelist": "mydb", 18 | "database.history.kafka.bootstrap.servers": "kafka-1:9092,kafka-2:9092,kafka-3:9092", 19 | "database.history.kafka.topic": "schema-changes.mydb", 20 | "include.schema.changes": "false", 21 | "value.converter": "org.apache.kafka.connect.json.JsonConverter", 22 | "value.converter.schemas.enable": false, 23 | "key.converter": "org.apache.kafka.connect.json.JsonConverter", 24 | "key.converter.schemas.enable": false 25 | } 26 | }' 27 | ``` 28 | 29 | # Enable Push to S3! 30 | 31 | ```sh 32 | $ docker-compose exec connect curl -XPOST -H "Content-Type: application/json; charset=UTF-8" http://localhost:8083/connectors/ -d ' 33 | { 34 | "name": "s3-sink", 35 | "config": { 36 | "connector.class": "io.confluent.connect.s3.S3SinkConnector", 37 | "tasks.max": "1", 38 | "topics.regex": "dbserver1.mydb.*", 39 | "s3.bucket.name": "cdc", 40 | "s3.part.size": "5242880", 41 | "store.url": "http://minio:9000", 42 | "flush.size": "3", 43 | "storage.class": "io.confluent.connect.s3.storage.S3Storage", 44 | "format.class": "io.confluent.connect.s3.format.json.JsonFormat", 45 | "schema.generator.class": "io.confluent.connect.storage.hive.schema.DefaultSchemaGenerator", 46 | "partitioner.class": "io.confluent.connect.storage.partitioner.DefaultPartitioner", 47 | "schema.compatibility": "NONE", 48 | "name": "s3-sink", 49 | "value.converter": "org.apache.kafka.connect.json.JsonConverter", 50 | "value.converter.schemas.enable": false, 51 | "key.converter": "org.apache.kafka.connect.json.JsonConverter", 52 | "key.converter.schemas.enable": false 53 | } 54 | }' 55 | ``` 56 | 57 | ## Show graph dependencies 58 | ``` 59 | $ docker run --rm -it --name dcv -v $(pwd):/input pmsipilot/docker-compose-viz \ 60 | render \ 61 | --horizontal \ 62 | --output-format image \ 63 | --no-volumes \ 64 | --force \ 65 | docker-compose.yml 66 | ``` 67 | 68 | ![No volumes](./docker-compose-novolumne.png "No Volume") 69 | 70 | 71 | ## Show graph dependencies with volumes 72 | 73 | ``` 74 | $ docker run --rm -it --name dcv -v $(pwd):/input pmsipilot/docker-compose-viz render --horizontal --output-format image --force docker-compose.yml 75 | ``` 76 | ![Volumes](./docker-compose-volumes.png "Volumes") 77 | 78 | 79 | ## Without Confluent Control Center? 80 | 81 | ![NoControlCenter](./docker-compose-no-control-center.png "No Control Center") 82 | 83 | What a nice bus!
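Before moving on to KSQL, it is worth a quick sanity check that both connectors are running and that JSON objects are landing in the `cdc` bucket. A minimal check, using the connector names registered above and the `list-buckets` helper container from the compose file:

```sh
$ docker-compose exec connect curl -s http://localhost:8083/connectors/debezium-connector/status
$ docker-compose exec connect curl -s http://localhost:8083/connectors/s3-sink/status
$ docker-compose logs list-buckets
```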
84 | 85 | 86 | # Ksql 87 | 88 | ``` 89 | $ docker-compose exec ksql ksql 90 | ``` 91 | 92 | ```sql 93 | CREATE STREAM OPERATIONS (operation varchar, clazz varchar) \ 94 | WITH ( kafka_topic='RandomProducerAction',value_format='JSON'); 95 | 96 | CREATE TABLE BY_OPERATION WITH (PARTITIONS=1) AS \ 97 | SELECT operation, count(*) as count \ 98 | FROM OPERATIONS \ 99 | WINDOW TUMBLING (SIZE 20 SECONDS) \ 100 | GROUP BY operation; 101 | 102 | CREATE TABLE BY_CLASS WITH (PARTITIONS=1) AS \ 103 | SELECT clazz, count(*) as count \ 104 | FROM OPERATIONS \ 105 | WINDOW TUMBLING (SIZE 20 SECONDS) \ 106 | GROUP BY clazz; 107 | ``` 108 | 109 | 110 | # Links 111 | 112 | [Confluent Control Center](http://localhost:9021/) 113 | 114 | [Grafana](http://admin:admin@localhost:3000/) 115 | -------------------------------------------------------------------------------- /step19/cdcdiff/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM maven:3.6.1-jdk-8 as mavenBuild 2 | COPY pom.xml pom.xml 3 | COPY src src 4 | RUN ["mvn", "install"] 5 | 6 | FROM confluentinc/cp-base:5.3.1 7 | COPY --from=mavenBuild ./target/*.jar ./ 8 | ENV BROKER_LIST "kafka-1:9092,kafka-2:9092,kafka-3:9092" 9 | ENV JAVA_OPTS "" 10 | CMD [ "bash", "-c", "cub kafka-ready -b ${BROKER_LIST} 3 60 && java ${JAVA_OPTS} -jar *.jar ${BROKER_LIST}" ] 11 | -------------------------------------------------------------------------------- /step19/cdcdiff/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | com.github.framiere 6 | cdc-diff 7 | 1.0.0 8 | jar 9 | 10 | 1.8 11 | 1.8 12 | UTF-8 13 | UTF-8 14 | 15 | 16 | 17 | org.apache.kafka 18 | kafka-clients 19 | 2.3.0 20 | 21 | 22 | org.apache.kafka 23 | kafka-streams 24 | 2.3.0 25 | 26 | 27 | org.projectlombok 28 | lombok 29 | 1.16.20 30 | compile 31 | 32 | 33 | junit 34 | junit 35 | 4.13.1 36 | test 37 | 38 | 39 | com.github.charithe 40 | kafka-junit 41 | 4.1.6 42 | test 43 | 44 | 45 | org.assertj 46 | assertj-core 47 | 3.8.0 48 | test 49 | 50 | 51 | 52 | 53 | 54 | org.apache.maven.plugins 55 | maven-assembly-plugin 56 | 57 | 58 | install 59 | 60 | single 61 | 62 | 63 | false 64 | 65 | 66 | com.github.framiere.Application 67 | 68 | 69 | 70 | jar-with-dependencies 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | -------------------------------------------------------------------------------- /step19/cdcdiff/src/main/java/com/github/framiere/Application.java: -------------------------------------------------------------------------------- 1 | package com.github.framiere; 2 | 3 | import org.apache.kafka.clients.admin.AdminClient; 4 | import org.apache.kafka.clients.consumer.ConsumerConfig; 5 | import org.apache.kafka.clients.producer.ProducerConfig; 6 | import org.apache.kafka.common.serialization.Serdes; 7 | import org.apache.kafka.streams.*; 8 | import org.apache.kafka.streams.kstream.Consumed; 9 | import org.apache.kafka.streams.kstream.Produced; 10 | import org.apache.kafka.streams.processor.WallclockTimestampExtractor; 11 | 12 | import java.util.Arrays; 13 | import java.util.List; 14 | import java.util.Properties; 15 | import java.util.concurrent.TimeUnit; 16 | 17 | public class Application { 18 | private static final List TOPICS = Arrays.asList("dbserver1.mydb.Team", "dbserver1.mydb.Member", "dbserver1.mydb.Address"); 19 | private final CdcChange cdcChange = new CdcChange(); 20 | 21 | public static void main(String[] args) throws Exception { 22 | new Application().stream(args.length == 1 ? 
args[0] : "localhost:9092"); 23 | } 24 | 25 | public void stream(String bootstrapServers) throws Exception { 26 | waitForTopics(bootstrapServers); 27 | 28 | StreamsBuilder builder = new StreamsBuilder(); 29 | builder 30 | .stream(TOPICS, Consumed.with(Serdes.String(), Serdes.String())) 31 | .mapValues(cdcChange::toTelegraf) 32 | .filter((k, v) -> v != null && !v.isEmpty()) 33 | .peek((k, v) -> System.out.println(v)) 34 | .to("telegraf", Produced.with(Serdes.String(), Serdes.String())); 35 | 36 | Topology build = builder.build(); 37 | 38 | System.out.println(build.describe()); 39 | 40 | KafkaStreams kafkaStreams = new KafkaStreams(build, buildProducerProperties(bootstrapServers)); 41 | kafkaStreams.cleanUp(); 42 | kafkaStreams.start(); 43 | } 44 | 45 | private void waitForTopics(String bootstrapServers) throws Exception { 46 | while (true) { 47 | TimeUnit.SECONDS.sleep(5); 48 | Properties properties = new Properties(); 49 | properties.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers); 50 | AdminClient adminClient = AdminClient.create(properties); 51 | if (adminClient.listTopics().names().get().containsAll(TOPICS)) { 52 | return; 53 | } 54 | System.out.println("Waiting for data"); 55 | } 56 | } 57 | 58 | private Properties buildProducerProperties(String bootstrapServers) { 59 | Properties properties = new Properties(); 60 | properties.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers); 61 | properties.put(StreamsConfig.NUM_STREAM_THREADS_CONFIG, 3); 62 | properties.put(StreamsConfig.APPLICATION_ID_CONFIG, "cdc-change-stream"); 63 | properties.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 5 * 1000); 64 | properties.put(StreamsConfig.CACHE_MAX_BYTES_BUFFERING_CONFIG, 0); 65 | properties.put(StreamsConfig.DEFAULT_TIMESTAMP_EXTRACTOR_CLASS_CONFIG, WallclockTimestampExtractor.class); 66 | properties.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest"); 67 | properties.put(ProducerConfig.BATCH_SIZE_CONFIG, "1"); 68 | properties.put(ProducerConfig.COMPRESSION_TYPE_CONFIG, "snappy"); 69 | return properties; 70 | } 71 | } -------------------------------------------------------------------------------- /step19/cdcdiff/src/main/java/com/github/framiere/CdcChange.java: -------------------------------------------------------------------------------- 1 | package com.github.framiere; 2 | 3 | import com.fasterxml.jackson.databind.JsonNode; 4 | import com.fasterxml.jackson.databind.ObjectMapper; 5 | 6 | import java.io.IOException; 7 | import java.util.concurrent.atomic.LongAdder; 8 | 9 | public class CdcChange { 10 | private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); 11 | 12 | public String toTelegraf(String json) { 13 | return json == null ? 
null : toTelegraf(json.getBytes()); 14 | } 15 | 16 | public String toTelegraf(byte[] json) { 17 | try { 18 | JsonNode payload = OBJECT_MAPPER.readTree(json); 19 | if (payload == null || !payload.has("op") || !payload.has("source") || !payload.has("before")) { 20 | System.err.println("Payload has not the required fields"); 21 | return null; 22 | } 23 | String op = payload.get("op").asText(); 24 | String table = payload.get("source").get("table").asText(); 25 | JsonNode beforeNode = payload.get("before"); 26 | JsonNode afterNode = payload.get("after"); 27 | 28 | String line = "cdc,table=" + table + ",operation="; 29 | switch (op) { 30 | case "u": 31 | return line + "update" + getId(afterNode) + toUpdate(beforeNode, afterNode) + ",found=1 " + getTimeInS(payload); 32 | case "d": 33 | return line + "delete" + getId(beforeNode) + " found=1 " + getTimeInS(payload); 34 | default: 35 | return line + "insert" + getId(afterNode) + " found=1 " + getTimeInS(payload); 36 | } 37 | } catch (IOException e) { 38 | return ""; 39 | } 40 | } 41 | 42 | private String getTimeInS(JsonNode payload) { 43 | return payload.get("ts_ms").asText() + "000000"; 44 | } 45 | 46 | private String getId(JsonNode afterNode) { 47 | String id = ""; 48 | if (afterNode.has("id")) { 49 | id = ",id=" + afterNode.get("id").asText(); 50 | } 51 | return id; 52 | } 53 | 54 | private String toUpdate(JsonNode beforeNode, JsonNode afterNode) { 55 | StringBuilder line = new StringBuilder(); 56 | LongAdder nbChanges = new LongAdder(); 57 | beforeNode.fieldNames() 58 | .forEachRemaining(f -> { 59 | if (!beforeNode.get(f).asText().equals(afterNode.get(f).asText())) { 60 | line.append("," + f + "=updated"); 61 | nbChanges.increment(); 62 | } 63 | } 64 | ); 65 | return line + " nbChanges=" + nbChanges; 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /step19/cdcdiff/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | log4j.rootLogger=INFO, stdout 2 | 3 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 4 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 5 | log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n 6 | -------------------------------------------------------------------------------- /step19/cdcdiff/src/test/java/com/github/framiere/CdcChangeTest.java: -------------------------------------------------------------------------------- 1 | package com.github.framiere; 2 | 3 | import org.junit.Test; 4 | 5 | import java.io.IOException; 6 | 7 | import static org.assertj.core.api.Assertions.assertThat; 8 | 9 | public class CdcChangeTest { 10 | public CdcChange cdcChange = new CdcChange(); 11 | 12 | @Test 13 | public void update() throws IOException { 14 | assertThat(cdcChange.toTelegraf("" + 15 | "{\n" + 16 | " \"before\": {\n" + 17 | " \"id\": 60,\n" + 18 | " \"name\": \"California horses\"\n" + 19 | " },\n" + 20 | " \"after\": {\n" + 21 | " \"id\": 60,\n" + 22 | " \"name\": \"New Mexico penguins\"\n" + 23 | " },\n" + 24 | " \"source\": {\n" + 25 | " \"version\": \"0.7.3\",\n" + 26 | " \"name\": \"dbserver1\",\n" + 27 | " \"server_id\": 223344,\n" + 28 | " \"ts_sec\": 1520202662,\n" + 29 | " \"gtid\": null,\n" + 30 | " \"file\": \"mysql-bin.000003\",\n" + 31 | " \"pos\": 329994,\n" + 32 | " \"row\": 0,\n" + 33 | " \"snapshot\": null,\n" + 34 | " \"thread\": 2,\n" + 35 | " \"db\": \"mydb\",\n" + 36 | " \"table\": \"Team\"\n" + 37 | " },\n" + 38 | " \"op\": \"u\",\n" + 39 | " \"ts_ms\": 
1520202662784\n" + 40 | "}")).isEqualTo("cdc,table=Team,operation=update,id=60,name=updated nbChanges=1,found=1 1520202662784000000"); 41 | } 42 | 43 | @Test 44 | public void delete() { 45 | assertThat(cdcChange.toTelegraf("" + 46 | "{\n" + 47 | " \"before\": {\n" + 48 | " \"id\": 46,\n" + 49 | " \"name\": \"Louisiana warlocks\"\n" + 50 | " },\n" + 51 | " \"after\": null,\n" + 52 | " \"source\": {\n" + 53 | " \"version\": \"0.7.3\",\n" + 54 | " \"name\": \"dbserver1\",\n" + 55 | " \"server_id\": 223344,\n" + 56 | " \"ts_sec\": 1520202505,\n" + 57 | " \"gtid\": null,\n" + 58 | " \"file\": \"mysql-bin.000003\",\n" + 59 | " \"pos\": 293861,\n" + 60 | " \"row\": 0,\n" + 61 | " \"snapshot\": null,\n" + 62 | " \"thread\": 2,\n" + 63 | " \"db\": \"mydb\",\n" + 64 | " \"table\": \"Team\"\n" + 65 | " },\n" + 66 | " \"op\": \"d\",\n" + 67 | " \"ts_ms\": 1520202505258\n" + 68 | "}\n")).isEqualTo("cdc,table=Team,operation=delete,id=46 found=1 1520202505258000000"); 69 | } 70 | 71 | @Test 72 | public void insert() { 73 | assertThat(cdcChange.toTelegraf("" + 74 | "{\n" + 75 | " \"before\": null,\n" + 76 | " \"after\": {\n" + 77 | " \"id\": 57,\n" + 78 | " \"name\": \"Florida dragons\"\n" + 79 | " },\n" + 80 | " \"source\": {\n" + 81 | " \"version\": \"0.7.3\",\n" + 82 | " \"name\": \"dbserver1\",\n" + 83 | " \"server_id\": 223344,\n" + 84 | " \"ts_sec\": 1520202543,\n" + 85 | " \"gtid\": null,\n" + 86 | " \"file\": \"mysql-bin.000003\",\n" + 87 | " \"pos\": 301020,\n" + 88 | " \"row\": 0,\n" + 89 | " \"snapshot\": null,\n" + 90 | " \"thread\": 2,\n" + 91 | " \"db\": \"mydb\",\n" + 92 | " \"table\": \"Team\"\n" + 93 | " },\n" + 94 | " \"op\": \"c\",\n" + 95 | " \"ts_ms\": 1520202543833\n" + 96 | "}")).isEqualTo("cdc,table=Team,operation=insert,id=57 found=1 1520202543833000000"); 97 | } 98 | } 99 | -------------------------------------------------------------------------------- /step19/docker-compose-no-control-center.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/framiere/a-kafka-story/7cb5f0e6323bab10b44386b0aeab9a6ee00bfead/step19/docker-compose-no-control-center.png -------------------------------------------------------------------------------- /step19/docker-compose-novolumne.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/framiere/a-kafka-story/7cb5f0e6323bab10b44386b0aeab9a6ee00bfead/step19/docker-compose-novolumne.png -------------------------------------------------------------------------------- /step19/docker-compose-volumes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/framiere/a-kafka-story/7cb5f0e6323bab10b44386b0aeab9a6ee00bfead/step19/docker-compose-volumes.png -------------------------------------------------------------------------------- /step19/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.4' 2 | services: 3 | zookeeper: 4 | image: confluentinc/cp-zookeeper:5.3.1 5 | hostname: zookeeper 6 | environment: 7 | ZOOKEEPER_CLIENT_PORT: 2181 8 | healthcheck: 9 | test: ["CMD", "bash", "-c", "echo ruok | nc localhost 2181 | grep imok"] 10 | start_period: 30s 11 | 12 | kafka-1: 13 | image: confluentinc/cp-kafka:5.3.1 14 | hostname: kafka-1 15 | depends_on: 16 | - zookeeper 17 | environment: 18 | KAFKA_BROKER_ID: 1 19 | KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181 20 | KAFKA_ADVERTISED_LISTENERS: 
PLAINTEXT://kafka-1:9092 21 | KAFKA_DEFAULT_REPLICATION_FACTOR: 3 22 | healthcheck: 23 | test: ["CMD", "nc", "127.0.0.1", "9092"] 24 | start_period: 30s 25 | 26 | kafka-2: 27 | image: confluentinc/cp-kafka:5.3.1 28 | hostname: kafka-2 29 | depends_on: 30 | - zookeeper 31 | environment: 32 | KAFKA_BROKER_ID: 2 33 | KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181 34 | KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka-2:9092 35 | KAFKA_DEFAULT_REPLICATION_FACTOR: 3 36 | healthcheck: 37 | test: ["CMD", "nc", "127.0.0.1", "9092"] 38 | start_period: 30s 39 | 40 | kafka-3: 41 | image: confluentinc/cp-kafka:5.3.1 42 | hostname: kafka-3 43 | depends_on: 44 | - zookeeper 45 | environment: 46 | KAFKA_BROKER_ID: 3 47 | KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181 48 | KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka-3:9092 49 | KAFKA_DEFAULT_REPLICATION_FACTOR: 3 50 | healthcheck: 51 | test: ["CMD", "nc", "127.0.0.1", "9092"] 52 | start_period: 30s 53 | 54 | mysql: 55 | image: mysql:5.7 56 | volumes: 57 | - ../step11/mysql.cnf:/etc/mysql/conf.d/custom.cnf 58 | - ./mysql-init.sql:/docker-entrypoint-initdb.d/mysql-init.sql 59 | ports: 60 | - "3306:3306" 61 | environment: 62 | MYSQL_ROOT_PASSWORD: password 63 | MYSQL_DATABASE: db 64 | MYSQL_USER: user 65 | MYSQL_PASSWORD: password 66 | MYSQL_ROOT_HOST: 172.% # Allow docker containers to connect to mysql 67 | 68 | minio: 69 | image: minio/minio 70 | ports: 71 | - "9000:9000" 72 | environment: 73 | MINIO_ACCESS_KEY: AKIAIOSFODNN7EXAMPLE 74 | MINIO_SECRET_KEY: wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY 75 | command: server /data 76 | 77 | create-buckets: 78 | image: minio/mc 79 | depends_on: 80 | - minio 81 | entrypoint: > 82 | /bin/sh -c " 83 | /usr/bin/mc config host add myminio http://minio:9000 AKIAIOSFODNN7EXAMPLE wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY; 84 | while ! /usr/bin/mc mb myminio/cdc; do 85 | sleep 5 86 | done 87 | exit 0; 88 | " 89 | 90 | list-buckets: 91 | image: minio/mc 92 | depends_on: 93 | - minio 94 | entrypoint: > 95 | /bin/sh -c " 96 | /usr/bin/mc config host add myminio http://minio:9000 AKIAIOSFODNN7EXAMPLE wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY; 97 | while ! 
/usr/bin/mc ls -r myminio/cdc; do 98 | sleep 5 99 | done 100 | exit 0; 101 | " 102 | 103 | connect: 104 | image: confluentinc/cp-kafka-connect:5.3.1 105 | hostname: connect 106 | restart: always 107 | ports: 108 | - "8083:8083" 109 | depends_on: 110 | - zookeeper 111 | - kafka-1 112 | - kafka-2 113 | - kafka-3 114 | - mysql 115 | - minio 116 | environment: 117 | CONNECT_BOOTSTRAP_SERVERS: "kafka-1:9092,kafka-2:9092,kafka-3:9092" 118 | CONNECT_GROUP_ID: "connect" 119 | CONNECT_CONFIG_STORAGE_TOPIC: connect-config 120 | CONNECT_OFFSET_STORAGE_TOPIC: connect-offsets 121 | CONNECT_STATUS_STORAGE_TOPIC: connect-status 122 | CONNECT_REPLICATION_FACTOR: 2 123 | CONNECT_KEY_CONVERTER: org.apache.kafka.connect.json.JsonConverter 124 | CONNECT_VALUE_CONVERTER: org.apache.kafka.connect.json.JsonConverter 125 | CONNECT_INTERNAL_KEY_CONVERTER: org.apache.kafka.connect.json.JsonConverter 126 | CONNECT_INTERNAL_VALUE_CONVERTER: org.apache.kafka.connect.json.JsonConverter 127 | CONNECT_REST_ADVERTISED_HOST_NAME: "connect" 128 | CONNECT_PLUGIN_PATH: "/usr/share/java" 129 | CONNECT_LOG4J_LOGGERS: org.reflections=ERROR 130 | AWS_ACCESS_KEY_ID: AKIAIOSFODNN7EXAMPLE 131 | AWS_SECRET_ACCESS_KEY: wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY 132 | volumes: 133 | - ../step9/mysql-connector-java-5.1.45-bin.jar:/usr/share/java/kafka-connect-jdbc/mysql-connector-java-5.1.45-bin.jar 134 | - ../step11/debezium-connector-mysql:/usr/share/java/debezium-connector-mysql 135 | healthcheck: 136 | test: ["CMD", "nc", "127.0.0.1", "8083"] 137 | start_period: 30s 138 | 139 | producer: 140 | build: ../step18/streamjoins/ 141 | environment: 142 | ACTION: producer 143 | depends_on: 144 | - kafka-1 145 | - kafka-2 146 | - kafka-3 147 | 148 | operationstomysql: 149 | build: operationtomysql/ 150 | depends_on: 151 | - mysql 152 | 153 | cdcdiff: 154 | build: cdcdiff/ 155 | depends_on: 156 | - kafka-1 157 | - kafka-2 158 | - kafka-3 159 | 160 | influxdb: 161 | image: influxdb:1.5.2 162 | ports: 163 | - "8086:8086" 164 | healthcheck: 165 | test: ["CMD", "influx", "-host", "127.0.0.1", "-port", "8086", "-execute", "SHOW DATABASES"] 166 | start_period: 30s 167 | 168 | grafana: 169 | image: grafana/grafana:5.1.2 170 | ports: 171 | - "3000:3000" 172 | depends_on: 173 | - influxdb 174 | environment: 175 | GF_INSTALL_PLUGINS: jdbranham-diagram-panel,novalabs-annotations-panel,vonage-status-panel,bessler-pictureit-panel,grafana-piechart-panel 176 | healthcheck: 177 | test: ["CMD", "curl", "-f", "http://localhost:3000"] 178 | start_period: 30s 179 | 180 | grafana-setup: 181 | build: ../step8/grafana-setup/ 182 | depends_on: 183 | - grafana 184 | 185 | telegraf-topic: 186 | image: confluentinc/cp-kafka:5.3.1 187 | command: bash -c "cub kafka-ready -z zookeeper:2181 3 120 && kafka-topics --zookeeper zookeeper:2181 --create --topic telegraf --partitions 10 --replication-factor 3" 188 | depends_on: 189 | - zookeeper 190 | - kafka-1 191 | - kafka-2 192 | - kafka-3 193 | 194 | kafka-to-influxdb: 195 | image: telegraf:1.8 196 | restart: unless-stopped 197 | volumes: 198 | - ../step8/telegraf-kafka-to-influxdb.conf:/etc/telegraf/telegraf.conf:ro 199 | depends_on: 200 | - kafka-1 201 | - kafka-2 202 | - kafka-3 203 | - influxdb 204 | 205 | ksql-json-to-influxdb: 206 | image: telegraf:1.8 207 | restart: unless-stopped 208 | volumes: 209 | - ./ksql-json-to-influxdb.conf:/etc/telegraf/telegraf.conf:ro 210 | depends_on: 211 | - kafka-1 212 | - kafka-2 213 | - kafka-3 214 | - influxdb 215 | 216 | ksql: 217 | image: confluentinc/cp-ksql-server:5.3.1 218 | 
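# This KSQL server is what `docker-compose exec ksql ksql` talks to and what Control Center reaches via CONTROL_CENTER_KSQL_URL below;
# port 8088 is published so the advertised URL http://localhost:8088 also works from the host.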
hostname: ksql-cli 219 | depends_on: 220 | - kafka-1 221 | - kafka-2 222 | - kafka-3 223 | ports: 224 | - "8088:8088" 225 | environment: 226 | KSQL_BOOTSTRAP_SERVERS: kafka-1:9092 227 | KSQL_LISTENERS: http://0.0.0.0:8088 228 | 229 | control-center: 230 | image: confluentinc/cp-enterprise-control-center:5.3.1 231 | container_name: control-center 232 | restart: always 233 | depends_on: 234 | - zookeeper 235 | - kafka-1 236 | - kafka-2 237 | - kafka-3 238 | - ksql 239 | ports: 240 | - "9021:9021" 241 | environment: 242 | CONTROL_CENTER_BOOTSTRAP_SERVERS: "kafka-1:9091,kafka-2:9092" 243 | CONTROL_CENTER_ZOOKEEPER_CONNECT: "zookeeper:2181" 244 | CONTROL_CENTER_KSQL_URL: "http://ksql:8088" 245 | CONTROL_CENTER_REST_LISTENERS: "http://0.0.0.0:9021" 246 | CONTROL_CENTER_KSQL_ADVERTISED_URL: "http://localhost:8088" 247 | CONTROL_CENTER_REPLICATION_FACTOR: 3 248 | -------------------------------------------------------------------------------- /step19/ksql-json-to-influxdb.conf: -------------------------------------------------------------------------------- 1 | [agent] 2 | interval = "2s" 3 | flush_interval= "2s" 4 | 5 | [[inputs.kafka_consumer]] 6 | topics = ["BY_OPERATION","BY_CLASS"] 7 | brokers = ["kafka-1:9092","kafka-2:9092","kafka-3:9092"] 8 | consumer_group = "ksql-computed-json-to-influxdb" 9 | offset = "oldest" 10 | data_format = "json" 11 | name_suffix = "_ksql" 12 | tag_keys = [ 13 | "OPERATION", 14 | "CLASS" 15 | ] 16 | 17 | [[outputs.influxdb]] 18 | urls = ["http://influxdb:8086"] 19 | database = "telegraf" 20 | 21 | [[outputs.file]] 22 | files=["stdout"] 23 | -------------------------------------------------------------------------------- /step19/mysql-init.sql: -------------------------------------------------------------------------------- 1 | GRANT REPLICATION SLAVE, REPLICATION CLIENT ON *.* TO 'replicator' IDENTIFIED BY 'replpass'; 2 | 3 | GRANT SELECT, RELOAD, SHOW DATABASES, REPLICATION SLAVE, REPLICATION CLIENT ON *.* TO 'debezium' IDENTIFIED BY 'dbz'; 4 | 5 | CREATE DATABASE mydb; 6 | 7 | GRANT ALL PRIVILEGES ON mydb.* TO 'user'@'%'; 8 | 9 | USE mydb; 10 | 11 | CREATE TABLE Member ( 12 | id INT NOT NULL PRIMARY KEY AUTO_INCREMENT, 13 | firstName VARCHAR(255) NOT NULL, 14 | lastName VARCHAR(255) NOT NULL, 15 | gender VARCHAR(255) NOT NULL, 16 | phone VARCHAR(255) NOT NULL, 17 | maritalStatus VARCHAR(255) NOT NULL, 18 | teamId INT NOT NULL, 19 | age INT NOT NULL, 20 | role VARCHAR(255) NOT NULL 21 | ); 22 | 23 | CREATE TABLE Address ( 24 | id INT NOT NULL PRIMARY KEY AUTO_INCREMENT, 25 | streetName VARCHAR(255) NOT NULL, 26 | streetAddress VARCHAR(255) NOT NULL, 27 | city VARCHAR(255) NOT NULL, 28 | state VARCHAR(255) NOT NULL, 29 | country VARCHAR(255) NOT NULL 30 | ); 31 | 32 | CREATE TABLE Team ( 33 | id INT NOT NULL PRIMARY KEY AUTO_INCREMENT, 34 | name VARCHAR(255) NOT NULL 35 | ); 36 | -------------------------------------------------------------------------------- /step19/operationtomysql/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM maven:3.6.1-jdk-8 as mavenBuild 2 | COPY pom.xml pom.xml 3 | COPY src src 4 | RUN ["mvn", "install"] 5 | 6 | FROM confluentinc/cp-base:5.3.1 7 | COPY --from=mavenBuild ./target/*.jar ./ 8 | ENV BROKER_LIST "kafka-1:9092,kafka-2:9092,kafka-3:9092" 9 | ENV JAVA_OPTS "" 10 | CMD [ "bash", "-c", "cub kafka-ready -b ${BROKER_LIST} 3 60 && java ${JAVA_OPTS} -jar *.jar ${ACTION} ${BROKER_LIST}" ] 11 | -------------------------------------------------------------------------------- 
/step19/operationtomysql/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | com.github.framiere 6 | operations-to-mysql 7 | 1.0.0 8 | jar 9 | 10 | org.springframework.boot 11 | spring-boot-starter-parent 12 | 2.0.0.RELEASE 13 | 14 | 15 | 16 | 1.8 17 | 1.8 18 | UTF-8 19 | UTF-8 20 | 21 | 22 | 23 | org.apache.kafka 24 | kafka-clients 25 | 2.3.0 26 | 27 | 28 | org.apache.kafka 29 | kafka-streams 30 | 2.3.0 31 | 32 | 33 | org.projectlombok 34 | lombok 35 | 1.16.20 36 | compile 37 | 38 | 39 | org.springframework.boot 40 | spring-boot-starter-data-jpa 41 | 42 | 43 | mysql 44 | mysql-connector-java 45 | 8.0.16 46 | 47 | 48 | javax.xml.bind 49 | jaxb-api 50 | 2.2.11 51 | 52 | 53 | 54 | 55 | 56 | org.springframework.boot 57 | spring-boot-maven-plugin 58 | 59 | 60 | 61 | 62 | -------------------------------------------------------------------------------- /step19/operationtomysql/src/main/java/com/github/framiere/AddressRepository.java: -------------------------------------------------------------------------------- 1 | package com.github.framiere; 2 | 3 | import org.springframework.data.repository.CrudRepository; 4 | import org.springframework.stereotype.Repository; 5 | 6 | @Repository 7 | public interface AddressRepository extends CrudRepository { 8 | 9 | } 10 | -------------------------------------------------------------------------------- /step19/operationtomysql/src/main/java/com/github/framiere/Application.java: -------------------------------------------------------------------------------- 1 | package com.github.framiere; 2 | 3 | import com.fasterxml.jackson.databind.ObjectMapper; 4 | import org.apache.kafka.clients.admin.AdminClient; 5 | import org.apache.kafka.clients.consumer.ConsumerConfig; 6 | import org.apache.kafka.common.serialization.Serdes; 7 | import org.apache.kafka.streams.*; 8 | import org.apache.kafka.streams.kstream.Consumed; 9 | import org.apache.kafka.streams.processor.WallclockTimestampExtractor; 10 | import org.springframework.beans.factory.annotation.Autowired; 11 | import org.springframework.boot.CommandLineRunner; 12 | import org.springframework.boot.SpringApplication; 13 | import org.springframework.boot.autoconfigure.SpringBootApplication; 14 | import org.springframework.data.jpa.repository.config.EnableJpaRepositories; 15 | import org.springframework.data.repository.CrudRepository; 16 | 17 | import java.io.IOException; 18 | import java.util.Properties; 19 | import java.util.Set; 20 | import java.util.concurrent.TimeUnit; 21 | 22 | import static com.github.framiere.Domain.*; 23 | 24 | @SpringBootApplication 25 | @EnableJpaRepositories 26 | public class Application implements CommandLineRunner { 27 | @Autowired 28 | private TeamRepository teamRepository; 29 | @Autowired 30 | private AddressRepository addressRepository; 31 | @Autowired 32 | private MemberRepository memberRepository; 33 | private final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); 34 | 35 | public static void main(String[] args) { 36 | SpringApplication.run(Application.class, args); 37 | } 38 | 39 | @Override 40 | public void run(String... args) throws Exception { 41 | String bootstrapServers = args.length == 1 ? 
args[0] : "localhost:9092"; 42 | waitForTopics(bootstrapServers); 43 | 44 | JsonSerde randomProducerActionSerde = new JsonSerde<>(RandomProducerAction.class); 45 | 46 | StreamsBuilder builder = new StreamsBuilder(); 47 | builder 48 | .stream(RandomProducerAction.class.getSimpleName(), Consumed.with(Serdes.String(), randomProducerActionSerde)) 49 | .peek((k, action) -> handleProducerAction(action)); 50 | Topology build = builder.build(); 51 | 52 | System.out.println(build.describe()); 53 | 54 | KafkaStreams kafkaStreams = new KafkaStreams(build, buildProducerProperties(bootstrapServers)); 55 | kafkaStreams.cleanUp(); 56 | kafkaStreams.start(); 57 | } 58 | 59 | private void handleProducerAction(RandomProducerAction action) { 60 | try { 61 | System.out.println(action.operation + " " + action.operation.operationMode + " on " + action.clazz); 62 | String content = OBJECT_MAPPER.writeValueAsString(action.object); 63 | switch (action.clazz) { 64 | case "Member": 65 | handleMember(action, content); 66 | break; 67 | case "Team": 68 | handleTeam(action, content); 69 | break; 70 | case "Address": 71 | handleAddress(action, content); 72 | break; 73 | } 74 | } catch (IOException e) { 75 | e.printStackTrace(); 76 | } 77 | } 78 | 79 | private void handleAddress(RandomProducerAction action, String content) throws IOException { 80 | Address address = OBJECT_MAPPER.readValue(content, Address.class); 81 | doOperation(action.operation.operationMode, addressRepository, address, address.id); 82 | } 83 | 84 | private void handleTeam(RandomProducerAction action, String content) throws IOException { 85 | Team team = OBJECT_MAPPER.readValue(content, Team.class); 86 | doOperation(action.operation.operationMode, teamRepository, team, team.id); 87 | } 88 | 89 | private void handleMember(RandomProducerAction action, String content) throws IOException { 90 | Member member = OBJECT_MAPPER.readValue(content, Member.class); 91 | doOperation(action.operation.operationMode, memberRepository, member, member.id); 92 | } 93 | 94 | private void waitForTopics(String bootstrapServers) throws Exception { 95 | while (true) { 96 | TimeUnit.SECONDS.sleep(5); 97 | Properties properties = new Properties(); 98 | properties.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers); 99 | AdminClient adminClient = AdminClient.create(properties); 100 | Set topics = adminClient.listTopics().names().get(); 101 | if (topics.contains(RandomProducerAction.class.getSimpleName())) { 102 | return; 103 | } 104 | System.out.println("Waiting for data"); 105 | } 106 | } 107 | 108 | public void doOperation(OperationMode operationMode, CrudRepository repo, T object, ID id) { 109 | switch (operationMode) { 110 | case UPDATE: 111 | case INSERT: 112 | repo.save(object); 113 | break; 114 | case DELETE: 115 | if (repo.existsById(id)) { 116 | repo.deleteById(id); 117 | } 118 | break; 119 | default: 120 | throw new IllegalArgumentException(operationMode + " is not supported"); 121 | } 122 | } 123 | 124 | private Properties buildProducerProperties(String bootstrapServers) { 125 | Properties properties = new Properties(); 126 | properties.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers); 127 | properties.put(StreamsConfig.NUM_STREAM_THREADS_CONFIG, 3); 128 | properties.put(StreamsConfig.APPLICATION_ID_CONFIG, "operations-to-mysql"); 129 | properties.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 5 * 1000); 130 | properties.put(StreamsConfig.CACHE_MAX_BYTES_BUFFERING_CONFIG, 0); 131 | 
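// Process with wall-clock time rather than the records' embedded timestamps,
// and start from the earliest offsets so a fresh application id replays every RandomProducerAction.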
properties.put(StreamsConfig.DEFAULT_TIMESTAMP_EXTRACTOR_CLASS_CONFIG, WallclockTimestampExtractor.class); 132 | properties.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest"); 133 | return properties; 134 | } 135 | } 136 | -------------------------------------------------------------------------------- /step19/operationtomysql/src/main/java/com/github/framiere/Domain.java: -------------------------------------------------------------------------------- 1 | package com.github.framiere; 2 | 3 | import lombok.AllArgsConstructor; 4 | import lombok.Data; 5 | import lombok.EqualsAndHashCode; 6 | import lombok.NoArgsConstructor; 7 | import lombok.experimental.Wither; 8 | 9 | import javax.persistence.Entity; 10 | import javax.persistence.Id; 11 | import javax.persistence.Table; 12 | 13 | import static com.github.framiere.Domain.OperationMode.*; 14 | 15 | public class Domain { 16 | 17 | @Data 18 | @Entity 19 | @Table(name = "Member") 20 | @EqualsAndHashCode(of = "id") 21 | @NoArgsConstructor 22 | @AllArgsConstructor 23 | @Wither 24 | public static class Member { 25 | @Id 26 | public int id; 27 | public String firstname; 28 | public String lastname; 29 | public Gender gender; 30 | public String phone; 31 | public MaritalStatus maritalStatus; 32 | public int teamId; 33 | public int age; 34 | public Role role; 35 | } 36 | 37 | @Data 38 | @Entity 39 | @Table(name = "Address") 40 | @EqualsAndHashCode(of = "id") 41 | @NoArgsConstructor 42 | @AllArgsConstructor 43 | @Wither 44 | public static class Address { 45 | @Id 46 | public int id; 47 | public String streetName; 48 | public String streetAddress; 49 | public String city; 50 | public String state; 51 | public String country; 52 | } 53 | 54 | public enum MaritalStatus { 55 | MARRIED, 56 | SINGLE, 57 | DIVORCED, 58 | WIDOWED 59 | } 60 | 61 | public enum Role { 62 | DEVELOPER, 63 | QA, 64 | ARCHITECT, 65 | MANAGER 66 | } 67 | 68 | public enum Gender { 69 | MALE, 70 | FEMALE, 71 | THIRD 72 | } 73 | 74 | @Data 75 | @Entity 76 | @Table(name = "Team") 77 | @EqualsAndHashCode(of = "id") 78 | @NoArgsConstructor 79 | @AllArgsConstructor 80 | @Wither 81 | public static class Team { 82 | @Id 83 | public int id; 84 | public String name; 85 | } 86 | 87 | @Data 88 | @AllArgsConstructor 89 | @NoArgsConstructor 90 | @Wither 91 | @EqualsAndHashCode(of = "id") 92 | public static class Aggregate { 93 | public Member member; 94 | public Address address; 95 | public Team team; 96 | } 97 | 98 | @AllArgsConstructor 99 | enum Operation { 100 | NEW_TEAM(8, INSERT), 101 | NEW_MEMBER(15, INSERT), 102 | TEAM_NAME_CHANGE(20, UPDATE), 103 | DELETE_TEAM(3, DELETE), 104 | DELETE_MEMBER(4, DELETE), 105 | NEW_MARITAL_STATUS(5, UPDATE), 106 | CHANGE_PHONE(2, UPDATE), 107 | CHANGE_ADDRESS_IN_TOWN(5, UPDATE), 108 | CHANGE_CITY(4, UPDATE), 109 | CHANGE_COUNTRY(1, UPDATE), 110 | CHANGE_GENDER(1, UPDATE), 111 | CHANGE_TEAM(5, UPDATE), 112 | CHANGE_ROLE(11, UPDATE), 113 | ANNIVERSARY(2, UPDATE), 114 | NO_OP(100, NONE); 115 | int chanceOfHappening; 116 | OperationMode operationMode; 117 | } 118 | 119 | enum OperationMode { 120 | INSERT, 121 | DELETE, 122 | UPDATE, 123 | NONE 124 | } 125 | 126 | @Data 127 | @AllArgsConstructor 128 | @NoArgsConstructor 129 | public static class RandomProducerAction { 130 | public Operation operation; 131 | public String clazz; 132 | public Object object; 133 | } 134 | } 135 | -------------------------------------------------------------------------------- /step19/operationtomysql/src/main/java/com/github/framiere/JsonSerde.java: 
-------------------------------------------------------------------------------- 1 | package com.github.framiere; 2 | 3 | import com.fasterxml.jackson.databind.ObjectMapper; 4 | import org.apache.kafka.common.serialization.Deserializer; 5 | import org.apache.kafka.common.serialization.Serde; 6 | import org.apache.kafka.common.serialization.Serializer; 7 | 8 | import java.io.IOException; 9 | import java.util.Map; 10 | 11 | public class JsonSerde implements Serde { 12 | private final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); 13 | 14 | private final Class clazz; 15 | 16 | public JsonSerde(Class clazz) { 17 | this.clazz = clazz; 18 | } 19 | 20 | @Override 21 | public void configure(Map configs, boolean isKey) { 22 | } 23 | 24 | @Override 25 | public void close() { 26 | } 27 | 28 | @Override 29 | public Serializer serializer() { 30 | return new JsonSerializer<>(); 31 | } 32 | 33 | @Override 34 | public Deserializer deserializer() { 35 | return new Deserializer() { 36 | @Override 37 | public void configure(Map configs, boolean isKey) { 38 | 39 | } 40 | 41 | @Override 42 | public T deserialize(String topic, byte[] data) { 43 | try { 44 | return data == null ? null : (T) OBJECT_MAPPER.readValue(data, clazz); 45 | } catch (IOException e) { 46 | throw new RuntimeException(e); 47 | } 48 | } 49 | 50 | @Override 51 | public void close() { 52 | 53 | } 54 | }; 55 | } 56 | } -------------------------------------------------------------------------------- /step19/operationtomysql/src/main/java/com/github/framiere/JsonSerializer.java: -------------------------------------------------------------------------------- 1 | package com.github.framiere; 2 | 3 | 4 | import com.fasterxml.jackson.core.JsonProcessingException; 5 | import com.fasterxml.jackson.databind.ObjectMapper; 6 | import org.apache.kafka.common.serialization.Serializer; 7 | 8 | import java.util.Map; 9 | 10 | public class JsonSerializer implements Serializer { 11 | private final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); 12 | 13 | @Override 14 | public void configure(Map configs, boolean isKey) { 15 | } 16 | 17 | @Override 18 | public byte[] serialize(String topic, T data) { 19 | try { 20 | return data == null ? 
null : OBJECT_MAPPER.writeValueAsString(data).getBytes(); 21 | } catch (JsonProcessingException e) { 22 | throw new RuntimeException(e); 23 | } 24 | } 25 | 26 | @Override 27 | public void close() { 28 | } 29 | } -------------------------------------------------------------------------------- /step19/operationtomysql/src/main/java/com/github/framiere/MemberRepository.java: -------------------------------------------------------------------------------- 1 | package com.github.framiere; 2 | 3 | import org.springframework.data.repository.CrudRepository; 4 | import org.springframework.stereotype.Repository; 5 | 6 | @Repository 7 | public interface MemberRepository extends CrudRepository { 8 | 9 | } 10 | -------------------------------------------------------------------------------- /step19/operationtomysql/src/main/java/com/github/framiere/TeamRepository.java: -------------------------------------------------------------------------------- 1 | package com.github.framiere; 2 | 3 | import org.springframework.data.repository.CrudRepository; 4 | import org.springframework.stereotype.Repository; 5 | 6 | @Repository 7 | public interface TeamRepository extends CrudRepository { 8 | 9 | } 10 | -------------------------------------------------------------------------------- /step19/operationtomysql/src/main/resources/application.properties: -------------------------------------------------------------------------------- 1 | spring.datasource.url=jdbc:mysql://mysql/mydb?user=user&password=password&useSSL=false 2 | spring.jpa.hibernate.naming.implicit-strategy=org.hibernate.boot.model.naming.ImplicitNamingStrategyLegacyJpaImpl 3 | spring.jpa.hibernate.naming.physical-strategy=org.hibernate.boot.model.naming.PhysicalNamingStrategyStandardImpl 4 | spring.jpa.show-sql=true 5 | spring.jpa.properties.hibernate.format_sql=true 6 | logging.level.org.hibernate.SQL=DEBUG 7 | logging.level.org.hibernate.type=TRACE -------------------------------------------------------------------------------- /step19/operationtomysql/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | log4j.rootLogger=INFO, stdout 2 | 3 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 4 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 5 | log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n 6 | -------------------------------------------------------------------------------- /step2/README.md: -------------------------------------------------------------------------------- 1 | # Objective 2 | 3 | 1. 
Run a zookeeper and multiple kafka brokers 4 | 5 | # `docker-compose` 6 | 7 | In this example, we'll focus on kafka; there is no need to set up multiple zookeepers, it would work the same way 8 | 9 | ```yml 10 | version: '2' 11 | services: 12 | zookeeper: 13 | image: confluentinc/cp-zookeeper:5.3.1 14 | hostname: zookeeper 15 | environment: 16 | ZOOKEEPER_CLIENT_PORT: 2181 17 | 18 | kafka-1: 19 | image: confluentinc/cp-kafka:5.3.1 20 | hostname: kafka-1 21 | depends_on: 22 | - zookeeper 23 | environment: 24 | KAFKA_BROKER_ID: 1 25 | KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181 26 | KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka-1:9092 27 | 28 | kafka-2: 29 | image: confluentinc/cp-kafka:5.3.1 30 | hostname: kafka-2 31 | depends_on: 32 | - zookeeper 33 | environment: 34 | KAFKA_BROKER_ID: 2 35 | KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181 36 | KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka-2:9092 37 | 38 | kafka-3: 39 | image: confluentinc/cp-kafka:5.3.1 40 | hostname: kafka-3 41 | depends_on: 42 | - zookeeper 43 | environment: 44 | KAFKA_BROKER_ID: 3 45 | KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181 46 | KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka-3:9092 47 | ``` 48 | 49 | You can discover the configuration options at 50 | 51 | * https://docs.confluent.io/current/installation/docker/config-reference.html#zk-configuration 52 | * https://docs.confluent.io/current/installation/docker/config-reference.html#confluent-kafka-configuration 53 | 54 | Note: `KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR` is not defined anymore; we now have enough kafka brokers to satisfy the default replication factor of 3. 55 | 56 | Then run it 57 | 58 | ```sh 59 | $ docker-compose up -d 60 | $ docker-compose ps 61 | Name Command State Ports 62 | -------------------------------------------------------------------------------------------------- 63 | step2_kafka-1_1 /etc/confluent/docker/run Up 64 | step2_kafka-2_1 /etc/confluent/docker/run Up 65 | step2_kafka-3_1 /etc/confluent/docker/run Up 66 | step2_zookeeper_1 /etc/confluent/docker/run Up 0.0.0.0:2181->2181/tcp, 2888/tcp, 3888/tcp 67 | ``` 68 | 69 | Fine, looks like `zookeeper` and multiple `kafka` brokers are up.
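If you want to double check that all three brokers registered themselves, you can ask ZooKeeper directly. This is only a quick sketch, assuming the `zookeeper-shell` tool shipped with the Confluent images; the broker ids `1`, `2` and `3` should show up in the output.

```sh
# list the broker ids currently registered in ZooKeeper
$ docker-compose exec zookeeper zookeeper-shell localhost:2181 ls /brokers/ids
```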
70 | 71 | 72 | # Let's see if everything is working 73 | 74 | Let's send a message 75 | ```sh 76 | $ docker-compose exec kafka-1 bash -c "echo story | kafka-console-producer --broker-list localhost:9092 --topic sample" 77 | >>% 78 | ``` 79 | 80 | And retrieve it 81 | 82 | ```sh 83 | $ docker-compose exec kafka-1 kafka-console-consumer --bootstrap-server localhost:9092 --topic sample --from-beginning --max-messages=1 84 | story 85 | Processed a total of 1 messages 86 | ``` 87 | 88 | OK, all good 89 | 90 | Let's see the topics 91 | 92 | ```sh 93 | $ docker-compose exec kafka-1 kafka-topics --zookeeper zookeeper:2181 --list 94 | __consumer_offsets 95 | sample 96 | ``` 97 | 98 | And focus on `sample` 99 | 100 | ``` 101 | $ docker-compose exec kafka-1 kafka-topics --zookeeper zookeeper:2181 --describe --topic sample 102 | Topic:sample PartitionCount:1 ReplicationFactor:1 Configs: 103 | Topic: sample Partition: 0 Leader: 1 Replicas: 1 Isr: 1 104 | ``` 105 | 106 | -------------------------------------------------------------------------------- /step2/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '2' 2 | services: 3 | zookeeper: 4 | image: confluentinc/cp-zookeeper:5.3.1 5 | hostname: zookeeper 6 | environment: 7 | ZOOKEEPER_CLIENT_PORT: 2181 8 | 9 | kafka-1: 10 | image: confluentinc/cp-kafka:5.3.1 11 | hostname: kafka-1 12 | depends_on: 13 | - zookeeper 14 | environment: 15 | KAFKA_BROKER_ID: 1 16 | KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181 17 | KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka-1:9092 18 | 19 | kafka-2: 20 | image: confluentinc/cp-kafka:5.3.1 21 | hostname: kafka-2 22 | depends_on: 23 | - zookeeper 24 | environment: 25 | KAFKA_BROKER_ID: 2 26 | KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181 27 | KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka-2:9092 28 | 29 | kafka-3: 30 | image: confluentinc/cp-kafka:5.3.1 31 | hostname: kafka-3 32 | depends_on: 33 | - zookeeper 34 | environment: 35 | KAFKA_BROKER_ID: 3 36 | KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181 37 | KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka-3:9092 -------------------------------------------------------------------------------- /step3/README.md: -------------------------------------------------------------------------------- 1 | # Objective 2 | 3 | 1. Creating a simple java producer 4 | 1. Creating a simple java consumer 5 | 6 | # Docker-compose 7 | 8 | We are only adding 9 | 10 | ```yml 11 | producer: 12 | build: producer/ 13 | depends_on: 14 | - kafka-1 15 | - kafka-2 16 | - kafka-3 17 | 18 | consumer: 19 | build: consumer/ 20 | depends_on: 21 | - kafka-1 22 | - kafka-2 23 | - kafka-3 24 | ``` 25 | 26 | We are using the [`build`](https://docs.docker.com/compose/compose-file/compose-file-v2/#build) feature of docker compose. 27 | 28 | # Docker multi stage builds 29 | 30 | In `step3/consumer` you'll see a `Dockerfile` using a [multi stage build](https://docs.docker.com/engine/userguide/eng-image/multistage-build/#use-multi-stage-builds) 31 | 32 | The first step is the following 33 | ```Dockerfile 34 | FROM maven:3.6.1-jdk-8 as mavenBuild 35 | COPY pom.xml pom.xml 36 | COPY src src 37 | RUN ["mvn"] 38 | ``` 39 | 40 | This uses a maven image, copies the pom and the sources, then executes mvn, which produces a jar in `target/` 41 | 42 | Please note the `as mavenBuild` alias: it lets the next stage reference the artefacts of this image.
43 | 44 | The second step is 45 | 46 | ```Dockerfile 47 | FROM java:8 48 | COPY --from=mavenBuild ./target/*.jar ./ 49 | CMD ["bash", "-c", "java -jar *.jar"] 50 | ``` 51 | 52 | Using a java8 image we copy from the jar from the first build using is logical name `mavenBuild`, and specify the CMD. That's it. 53 | 54 | That allows to prevent embedding maven and all its artefacts in the target image. 55 | 56 | # Java 57 | 58 | The consumer is an almost identical copy of https://kafka.apache.org/10/javadoc/index.html?org/apache/kafka/clients/consumer/KafkaConsumer.html 59 | 60 | The producer is an almost identical copy of https://kafka.apache.org/10/javadoc/index.html?org/apache/kafka/clients/consumer/KafkaConsumer.html 61 | 62 | 63 | # Run 64 | 65 | You can then run it by `docker-compose up` 66 | 67 | ``` 68 | $ docker-compose up -d 69 | $ docker-compose ps 70 | Name Command State Ports 71 | ------------------------------------------------------------------------------------- 72 | step3_consumer_1 bash -c java -jar *.jar Up 73 | step3_kafka-1_1 /etc/confluent/docker/run Up 9092/tcp 74 | step3_kafka-2_1 /etc/confluent/docker/run Up 9092/tcp 75 | step3_kafka-3_1 /etc/confluent/docker/run Up 9092/tcp 76 | step3_producer_1 bash -c java -jar *.jar Exit 0 77 | step3_zookeeper_1 /etc/confluent/docker/run Up 2181/tcp, 2888/tcp, 3888/tcp 78 | $ docker-compose logs producer 79 | ... 80 | producer_1 | Sending key 301 Value 301 81 | producer_1 | Sending key 302 Value 302 82 | producer_1 | Sending key 303 Value 303 83 | producer_1 | Sending key 304 Value 304 84 | producer_1 | Sending key 305 Value 305 85 | producer_1 | Sending key 306 Value 306 86 | producer_1 | Sending key 307 Value 307 87 | producer_1 | Sending key 308 Value 308 88 | producer_1 | Sending key 309 Value 309 89 | producer_1 | Sending key 310 Value 310 90 | producer_1 | Sending key 311 Value 311 91 | producer_1 | Sending key 312 Value 312 92 | producer_1 | Sending key 313 Value 313 93 | $ docker-compose logs consumer 94 | Attaching to step3_consumer_1 95 | consumer_1 | log4j:WARN No appenders could be found for logger (org.apache.kafka.clients.consumer.ConsumerConfig). 96 | consumer_1 | log4j:WARN Please initialize the log4j system properly. 97 | consumer_1 | log4j:WARN See http://logging.apache.org/log4j/1.2/faq.html#noconfig for more info. 98 | consumer_1 | Subscribing to sample 99 | consumer_1 | log4j:WARN No appenders could be found for logger (org.apache.kafka.clients.consumer.ConsumerConfig). 100 | consumer_1 | log4j:WARN Please initialize the log4j system properly. 101 | consumer_1 | log4j:WARN See http://logging.apache.org/log4j/1.2/faq.html#noconfig for more info. 
102 | consumer_1 | Subscribing to sample 103 | consumer_1 | Received offset = 370729, key = key 1, value = Value 1 104 | consumer_1 | Received offset = 370730, key = key 2, value = Value 2 105 | consumer_1 | Received offset = 370731, key = key 3, value = Value 3 106 | consumer_1 | Received offset = 370732, key = key 4, value = Value 4 107 | consumer_1 | Received offset = 370733, key = key 5, value = Value 5 108 | ``` 109 | 110 | -------------------------------------------------------------------------------- /step3/consumer/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM maven:3.6.1-jdk-8 as mavenBuild 2 | COPY pom.xml pom.xml 3 | COPY src src 4 | RUN ["mvn", "install"] 5 | 6 | FROM java:8 7 | COPY --from=mavenBuild ./target/*.jar ./ 8 | ENV JAVA_OPTS "" 9 | CMD [ "bash", "-c", "java ${JAVA_OPTS} -jar *.jar"] -------------------------------------------------------------------------------- /step3/consumer/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | com.github.framiere 6 | simple-consumer 7 | 1.0.0 8 | jar 9 | 10 | 1.8 11 | 1.8 12 | UTF-8 13 | UTF-8 14 | 15 | 16 | 17 | org.apache.kafka 18 | kafka-clients 19 | 2.3.0 20 | 21 | 22 | 23 | install 24 | 25 | 26 | org.apache.maven.plugins 27 | maven-assembly-plugin 28 | 29 | 30 | install 31 | 32 | single 33 | 34 | 35 | false 36 | 37 | 38 | com.github.framiere.SimpleConsumer 39 | 40 | 41 | 42 | jar-with-dependencies 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | -------------------------------------------------------------------------------- /step3/consumer/src/main/java/com/github/framiere/SimpleConsumer.java: -------------------------------------------------------------------------------- 1 | package com.github.framiere; 2 | 3 | import org.apache.kafka.clients.consumer.ConsumerConfig; 4 | import org.apache.kafka.clients.consumer.ConsumerRecord; 5 | import org.apache.kafka.clients.consumer.ConsumerRecords; 6 | import org.apache.kafka.clients.consumer.KafkaConsumer; 7 | 8 | import java.time.Duration; 9 | import java.util.Arrays; 10 | import java.util.Properties; 11 | 12 | public class SimpleConsumer { 13 | public static void main(String[] args) { 14 | Properties props = new Properties(); 15 | props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "kafka-1:9092,kafka-2:9092,kafka-3:9092"); 16 | props.put(ConsumerConfig.GROUP_ID_CONFIG, "simple-consumer"); 17 | props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer"); 18 | props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer"); 19 | KafkaConsumer consumer = new KafkaConsumer<>(props); 20 | System.out.println("Subscribing to `sample` topic"); 21 | consumer.subscribe(Arrays.asList("sample")); 22 | while (true) { 23 | ConsumerRecords records = consumer.poll(Duration.ofMillis(400)); 24 | for (ConsumerRecord record : records) { 25 | System.out.println("Received offset = " + record.offset() + ", key = " + record.key() + ", value = " + record.value()); 26 | } 27 | } 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /step3/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '2' 2 | services: 3 | zookeeper: 4 | image: confluentinc/cp-zookeeper:5.3.1 5 | hostname: zookeeper 6 | environment: 7 | ZOOKEEPER_CLIENT_PORT: 2181 8 | 9 | kafka-1: 10 | image: 
confluentinc/cp-kafka:5.3.1 11 | hostname: kafka-1 12 | depends_on: 13 | - zookeeper 14 | environment: 15 | KAFKA_BROKER_ID: 1 16 | KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181 17 | KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka-1:9092 18 | 19 | kafka-2: 20 | image: confluentinc/cp-kafka:5.3.1 21 | hostname: kafka-2 22 | depends_on: 23 | - zookeeper 24 | environment: 25 | KAFKA_BROKER_ID: 2 26 | KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181 27 | KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka-2:9092 28 | 29 | kafka-3: 30 | image: confluentinc/cp-kafka:5.3.1 31 | hostname: kafka-3 32 | depends_on: 33 | - zookeeper 34 | environment: 35 | KAFKA_BROKER_ID: 3 36 | KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181 37 | KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka-3:9092 38 | 39 | producer: 40 | build: producer/ 41 | depends_on: 42 | - kafka-1 43 | - kafka-2 44 | - kafka-3 45 | 46 | consumer: 47 | build: consumer/ 48 | depends_on: 49 | - kafka-1 50 | - kafka-2 51 | - kafka-3 52 | -------------------------------------------------------------------------------- /step3/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | 6 | com.github.framiere 7 | kafka-story 8 | 1.0.0 9 | .. 10 | 11 | step3 12 | pom 13 | 14 | consumer 15 | producer 16 | 17 | 18 | -------------------------------------------------------------------------------- /step3/producer/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM maven:3.6.1-jdk-8 as mavenBuild 2 | COPY pom.xml pom.xml 3 | COPY src src 4 | RUN ["mvn", "install"] 5 | 6 | FROM java:8 7 | COPY --from=mavenBuild ./target/*.jar ./ 8 | ENV JAVA_OPTS "" 9 | CMD [ "bash", "-c", "java ${JAVA_OPTS} -jar *.jar"] 10 | -------------------------------------------------------------------------------- /step3/producer/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | com.github.framiere 6 | simple-producer 7 | 1.0.0 8 | jar 9 | 10 | 1.8 11 | 1.8 12 | UTF-8 13 | UTF-8 14 | 15 | 16 | 17 | org.apache.kafka 18 | kafka-clients 19 | 1.0.0 20 | 21 | 22 | 23 | install 24 | 25 | 26 | org.apache.maven.plugins 27 | maven-assembly-plugin 28 | 29 | 30 | install 31 | 32 | single 33 | 34 | 35 | false 36 | 37 | 38 | com.github.framiere.SimpleProducer 39 | 40 | 41 | 42 | jar-with-dependencies 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | -------------------------------------------------------------------------------- /step3/producer/src/main/java/com/github/framiere/SimpleProducer.java: -------------------------------------------------------------------------------- 1 | package com.github.framiere; 2 | 3 | import org.apache.kafka.clients.producer.KafkaProducer; 4 | import org.apache.kafka.clients.producer.Producer; 5 | import org.apache.kafka.clients.producer.ProducerConfig; 6 | import org.apache.kafka.clients.producer.ProducerRecord; 7 | 8 | import java.util.Properties; 9 | import java.util.concurrent.TimeUnit; 10 | 11 | public class SimpleProducer { 12 | public static void main(String[] args) throws InterruptedException { 13 | Properties props = new Properties(); 14 | 15 | props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "kafka-1:9092,kafka-2:9092,kafka-3:9092"); 16 | props.put(ProducerConfig.ACKS_CONFIG, "all"); 17 | props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer"); 18 | props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer"); 19 | 
System.out.println("Sending data to `sample` topic"); 20 | try (Producer producer = new KafkaProducer<>(props)) { 21 | int i = 0; 22 | while (true) { 23 | ProducerRecord record = new ProducerRecord<>("sample", "key " + i, "Value " + i); 24 | System.out.println("Sending " + record.key() + " " + record.value()); 25 | producer.send(record); 26 | i++; 27 | TimeUnit.SECONDS.sleep(1); 28 | } 29 | } 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /step4/README.md: -------------------------------------------------------------------------------- 1 | # Objective 2 | 3 | 1. Gather statistics using telegraf 4 | 5 | # Telegraf 6 | 7 | In this example, we'll add [telegraf](https://github.com/influxdata/telegraf/) that will gather the container metrics 8 | 9 | ```yml 10 | telegraf: 11 | image: telegraf:1.8 12 | restart: unless-stopped 13 | volumes: 14 | - /var/run/docker.sock:/tmp/docker.sock 15 | - ./telegraf.conf:/etc/telegraf/telegraf.conf:ro 16 | links: 17 | - kafka-1 18 | - kafka-2 19 | - kafka-3 20 | ``` 21 | 22 | Note: we specified the `unless-stopped` [restart policy](https://docs.docker.com/compose/compose-file/#restart) as telegraf will fail to start if the cluster is not already ready. 23 | 24 | Note: In order for telegraf to gather docker metrics, we provide it the docker socket as a volume mapping. 25 | 26 | The `telegraf.conf` is the following 27 | 28 | ```conf 29 | [agent] 30 | interval = "5s" 31 | 32 | [[inputs.docker]] 33 | endpoint = "unix:///tmp/docker.sock" 34 | 35 | [[outputs.kafka]] 36 | brokers = ["kafka-1:9092","kafka-2:9092","kafka-3:9092"] 37 | topic = "telegraf" 38 | ``` 39 | 40 | # Let's run it 41 | 42 | ``` 43 | $ docker-compose exec kafka-1 kafka-console-consumer --bootstrap-server kafka-1:9092 --topic telegraf 44 | docker_container_mem,container_version=unknown,com.docker.compose.service=kafka-1,io.confluent.docker=true,com.docker.compose.config-hash=50c870843a239004389079d1baf9067fe0fc0339701905f19af63240a358e2a2,com.docker.compose.container-number=1,com.docker.compose.version=1.17.1,com.docker.compose.project=step4,engine_host=moby,container_name=step4_kafka-1_1,container_image=confluentinc/cp-kafka,io.confluent.docker.git.id=e0c39c6,io.confluent.docker.build.number=None,com.docker.compose.oneoff=False,host=af95c0c993d7 total_pgfault=340388i,total_pgpgin=238572i,total_active_file=180224i,unevictable=0i,inactive_file=294912i,pgpgin=238572i,pgpgout=140181i,rss_huge=0i,total_pgmajfault=0i,total_rss_huge=0i,active_file=180224i,hierarchical_memory_limit=9223372036854771712i,pgmajfault=0i,total_rss=402534400i,total_mapped_file=65536i,max_usage=416272384i,total_active_anon=402452480i,total_inactive_anon=0i,total_inactive_file=294912i,total_unevictable=0i,active_anon=402452480i,rss=402534400i,total_cache=475136i,total_writeback=0i,limit=8096448512i,container_id="37f9bc055227429ee9e0cbb5444c1af3c99746ccda1e17b532e3428f6b969c00",cache=475136i,inactive_anon=0i,mapped_file=65536i,pgfault=340388i,total_pgpgout=140181i,writeback=0i,usage=410337280i,usage_percent=5.062246037784721 1514759002000000000 45 | docker_container_cpu,container_image=telegraf,com.docker.compose.oneoff=False,host=af95c0c993d7,container_name=step4_telegraf_1,com.docker.compose.config-hash=1f7ea37af395ca2db227212f76765d1970dfc55b618b26e39c63b977caa6e015,com.docker.compose.container-number=1,com.docker.compose.service=telegraf,cpu=cpu-total,container_version=1.5,com.docker.compose.project=step4,com.docker.compose.version=1.17.1,engine_host=moby 
usage_total=150416778i,usage_in_kernelmode=70000000i,throttling_periods=0i,throttling_throttled_periods=0i,usage_percent=0.1428634020618557,usage_in_usermode=80000000i,usage_system=94619910000000i,throttling_throttled_time=0i,container_id="af95c0c993d72f43ca2145af32d723d5ec92dbd387c330491e643286687b05b3" 1514759002000000000 46 | 47 | docker_container_cpu,com.docker.compose.container-number=1,host=af95c0c993d7,container_version=1.5,com.docker.compose.project=step4,com.docker.compose.version=1.17.1,engine_host=moby,container_name=step4_telegraf_1,com.docker.compose.config-hash=1f7ea37af395ca2db227212f76765d1970dfc55b618b26e39c63b977caa6e015,cpu=cpu0,container_image=telegraf,com.docker.compose.oneoff=False,com.docker.compose.service=telegraf usage_total=3819059i,container_id="af95c0c993d72f43ca2145af32d723d5ec92dbd387c330491e643286687b05b3" 1514759002000000000 48 | 49 | docker_container_cpu,com.docker.compose.oneoff=False,com.docker.compose.project=step4,host=af95c0c993d7,container_name=step4_telegraf_1,com.docker.compose.config-hash=1f7ea37af395ca2db227212f76765d1970dfc55b618b26e39c63b977caa6e015,com.docker.compose.service=telegraf,com.docker.compose.version=1.17.1,container_image=telegraf,container_version=1.5,cpu=cpu1,engine_host=moby,com.docker.compose.container-number=1 container_id="af95c0c993d72f43ca2145af32d723d5ec92dbd387c330491e643286687b05b3",usage_total=45071361i 1514759002000000000 50 | ... 51 | ... 52 | ``` 53 | 54 | Fine ! We have real data that mean something. 55 | 56 | # Telegraf topic 57 | 58 | Let's describe the telegraf topic 59 | 60 | ``` 61 | $ docker-compose exec kafka-1 kafka-topics --zookeeper zookeeper:2181 --describe --topic telegraf 62 | Topic:telegraf PartitionCount:1 ReplicationFactor:1 Configs: 63 | Topic: telegraf Partition: 0 Leader: 3 Replicas: 3 Isr: 3 64 | ``` 65 | 66 | `PartitionCount:1` and `ReplicationFactor:1` ? Not a good configuration. 67 | 68 | Let's change that https://docs.confluent.io/current/kafka/post-deployment.html#tweaking-configs-dynamically 69 | and https://docs.confluent.io/current/kafka/post-deployment.html#modifying-topics 70 | 71 | 72 | ``` 73 | $ docker-compose exec kafka-1 kafka-topics --zookeeper zookeeper:2181 --alter --topic telegraf --partitions 10 74 | WARNING: If partitions are increased for a topic that has a key, the partition logic or ordering of the messages will be affected 75 | Adding partitions succeeded! 76 | ``` 77 | 78 | Fine let's do the same for the `replication-factor` right ? 79 | 80 | Well, by reading the [increasing replication factor documentation](https://docs.confluent.io/current/kafka/post-deployment.html#increasing-replication-factor) it seems like it's more complicated than thought. 81 | 82 | Please take the time to understand why it's not as simple as addition partition. 83 | 84 | Do not forget to read the [scaling-the-cluster](https://docs.confluent.io/current/kafka/post-deployment.html#scaling-the-cluster) chapter. 
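For reference, increasing the replication factor goes through a partition reassignment rather than a simple `--alter`. The following is only a sketch, assuming the standard `kafka-reassign-partitions` tool and covering just partition 0 of `telegraf`; a real reassignment file would list every partition of the topic.

```sh
# 1. write a minimal reassignment plan inside the broker container
$ docker-compose exec kafka-1 bash -c 'echo "{\"version\":1,\"partitions\":[{\"topic\":\"telegraf\",\"partition\":0,\"replicas\":[1,2,3]}]}" > /tmp/increase-rf.json'
# 2. execute it
$ docker-compose exec kafka-1 kafka-reassign-partitions --zookeeper zookeeper:2181 --reassignment-json-file /tmp/increase-rf.json --execute
```

Once the reassignment has completed, `--describe` on the topic should report three replicas for that partition.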
85 | 86 | -------------------------------------------------------------------------------- /step4/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '2' 2 | services: 3 | zookeeper: 4 | image: confluentinc/cp-zookeeper:5.3.1 5 | hostname: zookeeper 6 | environment: 7 | ZOOKEEPER_CLIENT_PORT: 2181 8 | 9 | kafka-1: 10 | image: confluentinc/cp-kafka:5.3.1 11 | hostname: kafka-1 12 | depends_on: 13 | - zookeeper 14 | environment: 15 | KAFKA_BROKER_ID: 1 16 | KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181 17 | KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka-1:9092 18 | 19 | kafka-2: 20 | image: confluentinc/cp-kafka:5.3.1 21 | hostname: kafka-2 22 | depends_on: 23 | - zookeeper 24 | environment: 25 | KAFKA_BROKER_ID: 2 26 | KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181 27 | KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka-2:9092 28 | 29 | kafka-3: 30 | image: confluentinc/cp-kafka:5.3.1 31 | hostname: kafka-3 32 | depends_on: 33 | - zookeeper 34 | environment: 35 | KAFKA_BROKER_ID: 3 36 | KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181 37 | KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka-3:9092 38 | 39 | telegraf: 40 | image: telegraf:1.8 41 | restart: unless-stopped 42 | volumes: 43 | - /var/run/docker.sock:/tmp/docker.sock 44 | - ./telegraf.conf:/etc/telegraf/telegraf.conf:ro 45 | links: 46 | - kafka-1 47 | - kafka-2 48 | - kafka-3 49 | -------------------------------------------------------------------------------- /step4/telegraf.conf: -------------------------------------------------------------------------------- 1 | [agent] 2 | interval = "10s" 3 | 4 | [[inputs.docker]] 5 | endpoint = "unix:///tmp/docker.sock" 6 | 7 | [[outputs.kafka]] 8 | brokers = ["kafka-1:9092","kafka-2:9092","kafka-3:9092"] 9 | topic = "telegraf" 10 | -------------------------------------------------------------------------------- /step5/README.md: -------------------------------------------------------------------------------- 1 | # Objective 2 | 3 | 1. Better kafka default settings 4 | 5 | # Telegraf 6 | 7 | We want to have better topic defaults, let's look into https://kafka.apache.org/documentation/#brokerconfigs 8 | 9 | Let's add `KAFKA_DEFAULT_REPLICATION_FACTOR: 3` configuration to all kafka brokers, and disable the automatic topic creation with `KAFKA_AUTO_CREATE_TOPICS_ENABLED` 10 | 11 | ```yml 12 | kafka-1: 13 | image: confluentinc/cp-kafka:5.3.1 14 | hostname: kafka-1 15 | depends_on: 16 | - zookeeper 17 | environment: 18 | KAFKA_BROKER_ID: 1 19 | KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181 20 | KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka-1:19092 21 | KAFKA_DEFAULT_REPLICATION_FACTOR: 3 22 | KAFKA_AUTO_CREATE_TOPICS_ENABLED: "false" 23 | ``` 24 | 25 | We also create the `telegraf` topic with the right number of partitions from the get-go, and we rely on the `cub` tool to wait for zookeeper to be up before that. 
26 | 27 | ```yml 28 | telegraf-topic: 29 | image: confluentinc/cp-kafka:5.3.1 30 | command: bash -c "cub kafka-ready -z zookeeper:2181 1 30 && kafka-topics --zookeeper zookeeper:2181 --create --topic telegraf --partitions 10 --replication-factor 3" 31 | depends_on: 32 | - zookeeper 33 | ``` 34 | 35 | Let' run it `docker-compose up` and verify 36 | 37 | ``` 38 | $ docker-compose exec kafka-1 kafka-topics \ 39 | --zookeeper zookeeper:2181 \ 40 | --describe \ 41 | --topic telegraf 42 | Topic:telegraf PartitionCount:10 ReplicationFactor:3 Configs: 43 | Topic: telegraf Partition: 0 Leader: 2 Replicas: 2,1,3 Isr: 2,1,3 44 | Topic: telegraf Partition: 1 Leader: 3 Replicas: 3,2,1 Isr: 3,2,1 45 | Topic: telegraf Partition: 2 Leader: 1 Replicas: 1,3,2 Isr: 1,3,2 46 | Topic: telegraf Partition: 3 Leader: 2 Replicas: 2,3,1 Isr: 2,3,1 47 | Topic: telegraf Partition: 4 Leader: 3 Replicas: 3,1,2 Isr: 3,1,2 48 | Topic: telegraf Partition: 5 Leader: 1 Replicas: 1,3,2 Isr: 1,3,2 49 | Topic: telegraf Partition: 6 Leader: 2 Replicas: 2,3,1 Isr: 2,3,1 50 | Topic: telegraf Partition: 7 Leader: 3 Replicas: 3,1,2 Isr: 3,1,2 51 | Topic: telegraf Partition: 8 Leader: 1 Replicas: 1,2,3 Isr: 1,2,3 52 | Topic: telegraf Partition: 9 Leader: 2 Replicas: 2,1,3 Isr: 2,1,3 53 | ``` 54 | 55 | All good! 56 | 57 | -------------------------------------------------------------------------------- /step5/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '2' 2 | services: 3 | zookeeper: 4 | image: confluentinc/cp-zookeeper:5.3.1 5 | hostname: zookeeper 6 | environment: 7 | ZOOKEEPER_CLIENT_PORT: 2181 8 | 9 | kafka-1: 10 | image: confluentinc/cp-kafka:5.3.1 11 | hostname: kafka-1 12 | depends_on: 13 | - zookeeper 14 | environment: 15 | KAFKA_BROKER_ID: 1 16 | KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181 17 | KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka-1:9092 18 | KAFKA_DEFAULT_REPLICATION_FACTOR: 3 19 | KAFKA_AUTO_CREATE_TOPICS_ENABLED: 'false' 20 | 21 | kafka-2: 22 | image: confluentinc/cp-kafka:5.3.1 23 | hostname: kafka-2 24 | depends_on: 25 | - zookeeper 26 | environment: 27 | KAFKA_BROKER_ID: 2 28 | KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181 29 | KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka-2:9092 30 | KAFKA_DEFAULT_REPLICATION_FACTOR: 3 31 | KAFKA_AUTO_CREATE_TOPICS_ENABLED: 'false' 32 | 33 | kafka-3: 34 | image: confluentinc/cp-kafka:5.3.1 35 | hostname: kafka-3 36 | depends_on: 37 | - zookeeper 38 | environment: 39 | KAFKA_BROKER_ID: 3 40 | KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181 41 | KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka-3:9092 42 | KAFKA_DEFAULT_REPLICATION_FACTOR: 3 43 | KAFKA_AUTO_CREATE_TOPICS_ENABLED: 'false' 44 | 45 | telegraf-topic: 46 | image: confluentinc/cp-kafka:5.3.1 47 | command: bash -c "cub kafka-ready -z zookeeper:2181 1 30 && kafka-topics --zookeeper zookeeper:2181 --create --topic telegraf --partitions 10 --replication-factor 3" 48 | depends_on: 49 | - zookeeper 50 | 51 | telegraf: 52 | image: telegraf:1.8 53 | restart: unless-stopped 54 | volumes: 55 | - /var/run/docker.sock:/tmp/docker.sock 56 | - ./telegraf.conf:/etc/telegraf/telegraf.conf:ro 57 | depends_on: 58 | - kafka-1 59 | - kafka-2 60 | - kafka-3 61 | - telegraf-topic 62 | -------------------------------------------------------------------------------- /step5/telegraf.conf: -------------------------------------------------------------------------------- 1 | [agent] 2 | interval = "10s" 3 | 4 | [[inputs.docker]] 5 | endpoint = "unix:///tmp/docker.sock" 6 | 7 | [[outputs.kafka]] 8 
| brokers = ["kafka-1:9092","kafka-2:9092","kafka-3:9092"] 9 | topic = "telegraf" 10 | -------------------------------------------------------------------------------- /step6/README.md: -------------------------------------------------------------------------------- 1 | # Objective 2 | 3 | 1. Streams 4 | 5 | # Docker 6 | 7 | ``` 8 | $ docker-compose build 9 | $ docker-compose up -d 10 | $ docker-compose ps 11 | $ docker-compose exec kafka-1 kafka-topics \ 12 | --zookeeper zookeeper:2181 \ 13 | --list 14 | $ docker-compose exec kafka-1 kafka-console-consumer \ 15 | --bootstrap-server localhost:9092 \ 16 | --topic telegraf-input-by-thread \ 17 | --from-beginning 18 | $ docker-compose exec kafka-1 kafka-console-consumer \ 19 | --bootstrap-server localhost:9092 \ 20 | --topic telegraf-10s-window-count \ 21 | --property print.key=true \ 22 | --value-deserializer org.apache.kafka.common.serialization.LongDeserializer \ 23 | --from-beginning 24 | ``` 25 | 26 | -------------------------------------------------------------------------------- /step6/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '2' 2 | services: 3 | zookeeper: 4 | image: confluentinc/cp-zookeeper:5.3.1 5 | hostname: zookeeper 6 | environment: 7 | ZOOKEEPER_CLIENT_PORT: 2181 8 | 9 | kafka-1: 10 | image: confluentinc/cp-kafka:5.3.1 11 | hostname: kafka-1 12 | depends_on: 13 | - zookeeper 14 | environment: 15 | KAFKA_BROKER_ID: 1 16 | KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181 17 | KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka-1:9092 18 | 19 | kafka-2: 20 | image: confluentinc/cp-kafka:5.3.1 21 | hostname: kafka-2 22 | depends_on: 23 | - zookeeper 24 | environment: 25 | KAFKA_BROKER_ID: 2 26 | KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181 27 | KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka-2:9092 28 | 29 | kafka-3: 30 | image: confluentinc/cp-kafka:5.3.1 31 | hostname: kafka-3 32 | depends_on: 33 | - zookeeper 34 | environment: 35 | KAFKA_BROKER_ID: 3 36 | KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181 37 | KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka-3:9092 38 | 39 | telegraf-topic: 40 | image: confluentinc/cp-kafka:5.3.1 41 | command: bash -c "cub kafka-ready -z zookeeper:2181 1 30 && kafka-topics --zookeeper zookeeper:2181 --create --topic telegraf --partitions 10 --replication-factor 3" 42 | depends_on: 43 | - zookeeper 44 | 45 | telegraf: 46 | image: telegraf:1.8 47 | restart: unless-stopped 48 | volumes: 49 | - /var/run/docker.sock:/tmp/docker.sock 50 | - ./telegraf.conf:/etc/telegraf/telegraf.conf:ro 51 | depends_on: 52 | - kafka-1 53 | - kafka-2 54 | - kafka-3 55 | - telegraf-topic 56 | 57 | stream: 58 | build: streams/ 59 | depends_on: 60 | - kafka-1 61 | - kafka-2 62 | - kafka-3 63 | - telegraf 64 | -------------------------------------------------------------------------------- /step6/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | 6 | com.github.framiere 7 | kafka-story 8 | 1.0.0 9 | .. 
10 | 11 | step6 12 | pom 13 | 14 | streams 15 | 16 | 17 | -------------------------------------------------------------------------------- /step6/streams/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM maven:3.6.1-jdk-8 as mavenBuild 2 | COPY pom.xml pom.xml 3 | COPY src src 4 | RUN ["mvn", "install"] 5 | 6 | FROM confluentinc/cp-base:5.3.1 7 | COPY --from=mavenBuild ./target/*.jar ./ 8 | ENV BROKER_LIST "kafka-1:9092,kafka-2:9092,kafka-3:9092" 9 | ENV JAVA_OPTS "" 10 | CMD [ "bash", "-c", "cub kafka-ready -b ${BROKER_LIST} 3 30 && java ${JAVA_OPTS} -jar *.jar ${BROKER_LIST}" ] 11 | 12 | -------------------------------------------------------------------------------- /step6/streams/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | com.github.framiere 6 | simple-streams 7 | 1.0.0 8 | jar 9 | 10 | 1.8 11 | 1.8 12 | UTF-8 13 | UTF-8 14 | 15 | 16 | 17 | org.apache.kafka 18 | kafka-clients 19 | 2.3.0 20 | 21 | 22 | org.apache.kafka 23 | kafka-streams 24 | 2.3.0 25 | 26 | 27 | 28 | junit 29 | junit 30 | 4.13.1 31 | test 32 | 33 | 34 | com.github.charithe 35 | kafka-junit 36 | 4.1.6 37 | test 38 | 39 | 40 | org.assertj 41 | assertj-core 42 | 3.8.0 43 | test 44 | 45 | 46 | 47 | 48 | 49 | org.apache.maven.plugins 50 | maven-assembly-plugin 51 | 52 | 53 | install 54 | 55 | single 56 | 57 | 58 | false 59 | 60 | 61 | com.github.framiere.SimpleStream 62 | 63 | 64 | 65 | jar-with-dependencies 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | -------------------------------------------------------------------------------- /step6/streams/src/main/java/com/github/framiere/SimpleStream.java: -------------------------------------------------------------------------------- 1 | package com.github.framiere; 2 | 3 | import org.apache.kafka.clients.consumer.ConsumerConfig; 4 | import org.apache.kafka.common.serialization.Serdes; 5 | import org.apache.kafka.streams.*; 6 | import org.apache.kafka.streams.kstream.*; 7 | import org.apache.kafka.streams.processor.*; 8 | 9 | import java.time.Duration; 10 | import java.util.ArrayList; 11 | import java.util.List; 12 | import java.util.Properties; 13 | import java.util.concurrent.TimeUnit; 14 | 15 | /** 16 | * ./confluent local destroy 17 | * ./confluent local start 18 | * ./kafka-topics --zookeeper localhost:2181 --create --topic telegraf --partitions 3 --replication-factor 1 19 | * run application java -jar simplestream*.jar 20 | * seq 10000 | ./kafka-console-producer --broker-list localhost:9092 --topic telegraf 21 | * or type yourself words : ./kafka-console-producer --broker-list localhost:9092 --topic telegraf 22 | * ./kafka-topics --zookeeper localhost:2181 --list 23 | * ./kafka-console-consumer --bootstrap-server localhost:9092 --topic telegraf-input-by-thread --from-beginning 24 | * ./kafka-console-consumer --bootstrap-server localhost:9092 --topic telegraf-10s-window-count --property print.key=true --value-deserializer org.apache.kafka.common.serialization.LongDeserializer --from-beginning 25 | */ 26 | public class SimpleStream { 27 | 28 | public void stream(String bootstrapServers) { 29 | Properties properties = new Properties(); 30 | properties.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers); 31 | properties.put(StreamsConfig.NUM_STREAM_THREADS_CONFIG, 3); 32 | properties.put(StreamsConfig.APPLICATION_ID_CONFIG, "simple-stream"); 33 | properties.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 5 * 1000); 34 | 
properties.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest"); 35 | properties.put(StreamsConfig.CACHE_MAX_BYTES_BUFFERING_CONFIG, 0); 36 | properties.put(StreamsConfig.DEFAULT_TIMESTAMP_EXTRACTOR_CLASS_CONFIG, WallclockTimestampExtractor.class); 37 | 38 | StreamsBuilder builder = new StreamsBuilder(); 39 | KStream input = builder.stream("telegraf", Consumed.with(Serdes.String(), Serdes.String())); 40 | 41 | // map each value and add the thread that processed it 42 | input 43 | .mapValues(v -> Thread.currentThread().getName() + " " + v) 44 | .to("telegraf-input-by-thread", Produced.with(Serdes.String(), Serdes.String())); 45 | 46 | // grab the first word as a key, and make a global count out of it, and push the changes to telegraf-global-count 47 | input 48 | .map((key, value) -> new KeyValue<>(value.split("[, ]")[0], 0L)) 49 | .groupByKey(Grouped.with(Serdes.String(), Serdes.Long())) 50 | .count() 51 | .toStream() 52 | .to("telegraf-global-count", Produced.with(Serdes.String(), Serdes.Long())); 53 | 54 | // check with ./kafka-console-consumer --bootstrap-server localhost:9092 --topic telegraf-10s-window-count --property print.key=true --value-deserializer org.apache.kafka.common.serialization.LongDeserializer 55 | // count the first word on a sliding window, and push the changes to telegraf-10s-window-count 56 | input 57 | .map((key, value) -> new KeyValue<>(value.split("[, ]")[0], 1L)) 58 | .groupByKey(Grouped.with(Serdes.String(), Serdes.Long())) 59 | .windowedBy(TimeWindows.of(Duration.ofSeconds(10))) 60 | .count() 61 | .toStream((windowedRegion, count) -> windowedRegion.toString()) 62 | .to("telegraf-10s-window-count", Produced.with(Serdes.String(), Serdes.Long())); 63 | 64 | // you can branch to multiple destinations, please note that the branching happens on first-match: 65 | // A record in the original stream is assigned to the corresponding result 66 | // stream for the first predicate that evaluates to true, and is assigned to this stream only. 67 | // A record will be dropped if none of the predicates evaluate to true. 68 | KStream[] branch = input.branch( 69 | (key, value) -> (value.length() % 3) == 0, 70 | (key, value) -> (value.length() % 5) == 0, 71 | (key, value) -> true); 72 | 73 | branch[0].to("telegraf-length-divisible-by-3", Produced.with(Serdes.String(), Serdes.String())); 74 | branch[1].to("telegraf-length-divisible-by-5", Produced.with(Serdes.String(), Serdes.String())); 75 | branch[2].to("telegraf-length-divisible-by-neither-3-nor-5", Produced.with(Serdes.String(), Serdes.String())); 76 | 77 | // You can also use the low level APIs if you need to handle complex use cases, 78 | input 79 | .process(() -> new AbstractProcessor() { 80 | private final List batch = new ArrayList<>(); 81 | 82 | @Override 83 | public void init(ProcessorContext context) { 84 | super.init(context); 85 | // Punctuator function will be called on the same thread 86 | context().schedule(TimeUnit.SECONDS.toMillis(10), PunctuationType.WALL_CLOCK_TIME, this::flush); 87 | } 88 | 89 | private void flush(long timestamp) { 90 | if (!batch.isEmpty()) { 91 | // sending to an external system ? 
92 | System.out.println(timestamp + " " + Thread.currentThread().getName() + " Flushing batch of " + batch.size()); 93 | batch.clear(); 94 | } 95 | } 96 | 97 | @Override 98 | public void process(String key, String value) { 99 | batch.add(value); 100 | context().forward(key, value); 101 | } 102 | }); 103 | 104 | 105 | Topology build = builder.build(); 106 | 107 | System.out.println(build.describe()); 108 | 109 | KafkaStreams kafkaStreams = new KafkaStreams(build, properties); 110 | kafkaStreams.cleanUp(); 111 | kafkaStreams.start(); 112 | } 113 | 114 | public static void main(String[] args) { 115 | String bootstrapServers = args.length == 1 ? args[0] : "localhost:9092"; 116 | System.out.println(bootstrapServers); 117 | new SimpleStream().stream(bootstrapServers); 118 | } 119 | } 120 | -------------------------------------------------------------------------------- /step6/streams/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | log4j.rootLogger=INFO, stdout 2 | 3 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 4 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 5 | log4j.appender.stdout.layout.ConversionPattern=%p %m (%c:%L) %n -------------------------------------------------------------------------------- /step6/telegraf.conf: -------------------------------------------------------------------------------- 1 | [agent] 2 | interval = "10s" 3 | 4 | [[inputs.docker]] 5 | endpoint = "unix:///tmp/docker.sock" 6 | 7 | [[outputs.kafka]] 8 | brokers = ["kafka-1:9092","kafka-2:9092","kafka-3:9092"] 9 | topic = "telegraf" 10 | -------------------------------------------------------------------------------- /step7/README.md: -------------------------------------------------------------------------------- 1 | # Objective 2 | 3 | Capture JMX metrics via Jolokia and Telegraf. 4 | 5 | 6 | # Jolokia 7 | 8 | Jolokia is a tool to access the JMX metrics via HTTP. This is especially useful for non jvm languages such as go. 9 | 10 | Telegraf can access JMX metrics only via http, thus the need for jolokia. 11 | 12 | # Jmx desktop browser 13 | 14 | In order to see the JMX metrics, you can use the venerable [jconsole](https://docs.oracle.com/javase/7/docs/technotes/guides/management/jconsole.html), or a much better tool that is shipped with JDK8 [Java Mission Control](http://www.oracle.com/technetwork/java/javaseproducts/mission-control/java-mission-control-1998576.html) . 15 | 16 | To run it : 17 | 18 | ```` 19 | $ jmc & 20 | ```` 21 | 22 | ![jmc](images/jmc.png) 23 | 24 | 25 | # Use Jolokia to attach to a running Kafka process 26 | 27 | This commands will 28 | 1. download [Confluent distribution](https://www.confluent.io/download/) 29 | 1. download [Jolokia](https://jolokia.org) 30 | 1. unzip the confluent distribution 31 | 1. start Kafka and Zookeeper 32 | 1. see all the running jvm 33 | 1. see the jolokia help output 34 | 1. connect the jolokia java agent to the running Kafka process 35 | 1. test jolokia output 36 | 1. disconnect the jolokia agent from the running Kafka process 37 | 1. 
stop Kafka and Zookeeper 38 | 39 | ``` 40 | wget http://packages.confluent.io/archive/5.3/confluent-5.3.1-2.12.tar.gz 41 | wget https://repo1.maven.org/maven2/org/jolokia/jolokia-jvm/1.3.7/jolokia-jvm-1.3.7-agent.jar 42 | tar -xf confluent-5.3.1-2.12.tar.gz 43 | curl -L https://cnfl.io/cli | sh -s -- -b confluent-5.3.1/bin 44 | confluent-5.3.1/bin/confluent local start kafka 45 | jps 46 | java -jar jolokia-jvm-1.3.7-agent.jar --help | head -13 47 | java -jar jolokia-jvm-1.3.7-agent.jar start `jps | grep SupportedKafka | cut -d ' ' -f 1` 48 | curl http://127.0.0.1:8778/jolokia/ 49 | java -jar jolokia-jvm-1.3.7-agent.jar stop `jps | grep SupportedKafka | cut -d ' ' -f 1` 50 | confluent-5.3.1/bin/confluent local stop 51 | ``` 52 | 53 | # Telegraf 54 | 55 | Well now we can add the [jolokia2 plugin](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/jolokia2) and setup the metrics we want to gather using jolokia. 56 | 57 | Please make sure you know how to gather tag_keys 58 | 59 | ![mbean info](images/mbean-info.png) 60 | 61 | 62 | ``` 63 | [[inputs.jolokia2_agent]] 64 | urls = ["http://kafka-1:8778/jolokia/","http://kafka-2:8778/jolokia/","http://kafka-3:8778/jolokia/"] 65 | 66 | [[inputs.jolokia2_agent.metric]] 67 | name = "kafka_cluster_partition" 68 | mbean = "kafka.cluster:type=Partition,topic=*,partition=*" 69 | tag_keys = ["type","topic","partition"] 70 | ``` 71 | 72 | 73 | # Splitting the configuration 74 | 75 | As we want to monitor 76 | 77 | * kafka 78 | * zookeeper 79 | * kafka consumer 80 | * docker 81 | 82 | We'll split the inputs definition in multiple files 83 | 84 | ```sh 85 | $ tree telegraf-inputs/ 86 | telegraf-inputs/ 87 | ├── consumer.conf 88 | ├── docker.conf 89 | ├── kafka.conf 90 | └── zookeeper.conf 91 | ``` 92 | 93 | we'll update add `./telegraf-inputs/:/tmp/telegraf-inputs/:ro` in the `volumes` definition. 94 | 95 | And change the default telegraf command to `telegraf --config-directory /tmp/telegraf-inputs` 96 | 97 | ```yml 98 | telegraf: 99 | image: telegraf:1.8 100 | restart: unless-stopped 101 | volumes: 102 | - /var/run/docker.sock:/tmp/docker.sock 103 | - ./telegraf.conf:/etc/telegraf/telegraf.conf:ro 104 | - ./telegraf-inputs/:/tmp/telegraf-inputs/:ro 105 | command: telegraf --config-directory /tmp/telegraf-inputs 106 | depends_on: 107 | - zookeeper 108 | - kafka-1 109 | - kafka-2 110 | - kafka-3 111 | - consumer-1 112 | ``` 113 | 114 | # Integrating the Jolokia agent 115 | 116 | In order to install the [jolokia agent](https://jolokia.org/agent/jvm.html) to our java application we need to set the following environment variable 117 | 118 | ``` 119 | KAFKA_OPTS: "-javaagent:/some_folder/jolokia.jar=host=0.0.0.0" 120 | ``` 121 | 122 | example 123 | 124 | ```yml 125 | consumer-1: 126 | image: confluentinc/cp-kafka:5.3.1 127 | hostname: consumer-3 128 | depends_on: 129 | - zookeeper 130 | command: kafka-console-consumer --bootstrap-server kafka-1:9092,kafka-2:9092,kafka-3:9092 --topic telegraf --from-beginning 131 | environment: 132 | KAFKA_OPTS: "-javaagent:/some_folder/jolokia.jar=host=0.0.0.0" 133 | depends_on: 134 | - kafka-1 135 | - kafka-2 136 | - kafka-3 137 | 138 | ``` 139 | 140 | That is all fine and good, but there is no `/some_folder/jolokia.jar` in the `confluentinc/cp-kafka` image. 
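You can see the problem for yourself; this is only an illustration, the exact path is irrelevant:

```sh
# the agent jar is nowhere to be found in the stock image
$ docker-compose exec kafka-1 ls /some_folder/jolokia.jar
```

The command fails because nothing ever put the jar there, so we need a way to make it available inside the containers.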
141 | 142 | # Docker volumes to the rescue 143 | 144 | In order to solve this issue we'll rely on 145 | * the [volumes](https://docs.docker.com/compose/compose-file/compose-file-v2/#volumes) docker-compose attribute 146 | * the [volumes_from](https://docs.docker.com/compose/compose-file/compose-file-v2/#volumes_from) docker-compose attribute 147 | * the [jolokia/java-jolokia](https://hub.docker.com/r/jolokia/java-jolokia/) image. 148 | 149 | 150 | ```yml 151 | version: '2' 152 | services: 153 | jolokia: 154 | image: jolokia/java-jolokia 155 | volumes: 156 | - /opt/jolokia/ 157 | example: 158 | image: alpine 159 | volumes_from: 160 | - jolokia 161 | command: sh -c "echo `ls /opt/jolokia` is from an external folder" 162 | 163 | ``` 164 | 165 | Here's the output 166 | 167 | ``` 168 | $ docker-compose -f expose-volume.yml up 169 | Starting step7_jolokia_1 ... 170 | Starting step7_jolokia_1 ... done 171 | Starting step7_using_jolokia_1 ... 172 | Starting step7_using_jolokia_1 ... done 173 | Attaching to step7_jolokia_1, step7_using_jolokia_1 174 | jolokia_1 | Jolokia JVM Agent 1.3.1 175 | using_an_external_file_1 | jolokia.jar is from an external folder 176 | step7_jolokia_1 exited with code 0 177 | step7_using_jolokia_1 exited with code 0 178 | 179 | ``` 180 | 181 | We can now reference the `jolokia` volume! 182 | 183 | ```yml 184 | consumer-1: 185 | image: confluentinc/cp-kafka:5.3.1 186 | hostname: consumer-3 187 | depends_on: 188 | - zookeeper 189 | command: kafka-console-consumer --bootstrap-server kafka-1:9092,kafka-2:9092,kafka-3:9092 --topic telegraf --from-beginning 190 | environment: 191 | KAFKA_OPTS: "-javaagent:/opt/jolokia/jolokia.jar=host=0.0.0.0" 192 | volumes_from: 193 | - jolokia 194 | depends_on: 195 | - kafka-1 196 | - kafka-2 197 | - kafka-3 198 | 199 | ``` 200 | 201 | We're done 202 | 203 | 204 | -------------------------------------------------------------------------------- /step7/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '2' 2 | services: 3 | jolokia: 4 | image: jolokia/java-jolokia 5 | volumes: 6 | - /opt/jolokia/ 7 | 8 | zookeeper: 9 | image: confluentinc/cp-zookeeper:5.3.1 10 | hostname: zookeeper 11 | environment: 12 | ZOOKEEPER_CLIENT_PORT: 2181 13 | KAFKA_OPTS: "-javaagent:/opt/jolokia/jolokia.jar=host=0.0.0.0" 14 | volumes_from: 15 | - jolokia 16 | 17 | kafka-1: 18 | image: confluentinc/cp-kafka:5.3.1 19 | hostname: kafka-1 20 | depends_on: 21 | - zookeeper 22 | environment: 23 | KAFKA_BROKER_ID: 1 24 | KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181 25 | KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka-1:9092 26 | KAFKA_DEFAULT_REPLICATION_FACTOR: 3 27 | KAFKA_AUTO_CREATE_TOPICS_ENABLED: 'false' 28 | KAFKA_OPTS: "-javaagent:/opt/jolokia/jolokia.jar=host=0.0.0.0" 29 | volumes_from: 30 | - jolokia 31 | 32 | kafka-2: 33 | image: confluentinc/cp-kafka:5.3.1 34 | hostname: kafka-2 35 | depends_on: 36 | - zookeeper 37 | environment: 38 | KAFKA_BROKER_ID: 2 39 | KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181 40 | KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka-2:9092 41 | KAFKA_DEFAULT_REPLICATION_FACTOR: 3 42 | KAFKA_AUTO_CREATE_TOPICS_ENABLED: 'false' 43 | KAFKA_OPTS: "-javaagent:/opt/jolokia/jolokia.jar=host=0.0.0.0" 44 | volumes_from: 45 | - jolokia 46 | 47 | kafka-3: 48 | image: confluentinc/cp-kafka:5.3.1 49 | hostname: kafka-3 50 | depends_on: 51 | - zookeeper 52 | environment: 53 | KAFKA_BROKER_ID: 3 54 | KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181 55 | KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka-3:9092 56 | 
KAFKA_DEFAULT_REPLICATION_FACTOR: 3 57 | KAFKA_AUTO_CREATE_TOPICS_ENABLED: 'false' 58 | KAFKA_OPTS: "-javaagent:/opt/jolokia/jolokia.jar=host=0.0.0.0" 59 | volumes_from: 60 | - jolokia 61 | 62 | telegraf-topic: 63 | image: confluentinc/cp-kafka:5.3.1 64 | command: bash -c "cub kafka-ready -z zookeeper:2181 1 30 && kafka-topics --zookeeper zookeeper:2181 --create --topic telegraf --partitions 10 --replication-factor 3" 65 | depends_on: 66 | - zookeeper 67 | 68 | telegraf: 69 | image: telegraf:1.8 70 | restart: unless-stopped 71 | volumes: 72 | - /var/run/docker.sock:/tmp/docker.sock 73 | - ./telegraf.conf:/tmp/telegraf.conf:ro 74 | - ./telegraf-inputs/:/tmp/telegraf-inputs/:ro 75 | command: telegraf -config /tmp/telegraf.conf --config-directory /tmp/telegraf-inputs 76 | depends_on: 77 | - zookeeper 78 | - kafka-1 79 | - kafka-2 80 | - kafka-3 81 | 82 | producer: 83 | build: ../step3/producer 84 | environment: 85 | JAVA_OPTS: "-javaagent:/opt/jolokia/jolokia.jar=host=0.0.0.0" 86 | volumes_from: 87 | - jolokia 88 | depends_on: 89 | - kafka-1 90 | - kafka-2 91 | - kafka-3 92 | 93 | consumer: 94 | build: ../step3/consumer 95 | environment: 96 | JAVA_OPTS: "-javaagent:/opt/jolokia/jolokia.jar=host=0.0.0.0" 97 | volumes_from: 98 | - jolokia 99 | depends_on: 100 | - kafka-1 101 | - kafka-2 102 | - kafka-3 103 | 104 | streams: 105 | build: ../step6/streams 106 | environment: 107 | JAVA_OPTS: "-javaagent:/opt/jolokia/jolokia.jar=host=0.0.0.0" 108 | volumes_from: 109 | - jolokia 110 | depends_on: 111 | - kafka-1 112 | - kafka-2 113 | - kafka-3 114 | - telegraf 115 | -------------------------------------------------------------------------------- /step7/expose-volume.yml: -------------------------------------------------------------------------------- 1 | version: '2' 2 | services: 3 | jolokia: 4 | image: jolokia/java-jolokia 5 | volumes: 6 | - /opt/jolokia/ 7 | using_an_external_file: 8 | image: alpine 9 | volumes_from: 10 | - jolokia 11 | command: sh -c "echo `ls /opt/jolokia` is from an external folder" 12 | -------------------------------------------------------------------------------- /step7/images/jmc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/framiere/a-kafka-story/7cb5f0e6323bab10b44386b0aeab9a6ee00bfead/step7/images/jmc.png -------------------------------------------------------------------------------- /step7/images/mbean-info.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/framiere/a-kafka-story/7cb5f0e6323bab10b44386b0aeab9a6ee00bfead/step7/images/mbean-info.png -------------------------------------------------------------------------------- /step7/telegraf-inputs/consumer.conf: -------------------------------------------------------------------------------- 1 | [[inputs.jolokia2_agent]] 2 | urls = ["http://consumer:8778/jolokia/"] 3 | 4 | ## Consumer 5 | [[inputs.jolokia2_agent.metric]] 6 | name = "kafka_consumer_app_info" 7 | mbean = "kafka.consumer:type=app_info,id=*" 8 | tag_keys = ["id"] 9 | [[inputs.jolokia2_agent.metric]] 10 | name = "kafka_consumer_consumer_coordinator_metrics" 11 | mbean = "kafka.consumer:type=consumer-coordinator-metrics,client-id=*" 12 | tag_keys = ["client-id"] 13 | [[inputs.jolokia2_agent.metric]] 14 | name = "kafka_consumer_fetch_manager_metrics" 15 | mbean = "kafka.consumer:type=consumer-fetch-manager-metrics,client-id=*" 16 | tag_keys = ["client-id"] 17 | [[inputs.jolokia2_agent.metric]] 18 | name = 
"kafka_consumer_metrics" 19 | mbean = "kafka.consumer:type=consumer-metrics,client-id=*" 20 | tag_keys = ["client-id"] 21 | [[inputs.jolokia2_agent.metric]] 22 | name = "kafka_consumer_node_metrics" 23 | mbean = "kafka.consumer:type=consumer-node-metrics,client-id=*,node-id=*" 24 | tag_keys = ["client-id","node-id"] 25 | [[inputs.jolokia2_agent.metric]] 26 | name = "kafka_consumer_kafka_metrics_count" 27 | mbean = "kafka.consumer:type=kafka-metrics-count,client-id=*" 28 | tag_keys = ["client-id"] 29 | 30 | ## JVM 31 | [[inputs.jolokia2_agent.metric]] 32 | name = "java_lang_memory" 33 | mbean = "java.lang:type=Memory" 34 | [[inputs.jolokia2_agent.metric]] 35 | name = "java_lang_memory_pool" 36 | mbean = "java.lang:type=MemoryPool,name=*" 37 | paths = ["Usage"] 38 | tag_keys = ["name"] 39 | [[inputs.jolokia2_agent.metric]] 40 | name = "java_lang_threading" 41 | mbean = "java.lang:type=Threading" 42 | paths = ["ThreadCount","PeakThreadCount","DaemonThreadCount"] 43 | [[inputs.jolokia2_agent.metric]] 44 | name = "java_lang_garbage_collector" 45 | mbean = "java.lang:type=GarbageCollector,name=*" 46 | paths = ["CollectionCount","CollectionTime"] 47 | tag_keys = ["name"] -------------------------------------------------------------------------------- /step7/telegraf-inputs/docker.conf: -------------------------------------------------------------------------------- 1 | [[inputs.docker]] 2 | endpoint = "unix:///tmp/docker.sock" 3 | -------------------------------------------------------------------------------- /step7/telegraf-inputs/kafka.conf: -------------------------------------------------------------------------------- 1 | [[inputs.jolokia2_agent]] 2 | urls = ["http://kafka-1:8778/jolokia/","http://kafka-2:8778/jolokia/","http://kafka-3:8778/jolokia/"] 3 | 4 | ## Cluster 5 | [[inputs.jolokia2_agent.metric]] 6 | name = "kafka_cluster_partition" 7 | mbean = "kafka.cluster:type=Partition,topic=*,partition=*" 8 | tag_keys = ["type","topic","partition"] 9 | ## Controller 10 | [[inputs.jolokia2_agent.metric]] 11 | name = "kafka_controller_controller_channel_manager_by_broker" 12 | mbean = "kafka.controller:type=ControllerChannelManager,broker-id=0" 13 | tag_keys = ["broker-id"] 14 | [[inputs.jolokia2_agent.metric]] 15 | name = "kafka_controller_controller_channel_manager" 16 | mbean = "kafka.controller:type=ControllerChannelManager,name=*" 17 | field_name = "$1" 18 | [[inputs.jolokia2_agent.metric]] 19 | name = "kafka_controller_controller_stats" 20 | mbean = "kafka.controller:type=ControllerStats,name=*" 21 | tag_keys = ["name"] 22 | [[inputs.jolokia2_agent.metric]] 23 | name = "kafka_controller_kafka_controller" 24 | mbean = "kafka.controller:type=KafkaController,name=*" 25 | field_name = "$1" 26 | tag_keys = ["name"] 27 | ## Coordinator group 28 | [[inputs.jolokia2_agent.metric]] 29 | name = "kafka_coordinator_group" 30 | mbean = "kafka.coordinator.group:type=*,name=*" 31 | field_name = "$2" 32 | tag_keys = ["type"] 33 | ## Coordinator transaction 34 | [[inputs.jolokia2_agent.metric]] 35 | name = "kafka_coordinator_transaction" 36 | mbean = "kafka.coordinator.transaction:type=*" 37 | tag_keys = ["type"] 38 | ## Kafka log 39 | [[inputs.jolokia2_agent.metric]] 40 | name = "kafka_log_per_topic" 41 | mbean = "kafka.log:type=Log,topic=*,partition=*,name=*" 42 | field_name = "$3" 43 | tag_keys = ["topic", "partition"] 44 | [[inputs.jolokia2_agent.metric]] 45 | name = "kafka_log_log_cleaner" 46 | mbean = "kafka.log:type=LogCleaner,name=*" 47 | field_name = "$1" 48 | tag_keys = ["type", "name"] 49 | 
[[inputs.jolokia2_agent.metric]] 50 | name = "kafka_log_log_manager_per_file" 51 | mbean = "kafka.log:type=LogManager,logDirectory=*,name=*" 52 | field_name = "$2" 53 | tag_keys = ["logDirectory", "name"] 54 | [[inputs.jolokia2_agent.metric]] 55 | name = "kafka_log_log_manager" 56 | mbean = "kafka.log:type=LogManager,name=*" 57 | field_name = "$1" 58 | [[inputs.jolokia2_agent.metric]] 59 | name = "kafka_log_log_flush_stats" 60 | mbean = "kafka.log:type=LogFlushStats,name=*" 61 | tag_keys = ["name"] 62 | ## Kafka network 63 | [[inputs.jolokia2_agent.metric]] 64 | name = "kafka_network_per_processor" 65 | mbean = "kafka.network:type=Processor,networkProcessor=*,name=*" 66 | field_name = "$2" 67 | tag_keys = ["networkProcessor", "name"] 68 | [[inputs.jolokia2_agent.metric]] 69 | name = "kafka_network_request_channel_per_processor" 70 | mbean = "kafka.network:type=RequestChannel,processor=*,name=*" 71 | field_name = "$2" 72 | tag_keys = ["processor", "name"] 73 | [[inputs.jolokia2_agent.metric]] 74 | name = "kafka_network_request_channel_per_processor" 75 | mbean = "kafka.network:type=RequestChannel,name=*" 76 | field_name = "$1" 77 | [[inputs.jolokia2_agent.metric]] 78 | name = "kafka_network_request_metrics" 79 | mbean = "kafka.network:type=RequestMetrics,request=*,name=*" 80 | tag_keys = ["name", "request"] 81 | [[inputs.jolokia2_agent.metric]] 82 | name = "kafka_network_socket_server" 83 | mbean = "kafka.network:type=SocketServer,name=*" 84 | field_name = "$1" 85 | ## Kafka Server 86 | [[inputs.jolokia2_agent.metric]] 87 | name = "kafka_server_broker_topic_metrics_per_topic" 88 | mbean = "kafka.server:type=BrokerTopicMetrics,topic=*,name=*" 89 | tag_keys = ["topic", "name"] 90 | [[inputs.jolokia2_agent.metric]] 91 | name = "kafka_server_broker_topic_metrics" 92 | mbean = "kafka.server:type=BrokerTopicMetrics,name=*" 93 | tag_keys = ["name"] 94 | [[inputs.jolokia2_agent.metric]] 95 | name = "kafka_server_delayed_fetch_metrics" 96 | mbean = "kafka.server:type=DelayedFetchMetrics,fetcherType=*,name=*" 97 | tag_keys = ["name", "fetcherType"] 98 | [[inputs.jolokia2_agent.metric]] 99 | name = "kafka_server_delayed_operation_purgatory" 100 | mbean = "kafka.server:type=DelayedOperationPurgatory,delayedOperation=*,name=*" 101 | field_name = "$2" 102 | tag_keys = ["delayedOperation"] 103 | [[inputs.jolokia2_agent.metric]] 104 | name = "kafka_server_kafka_request_handler_pool" 105 | mbean = "kafka.server:type=KafkaRequestHandlerPool,name=*" 106 | tag_keys = ["name"] 107 | [[inputs.jolokia2_agent.metric]] 108 | name = "kafka_server_kafka_server" 109 | mbean = "kafka.server:type=KafkaServer,name=*" 110 | field_name = "$1" 111 | tag_keys = ["name"] 112 | [[inputs.jolokia2_agent.metric]] 113 | name = "kafka_server_replica_fetcher_manager" 114 | mbean = "kafka.server:type=ReplicaFetcherManager,clientId=*,name=*" 115 | field_name = "$2" 116 | tag_keys = ["clientId"] 117 | [[inputs.jolokia2_agent.metric]] 118 | name = "kafka_server_replica_manager" 119 | mbean = "kafka.server:type=ReplicaManager,name=*" 120 | field_name = "$1" 121 | [[inputs.jolokia2_agent.metric]] 122 | name = "kafka_server_session_expire_listener" 123 | mbean = "kafka.server:type=SessionExpireListener" 124 | [[inputs.jolokia2_agent.metric]] 125 | name = "kafka_server_zookeeper_client_metrics" 126 | mbean = "kafka.server:type=ZooKeeperClientMetrics,name=*" 127 | tag_keys = ["name"] 128 | [[inputs.jolokia2_agent.metric]] 129 | name = "kafka_server_controller_channel_metrics" 130 | mbean = 
"kafka.server:type=controller-channel-metrics,broker-id=*" 131 | tag_keys = ["broker-id"] 132 | [[inputs.jolokia2_agent.metric]] 133 | name = "kafka_server_fetch" 134 | mbean = "kafka.server:type=Fetch" 135 | [[inputs.jolokia2_agent.metric]] 136 | name = "kafka_server_produce" 137 | mbean = "kafka.server:type=Produce" 138 | [[inputs.jolokia2_agent.metric]] 139 | name = "kafka_server_request" 140 | mbean = "kafka.server:type=Request" 141 | [[inputs.jolokia2_agent.metric]] 142 | name = "kafka_server_app_info" 143 | mbean = "kafka.server:type=app-info" 144 | [[inputs.jolokia2_agent.metric]] 145 | name = "kafka_server_kafka_metrics_count" 146 | mbean = "kafka.server:type=kafka-metrics-count" 147 | [[inputs.jolokia2_agent.metric]] 148 | name = "kafka_server_socket_server_metrics" 149 | mbean = "kafka.server:type=socket-server-metrics" 150 | [[inputs.jolokia2_agent.metric]] 151 | name = "kafka_server_txn_marker_channel_metrics" 152 | mbean = "kafka.server:type=txn-marker-channel-metrics" 153 | ## Kafka utils 154 | [[inputs.jolokia2_agent.metric]] 155 | name = "kafka_utils_throttler" 156 | mbean = "kafka.utils:type=Throttler,name=*" -------------------------------------------------------------------------------- /step7/telegraf-inputs/producer.conf: -------------------------------------------------------------------------------- 1 | [[inputs.jolokia2_agent]] 2 | urls = ["http://producer:8778/jolokia/"] 3 | 4 | ## Producer 5 | [[inputs.jolokia2_agent.metric]] 6 | name = "kafka_producer_app_info" 7 | mbean = "kafka.producer:type=app-info,client-id=*" 8 | tag_keys = ["client-id"] 9 | [[inputs.jolokia2_agent.metric]] 10 | name = "kafka_producer_kafka_metrics_count" 11 | mbean = "kafka.producer:type=kafka-metrics-count,client-id=*" 12 | tag_keys = ["client-id"] 13 | [[inputs.jolokia2_agent.metric]] 14 | name = "kafka_producer_producer_metrics" 15 | mbean = "kafka.producer:type=producer-metrics,client-id=*" 16 | tag_keys = ["client-id"] 17 | [[inputs.jolokia2_agent.metric]] 18 | name = "kafka_producer_producer_node_metrics" 19 | mbean = "kafka.producer:type=producer-node-metrics,client-id=*,node-id=*" 20 | tag_keys = ["client-id", "node-id"] 21 | [[inputs.jolokia2_agent.metric]] 22 | name = "kafka_producer_producer_topic_metrics" 23 | mbean = "kafka.producer:type=producer-topic-metrics,client-id=*,topic=*" 24 | tag_keys = ["client-id", "topic"] 25 | 26 | ## JVM 27 | [[inputs.jolokia2_agent.metric]] 28 | name = "java_lang_memory" 29 | mbean = "java.lang:type=Memory" 30 | [[inputs.jolokia2_agent.metric]] 31 | name = "java_lang_memory_pool" 32 | mbean = "java.lang:type=MemoryPool,name=*" 33 | paths = ["Usage"] 34 | tag_keys = ["name"] 35 | [[inputs.jolokia2_agent.metric]] 36 | name = "java_lang_threading" 37 | mbean = "java.lang:type=Threading" 38 | paths = ["ThreadCount","PeakThreadCount","DaemonThreadCount"] 39 | [[inputs.jolokia2_agent.metric]] 40 | name = "java_lang_garbage_collector" 41 | mbean = "java.lang:type=GarbageCollector,name=*" 42 | paths = ["CollectionCount","CollectionTime"] 43 | tag_keys = ["name"] -------------------------------------------------------------------------------- /step7/telegraf-inputs/streams.conf: -------------------------------------------------------------------------------- 1 | [[inputs.jolokia2_agent]] 2 | urls = ["http://streams:8778/jolokia/"] 3 | 4 | ## The streaming engine is on top of the consumer and the producer apis 5 | 6 | ## Streams 7 | [[inputs.jolokia2_agent.metric]] 8 | name = "kafka_streams_stream_metrics" 9 | mbean = 
"kafka.streams:type=stream-metrics,client-id=*" 10 | tag_keys = ["client-id"] 11 | [[inputs.jolokia2_agent.metric]] 12 | name = "kafka_streams_stream_processor_node_metrics" 13 | mbean = "kafka.streams:type=stream-processor-node-metrics,client-id=*,task-id=*,processor-node-id=*" 14 | tag_keys = ["client-id","task-id","processor-node-id"] 15 | [[inputs.jolokia2_agent.metric]] 16 | name = "kafka_streams_stream_record_cache_metrics" 17 | mbean = "kafka.streams:type=stream-record-cache-metrics,client-id=*,task-id=*,record-cache-id=*" 18 | tag_keys = ["client-id","task-id","record-cache-id"] 19 | [[inputs.jolokia2_agent.metric]] 20 | name = "kafka_streams_stream_rocksdb_state_metrics" 21 | mbean = "kafka.streams:type=stream-rocksdb-state-metrics,client-id=*,task-id=*,rocksdb-state-id=*" 22 | tag_keys = ["client-id","task-id","rocksdb-state-id"] 23 | [[inputs.jolokia2_agent.metric]] 24 | name = "kafka_streams_stream_rocksdb_window_metrics" 25 | mbean = "kafka.streams:type=stream-rocksdb-window-metrics,client-id=*,rocksdb-window-id=*,task-id=*" 26 | tag_keys = ["client-id","task-id","rocksdb-window-id"] 27 | [[inputs.jolokia2_agent.metric]] 28 | name = "kafka_streams_stream_task_metrics" 29 | mbean = "kafka.streams:type=stream-task-metrics,client-id=*,task-id=*" 30 | tag_keys = ["client-id","task-id"] 31 | [[inputs.jolokia2_agent.metric]] 32 | name = "kafka_streams_kafka_metrics_count" 33 | mbean = "kafka.streams:type=kafka-metrics-count" 34 | 35 | ## Consumer 36 | [[inputs.jolokia2_agent.metric]] 37 | name = "kafka_producer_app_info" 38 | mbean = "kafka.producer:type=app-info,client-id=*" 39 | tag_keys = ["client-id"] 40 | [[inputs.jolokia2_agent.metric]] 41 | name = "kafka_producer_kafka_metrics_count" 42 | mbean = "kafka.producer:type=kafka-metrics-count,client-id=*" 43 | tag_keys = ["client-id"] 44 | [[inputs.jolokia2_agent.metric]] 45 | name = "kafka_producer_producer_metrics" 46 | mbean = "kafka.producer:type=producer-metrics,client-id=*" 47 | tag_keys = ["client-id"] 48 | [[inputs.jolokia2_agent.metric]] 49 | name = "kafka_producer_producer_node_metrics" 50 | mbean = "kafka.producer:type=producer-node-metrics,client-id=*,node-id=*" 51 | tag_keys = ["client-id", "node-id"] 52 | [[inputs.jolokia2_agent.metric]] 53 | name = "kafka_producer_producer_topic_metrics" 54 | mbean = "kafka.producer:type=producer-topic-metrics,client-id=*,topic=*" 55 | tag_keys = ["client-id", "topic"] 56 | 57 | ## Producer 58 | [[inputs.jolokia2_agent.metric]] 59 | name = "kafka_producer_app_info" 60 | mbean = "kafka.producer:type=app-info,client-id=*" 61 | tag_keys = ["client-id"] 62 | [[inputs.jolokia2_agent.metric]] 63 | name = "kafka_producer_kafka_metrics_count" 64 | mbean = "kafka.producer:type=kafka-metrics-count,client-id=*" 65 | tag_keys = ["client-id"] 66 | [[inputs.jolokia2_agent.metric]] 67 | name = "kafka_producer_producer_metrics" 68 | mbean = "kafka.producer:type=producer-metrics,client-id=*" 69 | tag_keys = ["client-id"] 70 | [[inputs.jolokia2_agent.metric]] 71 | name = "kafka_producer_producer_node_metrics" 72 | mbean = "kafka.producer:type=producer-node-metrics,client-id=*,node-id=*" 73 | tag_keys = ["client-id", "node-id"] 74 | [[inputs.jolokia2_agent.metric]] 75 | name = "kafka_producer_producer_topic_metrics" 76 | mbean = "kafka.producer:type=producer-topic-metrics,client-id=*,topic=*" 77 | tag_keys = ["client-id", "topic"] 78 | 79 | ## JVM 80 | [[inputs.jolokia2_agent.metric]] 81 | name = "java_lang_memory" 82 | mbean = "java.lang:type=Memory" 83 | [[inputs.jolokia2_agent.metric]] 84 | name = 
"java_lang_memory_pool" 85 | mbean = "java.lang:type=MemoryPool,name=*" 86 | paths = ["Usage"] 87 | tag_keys = ["name"] 88 | [[inputs.jolokia2_agent.metric]] 89 | name = "java_lang_threading" 90 | mbean = "java.lang:type=Threading" 91 | paths = ["ThreadCount","PeakThreadCount","DaemonThreadCount"] 92 | [[inputs.jolokia2_agent.metric]] 93 | name = "java_lang_garbage_collector" 94 | mbean = "java.lang:type=GarbageCollector,name=*" 95 | paths = ["CollectionCount","CollectionTime"] 96 | tag_keys = ["name"] 97 | 98 | -------------------------------------------------------------------------------- /step7/telegraf-inputs/zookeeper.conf: -------------------------------------------------------------------------------- 1 | [[inputs.jolokia2_agent]] 2 | urls = ["http://zookeeper:8778/jolokia/"] 3 | 4 | ## Zookeeper 5 | [[inputs.jolokia2_agent.metric]] 6 | name = "zookeeper" 7 | mbean = "org.apache.ZooKeeperService:name0=*" 8 | tag_keys = ["name0"] 9 | [[inputs.jolokia2_agent.metric]] 10 | name = "zookeeper_InMemoryDataTree" 11 | mbean = "org.apache.ZooKeeperService:name0=*,name1=InMemoryDataTree" 12 | tag_keys = ["name0"] 13 | [[inputs.jolokia2_agent.metric]] 14 | name = "zookeeper_per_connection" 15 | mbean = "org.apache.ZooKeeperService:name0=*,name1=Connections,name2=*,name3=*" 16 | tag_keys = ["name0","name2","name3"] 17 | 18 | ## JVM 19 | [[inputs.jolokia2_agent.metric]] 20 | name = "java_lang_memory" 21 | mbean = "java.lang:type=Memory" 22 | [[inputs.jolokia2_agent.metric]] 23 | name = "java_lang_memory_pool" 24 | mbean = "java.lang:type=MemoryPool,name=*" 25 | paths = ["Usage"] 26 | tag_keys = ["name"] 27 | [[inputs.jolokia2_agent.metric]] 28 | name = "java_lang_threading" 29 | mbean = "java.lang:type=Threading" 30 | paths = ["ThreadCount","PeakThreadCount","DaemonThreadCount"] 31 | [[inputs.jolokia2_agent.metric]] 32 | name = "java_lang_garbage_collector" 33 | mbean = "java.lang:type=GarbageCollector,name=*" 34 | paths = ["CollectionCount","CollectionTime"] 35 | tag_keys = ["name"] 36 | 37 | -------------------------------------------------------------------------------- /step7/telegraf.conf: -------------------------------------------------------------------------------- 1 | [agent] 2 | interval = "10s" 3 | flush_interval= "10s" 4 | 5 | [[outputs.kafka]] 6 | brokers = ["kafka-1:9092","kafka-2:9092","kafka-3:9092"] 7 | topic = "telegraf" 8 | 9 | ##[[outputs.file]] 10 | ##files = ["stdout"] -------------------------------------------------------------------------------- /step8/README.md: -------------------------------------------------------------------------------- 1 | # Objective 2 | 3 | Pushing metrics to Influxdb via Kafka, and graphing them in Grafana 4 | 5 | 6 | # Metrics -> Influxdb 7 | 8 | The metrics captured by telegraf to shipped to kafka, we still need to push them to influxdb. 
9 | 10 | Let's do that with 11 | 12 | ```yml 13 | kafka-to-influxdb: 14 | image: telegraf:1.8 15 | restart: unless-stopped 16 | volumes: 17 | - ./telegraf-kafka-to-influxdb.conf:/etc/telegraf/telegraf.conf:ro 18 | depends_on: 19 | - kafka-1 20 | - kafka-2 21 | - kafka-3 22 | - influxdb 23 | ``` 24 | 25 | The telegraf configuration is as simple as this 26 | 27 | ``` 28 | [agent] 29 | interval = "5s" 30 | flush_interval= "5s" 31 | 32 | [[inputs.kafka_consumer]] 33 | topics = ["telegraf"] 34 | brokers = ["kafka-1:9092","kafka-2:9092","kafka-3:9092"] 35 | consumer_group = "telegraf-kafka-to-influxdb" 36 | offset = "oldest" 37 | 38 | [[outputs.influxdb]] 39 | urls = ["http://influxdb:8086"] 40 | database = "telegraf" 41 | ``` 42 | 43 | # Grafana 44 | 45 | see https://github.com/framiere/monitoring-demo 46 | 47 | 48 | # Docker-compose 49 | 50 | As we are using volumes down, to tear down this step please use 51 | 52 | ``` 53 | $ docker-compose down --volumes 54 | ``` -------------------------------------------------------------------------------- /step8/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.4' 2 | services: 3 | jolokia: 4 | image: jolokia/java-jolokia 5 | volumes: 6 | - jolokia:/opt/jolokia/ 7 | 8 | zookeeper: 9 | image: confluentinc/cp-zookeeper:5.3.1 10 | hostname: zookeeper 11 | environment: 12 | ZOOKEEPER_CLIENT_PORT: 2181 13 | KAFKA_OPTS: "-javaagent:/opt/jolokia/jolokia.jar=host=0.0.0.0" 14 | volumes: 15 | - jolokia:/opt/jolokia/ 16 | healthcheck: 17 | test: ["CMD", "bash", "-c", "echo ruok | nc localhost 2181 | grep imok"] 18 | start_period: 30s 19 | 20 | kafka-1: 21 | image: confluentinc/cp-kafka:5.3.1 22 | hostname: kafka-1 23 | depends_on: 24 | - zookeeper 25 | environment: 26 | KAFKA_BROKER_ID: 1 27 | KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181 28 | KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka-1:9092 29 | KAFKA_DEFAULT_REPLICATION_FACTOR: 3 30 | KAFKA_OPTS: "-javaagent:/opt/jolokia/jolokia.jar=host=0.0.0.0" 31 | volumes: 32 | - jolokia:/opt/jolokia/ 33 | healthcheck: 34 | test: ["CMD", "nc", "127.0.0.1", "9092"] 35 | start_period: 30s 36 | 37 | kafka-2: 38 | image: confluentinc/cp-kafka:5.3.1 39 | hostname: kafka-2 40 | depends_on: 41 | - zookeeper 42 | environment: 43 | KAFKA_BROKER_ID: 2 44 | KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181 45 | KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka-2:9092 46 | KAFKA_DEFAULT_REPLICATION_FACTOR: 3 47 | KAFKA_OPTS: "-javaagent:/opt/jolokia/jolokia.jar=host=0.0.0.0" 48 | volumes: 49 | - jolokia:/opt/jolokia/ 50 | healthcheck: 51 | test: ["CMD", "nc", "127.0.0.1", "9092"] 52 | start_period: 30s 53 | 54 | kafka-3: 55 | image: confluentinc/cp-kafka:5.3.1 56 | hostname: kafka-3 57 | depends_on: 58 | - zookeeper 59 | environment: 60 | KAFKA_BROKER_ID: 3 61 | KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181 62 | KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka-3:9092 63 | KAFKA_DEFAULT_REPLICATION_FACTOR: 3 64 | KAFKA_OPTS: "-javaagent:/opt/jolokia/jolokia.jar=host=0.0.0.0" 65 | volumes: 66 | - jolokia:/opt/jolokia/ 67 | healthcheck: 68 | test: ["CMD", "nc", "127.0.0.1", "9092"] 69 | start_period: 30s 70 | 71 | telegraf-topic: 72 | image: confluentinc/cp-kafka:5.3.1 73 | command: bash -c "cub kafka-ready -z zookeeper:2181 3 120 && kafka-topics --zookeeper zookeeper:2181 --create --topic telegraf --partitions 10 --replication-factor 3" 74 | depends_on: 75 | - zookeeper 76 | - kafka-1 77 | - kafka-2 78 | - kafka-3 79 | 80 | telegraf: 81 | image: telegraf:1.8 82 | restart: unless-stopped 83 | 
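# (Explanatory comment added to this walkthrough, not part of the original compose file.)
# The mounts below reuse step7 as-is: the Docker socket lets the docker input collect container
# stats, and telegraf.conf plus the per-service jolokia input files are mounted read-only from
# ../step7 so the collection side does not need to be duplicated in step8.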
volumes: 84 | - /var/run/docker.sock:/tmp/docker.sock 85 | - ../step7/telegraf.conf:/tmp/telegraf.conf:ro 86 | - ../step7/telegraf-inputs/:/tmp/telegraf-inputs/:ro 87 | command: telegraf -config /tmp/telegraf.conf --config-directory /tmp/telegraf-inputs 88 | depends_on: 89 | - zookeeper 90 | - kafka-1 91 | - kafka-2 92 | - kafka-3 93 | 94 | producer: 95 | build: ../step3/producer 96 | environment: 97 | JAVA_OPTS: "-javaagent:/opt/jolokia/jolokia.jar=host=0.0.0.0" 98 | volumes: 99 | - jolokia:/opt/jolokia/ 100 | depends_on: 101 | - kafka-1 102 | - kafka-2 103 | - kafka-3 104 | 105 | consumer: 106 | build: ../step3/consumer 107 | environment: 108 | JAVA_OPTS: "-javaagent:/opt/jolokia/jolokia.jar=host=0.0.0.0" 109 | volumes: 110 | - jolokia:/opt/jolokia/ 111 | depends_on: 112 | - kafka-1 113 | - kafka-2 114 | - kafka-3 115 | 116 | streams: 117 | build: ../step6/streams 118 | restart: unless-stopped 119 | environment: 120 | JAVA_OPTS: "-javaagent:/opt/jolokia/jolokia.jar=host=0.0.0.0" 121 | volumes: 122 | - jolokia:/opt/jolokia/ 123 | depends_on: 124 | - kafka-1 125 | - kafka-2 126 | - kafka-3 127 | - telegraf 128 | 129 | influxdb: 130 | image: influxdb:1.4.3 131 | ports: 132 | - "8086:8086" 133 | healthcheck: 134 | test: ["CMD", "influx", "-host", "127.0.0.1", "-port", "8086", "-execute", "SHOW DATABASES"] 135 | start_period: 30s 136 | 137 | chronograf: 138 | image: chronograf:1.4.0 139 | environment: 140 | INFLUXDB_URL: http://influxdb:8086 141 | ports: 142 | - "8888:8888" 143 | depends_on: 144 | - influxdb 145 | 146 | grafana: 147 | image: grafana/grafana:4.6.3 148 | ports: 149 | - "3000:3000" 150 | depends_on: 151 | - influxdb 152 | environment: 153 | GF_INSTALL_PLUGINS: jdbranham-diagram-panel,novalabs-annotations-panel,vonage-status-panel,bessler-pictureit-panel,grafana-piechart-panel 154 | healthcheck: 155 | test: ["CMD", "curl", "-f", "http://localhost:3000"] 156 | start_period: 30s 157 | 158 | grafana-setup: 159 | build: grafana-setup/ 160 | depends_on: 161 | - grafana 162 | 163 | kafka-to-influxdb: 164 | image: telegraf:1.8 165 | restart: unless-stopped 166 | volumes: 167 | - ./telegraf-kafka-to-influxdb.conf:/etc/telegraf/telegraf.conf:ro 168 | depends_on: 169 | - kafka-1 170 | - kafka-2 171 | - kafka-3 172 | - influxdb 173 | 174 | elasticsearch: 175 | image: docker.elastic.co/elasticsearch/elasticsearch:6.1.3 176 | restart: on-failure 177 | ports: 178 | - "9200:9200" 179 | - "9300:9300" 180 | environment: 181 | xpack.security.enabled: "false" 182 | healthcheck: 183 | test: ["CMD", "curl", "-f", "http://localhost:9200"] 184 | start_period: 30s 185 | 186 | kibana: 187 | image: docker.elastic.co/kibana/kibana:6.1.3 188 | restart: on-failure 189 | ports: 190 | - "5601:5601" 191 | environment: 192 | xpack.security.enabled: "false" 193 | depends_on: 194 | - elasticsearch 195 | healthcheck: 196 | test: ["CMD", "curl", "-f", "http://localhost:5601"] 197 | start_period: 30s 198 | 199 | kibana_index_pattern: 200 | image: tutum/curl 201 | command: | 202 | bash -c "sleep 30 ; curl -XPOST 'http://kibana:5601/api/saved_objects/index-pattern/logstash-*' -H 'kbn-version: 6.1.3' -H 'content-type: application/json' --data-binary '{\"attributes\":{\"title\":\"logstash-*\",\"timeFieldName\":\"@timestamp\"}}'" 203 | depends_on: 204 | - kibana 205 | 206 | logstash: 207 | image: logstash:7.0.0 208 | restart: on-failure 209 | command: -e "input { udp { port => 5000 codec => json } } filter { if [docker][image] =~ /^logstash/ { drop { } } } output { elasticsearch { hosts => "elasticsearch" } }" 210 | 
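# (Explanatory comment added to this walkthrough, not part of the original compose file.)
# The inline pipeline above listens for JSON log events on UDP port 5000 (sent by the logspout
# service below), drops Logstash's own container logs to avoid a feedback loop, and indexes
# everything else into Elasticsearch.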
depends_on: 211 | - elasticsearch 212 | 213 | logspout: 214 | image: bekt/logspout-logstash 215 | restart: on-failure 216 | volumes: 217 | - /var/run/docker.sock:/tmp/docker.sock 218 | environment: 219 | ROUTE_URIS: logstash://logstash:5000 220 | depends_on: 221 | - logstash 222 | 223 | volumes: 224 | jolokia: 225 | -------------------------------------------------------------------------------- /step8/grafana-setup/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM everpeace/curl-jq 2 | 3 | ENV SERVICE_GRAFANA_HOST grafana 4 | ENV SERVICE_GRAFANA_PORT 3000 5 | ENV SERVICE_GRAFANA_USERNAME "admin" 6 | ENV SERVICE_GRAFANA_PASSWORD "admin" 7 | 8 | COPY ./startup.sh /usr/bin/startup.sh 9 | RUN chmod +x /usr/bin/startup.sh 10 | 11 | RUN mkdir /datasources 12 | COPY ./datasources/*.json /datasources/ 13 | 14 | RUN mkdir /alert-channels 15 | COPY ./alert-channels/*.json /alert-channels/ 16 | 17 | RUN mkdir /dashboards 18 | COPY ./dashboards/*.json /dashboards/ 19 | 20 | CMD ["/usr/bin/startup.sh"] -------------------------------------------------------------------------------- /step8/grafana-setup/README.MD: -------------------------------------------------------------------------------- 1 | # Grafana setup 2 | 3 | This docker will wait for grafana to be up and running. 4 | 5 | It will then fetch the `datasources` and `alert-channels` and push them into the grafana api to have a default well behaved grafana. -------------------------------------------------------------------------------- /step8/grafana-setup/alert-channels/email.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "email", 3 | "type": "email", 4 | "isDefault": true, 5 | "settings": { 6 | "addresses": "team@email.com", 7 | "uploadImage": true 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /step8/grafana-setup/alert-channels/pager-duty.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "pagerduty", 3 | "type": "pagerduty", 4 | "settings": { 5 | "integrationKey": "YOUR_INTEGRATION_KEY", 6 | "uploadImage": true 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /step8/grafana-setup/dashboards/README.MD: -------------------------------------------------------------------------------- 1 | sDo your shopping at https://grafana.com/dashboards and save the json in this folder 2 | 3 | Please remove the 4 | 5 | ``` 6 | "__inputs": [ 7 | { 8 | "name": "influxdb", 9 | "label": "InfluxDB_telegraf", 10 | "description": "", 11 | "type": "datasource", 12 | "pluginId": "influxdb", 13 | "pluginName": "InfluxDB" 14 | } 15 | ], 16 | ``` 17 | 18 | and replace all the `${influxdb}` by your corresponding datasource name -------------------------------------------------------------------------------- /step8/grafana-setup/datasources/elasticsearch.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "elasticsearch", 3 | "type": "elasticsearch", 4 | "url": "http://elasticsearch:9200", 5 | "access": "proxy", 6 | "jsonData": { 7 | "timeField": "@timestamp" 8 | }, 9 | "database": "logstash-*" 10 | } 11 | -------------------------------------------------------------------------------- /step8/grafana-setup/datasources/influxdb.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "influxdb", 3 | "isDefault": 
true, 4 | "type": "influxdb", 5 | "url": "http://influxdb:8086", 6 | "access": "proxy", 7 | "database": "telegraf" 8 | } -------------------------------------------------------------------------------- /step8/grafana-setup/startup.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -x 3 | 4 | if [ -z "${SERVICE_GRAFANA_USERNAME}" ] ; then 5 | GRAFANA_AUTH="" 6 | else 7 | GRAFANA_AUTH="${SERVICE_GRAFANA_USERNAME}:${SERVICE_GRAFANA_PASSWORD}@" 8 | fi 9 | GRAFANA_URL=http://${GRAFANA_AUTH}${SERVICE_GRAFANA_HOST}:${SERVICE_GRAFANA_PORT} 10 | 11 | function waitForGrafana { 12 | while : ; 13 | do 14 | curl ${GRAFANA_URL} --output /dev/null 15 | if [ $? -eq 0 ] ; then 16 | break; 17 | fi 18 | sleep 1 19 | done 20 | } 21 | 22 | waitForGrafana 23 | 24 | mkdir /grafana 25 | 26 | for datasource in /datasources/*json ; do 27 | curl -XPOST ${GRAFANA_URL}/api/datasources/ -H 'Content-Type: application/json;charset=UTF-8' --output /dev/null -d @${datasource} 28 | done 29 | 30 | for alertChannel in /alert-channels/*json ; do 31 | curl -XPOST ${GRAFANA_URL}/api/alert-notifications/ -H 'Content-Type: application/json;charset=UTF-8' --output /dev/null -d @${alertChannel} 32 | done 33 | 34 | for dashboard in /dashboards/*json ; do 35 | curl -XPOST ${GRAFANA_URL}/api/dashboards/db/ -H 'Content-Type: application/json;charset=UTF-8' --output /dev/null -d @${dashboard} 36 | done -------------------------------------------------------------------------------- /step8/telegraf-kafka-to-influxdb.conf: -------------------------------------------------------------------------------- 1 | [agent] 2 | interval = "5s" 3 | flush_interval= "5s" 4 | 5 | [[inputs.kafka_consumer]] 6 | topics = ["telegraf"] 7 | brokers = ["kafka-1:9092","kafka-2:9092","kafka-3:9092"] 8 | consumer_group = "telegraf-kafka-to-influxdb" 9 | offset = "oldest" 10 | 11 | [[outputs.influxdb]] 12 | urls = ["http://influxdb:8086"] 13 | database = "telegraf" 14 | -------------------------------------------------------------------------------- /step9/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.4' 2 | services: 3 | zookeeper: 4 | image: confluentinc/cp-zookeeper:5.3.1 5 | hostname: zookeeper 6 | environment: 7 | ZOOKEEPER_CLIENT_PORT: 2181 8 | healthcheck: 9 | test: ["CMD", "bash", "-c", "echo ruok | nc localhost 2181 | grep imok"] 10 | start_period: 30s 11 | 12 | kafka-1: 13 | image: confluentinc/cp-kafka:5.3.1 14 | hostname: kafka-1 15 | depends_on: 16 | - zookeeper 17 | environment: 18 | KAFKA_BROKER_ID: 1 19 | KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181 20 | KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka-1:9092 21 | KAFKA_DEFAULT_REPLICATION_FACTOR: 3 22 | healthcheck: 23 | test: ["CMD", "nc", "127.0.0.1", "9092"] 24 | start_period: 30s 25 | 26 | kafka-2: 27 | image: confluentinc/cp-kafka:5.3.1 28 | hostname: kafka-2 29 | depends_on: 30 | - zookeeper 31 | environment: 32 | KAFKA_BROKER_ID: 2 33 | KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181 34 | KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka-2:9092 35 | KAFKA_DEFAULT_REPLICATION_FACTOR: 3 36 | healthcheck: 37 | test: ["CMD", "nc", "127.0.0.1", "9092"] 38 | start_period: 30s 39 | 40 | kafka-3: 41 | image: confluentinc/cp-kafka:5.3.1 42 | hostname: kafka-3 43 | depends_on: 44 | - zookeeper 45 | environment: 46 | KAFKA_BROKER_ID: 3 47 | KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181 48 | KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka-3:9092 49 | KAFKA_DEFAULT_REPLICATION_FACTOR: 3 50 | 
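# (Explanatory comment added to this walkthrough, not part of the original compose file.)
# The healthcheck below simply probes the broker port with nc; start_period gives the broker
# 30 seconds to finish starting before failed probes count against it.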
healthcheck: 51 | test: ["CMD", "nc", "127.0.0.1", "9092"] 52 | start_period: 30s 53 | 54 | mysql: 55 | image: mysql:5.7 56 | volumes: 57 | - ../step9/mysql-init.sql:/docker-entrypoint-initdb.d/mysql-init.sql 58 | environment: 59 | MYSQL_ROOT_PASSWORD: password 60 | MYSQL_DATABASE: db 61 | MYSQL_USER: user 62 | MYSQL_PASSWORD: password 63 | 64 | connect: 65 | image: confluentinc/cp-kafka-connect:5.3.1 66 | hostname: connect 67 | restart: always 68 | ports: 69 | - "8083:8083" 70 | depends_on: 71 | - zookeeper 72 | - kafka-1 73 | - kafka-2 74 | - kafka-3 75 | - mysql 76 | environment: 77 | CONNECT_BOOTSTRAP_SERVERS: "kafka-1:9092,kafka-2:9092,kafka-3:9092" 78 | CONNECT_GROUP_ID: "connect" 79 | CONNECT_CONFIG_STORAGE_TOPIC: connect-config 80 | CONNECT_OFFSET_STORAGE_TOPIC: connect-offsets 81 | CONNECT_STATUS_STORAGE_TOPIC: connect-status 82 | CONNECT_REPLICATION_FACTOR: 2 83 | CONNECT_KEY_CONVERTER: "org.apache.kafka.connect.storage.StringConverter" 84 | CONNECT_VALUE_CONVERTER: "org.apache.kafka.connect.json.JsonConverter" 85 | CONNECT_INTERNAL_KEY_CONVERTER: "org.apache.kafka.connect.json.JsonConverter" 86 | CONNECT_INTERNAL_VALUE_CONVERTER: "org.apache.kafka.connect.json.JsonConverter" 87 | CONNECT_REST_ADVERTISED_HOST_NAME: "connect" 88 | CONNECT_PLUGIN_PATH: "/usr/share/java" 89 | CONNECT_LOG4J_LOGGERS: org.reflections=ERROR 90 | volumes: 91 | - ./mysql-connector-java-5.1.45-bin.jar:/usr/share/java/kafka-connect-jdbc/mysql-connector-java-5.1.45-bin.jar 92 | healthcheck: 93 | test: ["CMD", "nc", "127.0.0.1", "8083"] 94 | start_period: 30s 95 | -------------------------------------------------------------------------------- /step9/mysql-connector-java-5.1.45-bin.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/framiere/a-kafka-story/7cb5f0e6323bab10b44386b0aeab9a6ee00bfead/step9/mysql-connector-java-5.1.45-bin.jar -------------------------------------------------------------------------------- /step9/mysql-init.sql: -------------------------------------------------------------------------------- 1 | CREATE DATABASE IF NOT EXISTS db; 2 | 3 | USE db; 4 | 5 | CREATE TABLE IF NOT EXISTS application ( 6 | id INT NOT NULL PRIMARY KEY AUTO_INCREMENT, 7 | name VARCHAR(255) NOT NULL, 8 | team_email VARCHAR(255) NOT NULL, 9 | last_modified DATETIME NOT NULL 10 | ); 11 | 12 | 13 | INSERT INTO application ( 14 | id, 15 | name, 16 | team_email, 17 | last_modified 18 | ) VALUES ( 19 | 1, 20 | 'kafka', 21 | 'kafka@apache.org', 22 | NOW() 23 | ); 24 | --------------------------------------------------------------------------------
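The step9 compose file above wires a Kafka Connect worker (with the MySQL JDBC driver mounted into the kafka-connect-jdbc plugin) to the seeded `application` table. As a hedged illustration only, not part of the repository, and with the connector name, topic prefix and column choices being assumptions, a JDBC source connector polling that table could be registered through the Connect REST API like this:

```
curl -X POST http://localhost:8083/connectors \
  -H "Content-Type: application/json" \
  -d '{
    "name": "mysql-application-source",
    "config": {
      "connector.class": "io.confluent.connect.jdbc.JdbcSourceConnector",
      "connection.url": "jdbc:mysql://mysql:3306/db?user=user&password=password",
      "table.whitelist": "application",
      "mode": "timestamp+incrementing",
      "incrementing.column.name": "id",
      "timestamp.column.name": "last_modified",
      "topic.prefix": "mysql-"
    }
  }'
```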