├── README.md
├── debezium-jdbc-es
│   └── Dockerfile
├── docker-compose.yaml
├── postgres
│   ├── Dockerfile
│   ├── fake.sql
│   ├── init-db.sh
│   └── setup.sql
├── reqs
│   ├── connections
│   │   ├── es-sink-comments.json
│   │   ├── es-sink-posts.json
│   │   ├── es-sink-users.json
│   │   └── source.json
│   └── mappings
│       ├── comments.json
│       ├── posts.json
│       └── users.json
└── start.sh

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Sync PostgreSQL with Elasticsearch via Debezium

### Schema

```
         +-------------+
         |             |
         |  PostgreSQL |
         |             |
         +------+------+
                |
                |
                |
+---------------v------------------+
|                                  |
|          Kafka Connect           |
|    (Debezium, ES connectors)     |
|                                  |
+---------------+------------------+
                |
                |
                |
                |
        +-------v--------+
        |                |
        | Elasticsearch  |
        |                |
        +----------------+
```

We use Docker Compose to deploy the following components:

* PostgreSQL
* Kafka
  * ZooKeeper
  * Kafka Broker
  * Kafka Connect with [Debezium](http://debezium.io/) and [Elasticsearch](https://github.com/confluentinc/kafka-connect-elasticsearch) connectors
* Elasticsearch

### Usage

```shell
docker-compose up --build

# wait until all services are up, then create the index mappings and register the connectors
./start.sh
```
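Once `start.sh` has run, you can confirm the pipeline is wired up before testing anything. These checks use the standard Kafka Connect REST API and the Elasticsearch cat API on the ports published in `docker-compose.yaml`:

```shell
# Expect the three ES sink connectors plus the source connector:
curl http://localhost:8083/connectors

# Expect the users, posts and comments indices:
curl http://localhost:9200/_cat/indices?v
```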
### Testing

Check the databases' contents:

```shell
# Check contents of the PostgreSQL database:
docker-compose exec postgres bash -c 'psql -U $POSTGRES_USER $POSTGRES_DATABASE -c "SELECT * FROM users"'

# Check contents of the Elasticsearch database:
curl http://localhost:9200/users/_search?pretty
```

Create user

```shell
docker-compose exec postgres bash -c 'psql -U $POSTGRES_USER $POSTGRES_DATABASE'
test_db=# INSERT INTO users (email) VALUES ('apple@gmail.com');

# Check contents of the Elasticsearch database:
curl http://localhost:9200/users/_search?q=id:6
```

```json
{
  ...
  "hits": {
    "total": 1,
    "max_score": 1.0,
    "hits": [
      {
        "_index": "users",
        "_type": "_doc",
        "_id": "6",
        "_score": 1.0,
        "_source": {
          "id": 6,
          "email": "apple@gmail.com"
        }
      }
    ]
  }
}
```

Update user

```shell
test_db=# UPDATE users SET email = 'tesla@gmail.com' WHERE id = 6;

# Check contents of the Elasticsearch database:
curl http://localhost:9200/users/_search?q=id:6
```

```json
{
  ...
  "hits": {
    "total": 1,
    "max_score": 1.0,
    "hits": [
      {
        "_index": "users",
        "_type": "_doc",
        "_id": "6",
        "_score": 1.0,
        "_source": {
          "id": 6,
          "email": "tesla@gmail.com"
        }
      }
    ]
  }
}
```

Delete user

```shell
test_db=# DELETE FROM users WHERE id = 6;

# Check contents of the Elasticsearch database:
curl http://localhost:9200/users/_search?q=id:6
```

```json
{
  ...
  "hits": {
    "total": 0,
    "max_score": null,
    "hits": []
  }
}
```

--------------------------------------------------------------------------------
/debezium-jdbc-es/Dockerfile:
--------------------------------------------------------------------------------
# https://raw.githubusercontent.com/debezium/debezium-examples/master/unwrap-smt/debezium-jdbc-es/Dockerfile

FROM debezium/connect:1.0
ENV KAFKA_CONNECT_JDBC_DIR=$KAFKA_CONNECT_PLUGINS_DIR/kafka-connect-jdbc \
    KAFKA_CONNECT_ES_DIR=$KAFKA_CONNECT_PLUGINS_DIR/kafka-connect-elasticsearch

ARG POSTGRES_VERSION=42.2.8
ARG KAFKA_JDBC_VERSION=5.3.1

# Deploy PostgreSQL JDBC Driver
RUN cd /kafka/libs && curl -sO https://jdbc.postgresql.org/download/postgresql-$POSTGRES_VERSION.jar

# Deploy Kafka Connect JDBC
RUN mkdir $KAFKA_CONNECT_JDBC_DIR && cd $KAFKA_CONNECT_JDBC_DIR && \
    curl -sO http://packages.confluent.io/maven/io/confluent/kafka-connect-jdbc/$KAFKA_JDBC_VERSION/kafka-connect-jdbc-$KAFKA_JDBC_VERSION.jar

# Deploy Confluent Elasticsearch sink connector and its dependencies
RUN mkdir $KAFKA_CONNECT_ES_DIR && cd $KAFKA_CONNECT_ES_DIR && \
    curl -sO http://packages.confluent.io/maven/io/confluent/kafka-connect-elasticsearch/5.0.0/kafka-connect-elasticsearch-5.0.0.jar && \
    curl -sO https://repo.maven.apache.org/maven2/io/searchbox/jest/2.0.0/jest-2.0.0.jar && \
    curl -sO https://repo.maven.apache.org/maven2/org/apache/httpcomponents/httpcore-nio/4.4.4/httpcore-nio-4.4.4.jar && \
    curl -sO https://repo.maven.apache.org/maven2/org/apache/httpcomponents/httpclient/4.5.1/httpclient-4.5.1.jar && \
    curl -sO https://repo.maven.apache.org/maven2/org/apache/httpcomponents/httpasyncclient/4.1.1/httpasyncclient-4.1.1.jar && \
    curl -sO https://repo.maven.apache.org/maven2/org/apache/httpcomponents/httpcore/4.4.4/httpcore-4.4.4.jar && \
    curl -sO https://repo.maven.apache.org/maven2/commons-logging/commons-logging/1.2/commons-logging-1.2.jar && \
    curl -sO https://repo.maven.apache.org/maven2/commons-codec/commons-codec/1.9/commons-codec-1.9.jar && \
    curl -sO https://repo.maven.apache.org/maven2/io/searchbox/jest-common/2.0.0/jest-common-2.0.0.jar && \
    curl -sO https://repo.maven.apache.org/maven2/com/google/code/gson/gson/2.4/gson-2.4.jar
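Once the `connect` service is built and running, you can verify that Kafka Connect picked up both connector plugins through its REST API:

```shell
# The output should list io.debezium.connector.postgresql.PostgresConnector
# and io.confluent.connect.elasticsearch.ElasticsearchSinkConnector:
curl http://localhost:8083/connector-plugins
```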
--------------------------------------------------------------------------------
/docker-compose.yaml:
--------------------------------------------------------------------------------
version: "3"
services:
  zookeeper:
    image: debezium/zookeeper:1.0
    restart: always
    ports:
      - 2181:2181
      - 2888:2888
      - 3888:3888

  kafka:
    image: debezium/kafka:1.0
    restart: always
    ports:
      - 9092:9092
    links:
      - zookeeper
    environment:
      - ZOOKEEPER_CONNECT=zookeeper:2181

  postgres:
    build: ./postgres
    restart: always
    ports:
      - 5432:5432
    environment:
      - POSTGRES_USER=postgres
      - POSTGRES_PASSWORD=postgres
      - POSTGRES_DATABASE=test_db

  elasticsearch:
    image: docker.elastic.co/elasticsearch/elasticsearch:6.6.2
    restart: always
    ports:
      - 9200:9200
    environment:
      - http.host=0.0.0.0
      - transport.host=127.0.0.1
      - xpack.security.enabled=false
      - "ES_JAVA_OPTS=-Xms512m -Xmx512m"

  connect:
    build: ./debezium-jdbc-es
    restart: always
    ports:
      - 8083:8083
      - 5005:5005
    links:
      - kafka
      - postgres
      - elasticsearch
    environment:
      - BOOTSTRAP_SERVERS=kafka:9092
      - GROUP_ID=1
      - CONFIG_STORAGE_TOPIC=my_connect_configs
      - OFFSET_STORAGE_TOPIC=my_connect_offsets

--------------------------------------------------------------------------------
/postgres/Dockerfile:
--------------------------------------------------------------------------------
FROM debezium/postgres:11

COPY ./setup.sql /home/setup.sql
COPY ./fake.sql /home/fake.sql
COPY ./init-db.sh /docker-entrypoint-initdb.d/init-db.sh

--------------------------------------------------------------------------------
/postgres/fake.sql:
--------------------------------------------------------------------------------
START TRANSACTION;

INSERT INTO users(email)
SELECT
  'user_' || seq || '@' || (
    CASE (RANDOM() * 2)::INT
      WHEN 0 THEN 'gmail'
      WHEN 1 THEN 'hotmail'
      WHEN 2 THEN 'yahoo'
    END
  ) || '.com' AS email
FROM GENERATE_SERIES(1, 5) seq;


INSERT INTO posts(user_id, title)
WITH expanded AS (
  SELECT RANDOM() AS random, seq, u.id AS user_id
  FROM GENERATE_SERIES(1, 25) seq, users u
), shuffled AS (
  SELECT e.*
  FROM expanded e
  INNER JOIN (
    SELECT ei.seq, MIN(ei.random) AS min FROM expanded ei GROUP BY ei.seq
  ) em ON (e.seq = em.seq AND e.random = em.min)
  ORDER BY e.seq
)
SELECT
  s.user_id,
  'It is ' || s.seq || ' ' || (
    CASE (RANDOM() * 2)::INT
      WHEN 0 THEN 'sql'
      WHEN 1 THEN 'elixir'
      WHEN 2 THEN 'ruby'
    END
  ) AS title
FROM shuffled s;


INSERT INTO comments(user_id, post_id, body)
WITH expanded AS (
  SELECT RANDOM() AS random, seq, u.id AS user_id, p.id AS post_id
  FROM GENERATE_SERIES(1, 100) seq, users u, posts p
), shuffled AS (
  SELECT e.*
  FROM expanded e
  INNER JOIN (
    SELECT ei.seq, MIN(ei.random) AS min FROM expanded ei GROUP BY ei.seq
  ) em ON (e.seq = em.seq AND e.random = em.min)
  ORDER BY e.seq
)
SELECT
  s.user_id,
  s.post_id,
  'Here is some comment ' || s.seq AS body
FROM shuffled s;

COMMIT;

--------------------------------------------------------------------------------
/postgres/init-db.sh:
--------------------------------------------------------------------------------
#!/bin/bash
set -e

psql -v ON_ERROR_STOP=1 --username "$POSTGRES_USER" <<-EOSQL
CREATE DATABASE test_db;
\c test_db
\i /home/setup.sql
\i /home/fake.sql
EOSQL

--------------------------------------------------------------------------------
/postgres/setup.sql:
--------------------------------------------------------------------------------
START TRANSACTION;

CREATE TABLE users(
  id SERIAL PRIMARY KEY,
  email VARCHAR(40) NOT NULL UNIQUE
);

CREATE TABLE posts(
  id SERIAL PRIMARY KEY,
  user_id INTEGER NOT NULL REFERENCES users(id),
  title VARCHAR(100) NOT NULL UNIQUE
);

CREATE TABLE comments(
  id SERIAL PRIMARY KEY,
  user_id INTEGER NOT NULL REFERENCES users(id),
  post_id INTEGER NOT NULL REFERENCES posts(id),
  body VARCHAR(500) NOT NULL
);

COMMIT;
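`fake.sql` seeds 5 users, 25 posts and 100 comments. A quick sanity check that the seed data landed, using the same `psql` invocation style as the README:

```shell
docker-compose exec postgres bash -c 'psql -U $POSTGRES_USER $POSTGRES_DATABASE -c "SELECT (SELECT COUNT(*) FROM users) AS users, (SELECT COUNT(*) FROM posts) AS posts, (SELECT COUNT(*) FROM comments) AS comments"'
```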
"es-sink-comments", 3 | "config": { 4 | "connector.class": "io.confluent.connect.elasticsearch.ElasticsearchSinkConnector", 5 | "tasks.max": "1", 6 | "topics": "comments", 7 | "connection.url": "http://elasticsearch:9200", 8 | "transforms": "unwrap,key", 9 | "transforms.unwrap.type": "io.debezium.transforms.UnwrapFromEnvelope", 10 | "transforms.unwrap.drop.tombstones": "false", 11 | "transforms.unwrap.drop.deletes": "false", 12 | "transforms.key.type": "org.apache.kafka.connect.transforms.ExtractField$Key", 13 | "transforms.key.field": "id", 14 | "key.ignore": "false", 15 | "type.name": "_doc", 16 | "behavior.on.null.values": "delete" 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /reqs/connections/es-sink-posts.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "es-sink-posts", 3 | "config": { 4 | "connector.class": "io.confluent.connect.elasticsearch.ElasticsearchSinkConnector", 5 | "tasks.max": "1", 6 | "topics": "posts", 7 | "connection.url": "http://elasticsearch:9200", 8 | "transforms": "unwrap,key", 9 | "transforms.unwrap.type": "io.debezium.transforms.UnwrapFromEnvelope", 10 | "transforms.unwrap.drop.tombstones": "false", 11 | "transforms.unwrap.drop.deletes": "false", 12 | "transforms.key.type": "org.apache.kafka.connect.transforms.ExtractField$Key", 13 | "transforms.key.field": "id", 14 | "key.ignore": "false", 15 | "type.name": "_doc", 16 | "behavior.on.null.values": "delete" 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /reqs/connections/es-sink-users.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "es-sink-users", 3 | "config": { 4 | "connector.class": "io.confluent.connect.elasticsearch.ElasticsearchSinkConnector", 5 | "tasks.max": "1", 6 | "topics": "users", 7 | "connection.url": "http://elasticsearch:9200", 8 | "transforms": "unwrap,key", 9 | "transforms.unwrap.type": "io.debezium.transforms.UnwrapFromEnvelope", 10 | "transforms.unwrap.drop.tombstones": "false", 11 | "transforms.unwrap.drop.deletes": "false", 12 | "transforms.key.type": "org.apache.kafka.connect.transforms.ExtractField$Key", 13 | "transforms.key.field": "id", 14 | "key.ignore": "false", 15 | "type.name": "_doc", 16 | "behavior.on.null.values": "delete" 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /reqs/connections/source.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "test_db-connector", 3 | "config": { 4 | "connector.class": "io.debezium.connector.postgresql.PostgresConnector", 5 | "tasks.max": "1", 6 | "database.hostname": "postgres", 7 | "database.port": "5432", 8 | "database.user": "postgres", 9 | "database.password": "postgres", 10 | "database.server.id": "184054", 11 | "database.dbname": "test_db", 12 | "database.server.name": "dbserver1", 13 | "database.whitelist": "test_db", 14 | "database.history.kafka.bootstrap.servers": "kafka:9092", 15 | "database.history.kafka.topic": "schema-changes.test_db", 16 | "transforms": "route", 17 | "transforms.route.type": "org.apache.kafka.connect.transforms.RegexRouter", 18 | "transforms.route.regex": "([^.]+)\\.([^.]+)\\.([^.]+)", 19 | "transforms.route.replacement": "$3" 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /reqs/mappings/comments.json: 
--------------------------------------------------------------------------------
/reqs/mappings/comments.json:
--------------------------------------------------------------------------------
{
  "settings": {
    "number_of_shards": 1
  },
  "mappings": {
    "_doc": {
      "properties": {
        "id": {
          "type": "integer"
        },
        "body": {
          "type": "text",
          "fields": {
            "keyword": {
              "type": "keyword",
              "ignore_above": 256
            }
          }
        },
        "post_id": {
          "type": "integer"
        },
        "user_id": {
          "type": "integer"
        }
      }
    }
  }
}

--------------------------------------------------------------------------------
/reqs/mappings/posts.json:
--------------------------------------------------------------------------------
{
  "settings": {
    "number_of_shards": 1
  },
  "mappings": {
    "_doc": {
      "properties": {
        "id": {
          "type": "integer"
        },
        "title": {
          "type": "text",
          "fields": {
            "keyword": {
              "type": "keyword",
              "ignore_above": 256
            }
          }
        },
        "user_id": {
          "type": "integer"
        }
      }
    }
  }
}

--------------------------------------------------------------------------------
/reqs/mappings/users.json:
--------------------------------------------------------------------------------
{
  "settings": {
    "number_of_shards": 1
  },
  "mappings": {
    "_doc": {
      "properties": {
        "id": {
          "type": "integer"
        },
        "email": {
          "type": "text",
          "fields": {
            "keyword": {
              "type": "keyword",
              "ignore_above": 256
            }
          }
        }
      }
    }
  }
}

--------------------------------------------------------------------------------
/start.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# create indices with mappings
curl -i -X PUT -H "Accept:application/json" -H "Content-Type:application/json" http://localhost:9200/users -d @reqs/mappings/users.json
curl -i -X PUT -H "Accept:application/json" -H "Content-Type:application/json" http://localhost:9200/posts -d @reqs/mappings/posts.json
curl -i -X PUT -H "Accept:application/json" -H "Content-Type:application/json" http://localhost:9200/comments -d @reqs/mappings/comments.json

# register the connectors
curl -i -X POST -H "Accept:application/json" -H "Content-Type:application/json" http://localhost:8083/connectors/ -d @reqs/connections/es-sink-users.json
curl -i -X POST -H "Accept:application/json" -H "Content-Type:application/json" http://localhost:8083/connectors/ -d @reqs/connections/es-sink-posts.json
curl -i -X POST -H "Accept:application/json" -H "Content-Type:application/json" http://localhost:8083/connectors/ -d @reqs/connections/es-sink-comments.json
curl -i -X POST -H "Accept:application/json" -H "Content-Type:application/json" http://localhost:8083/connectors/ -d @reqs/connections/source.json
--------------------------------------------------------------------------------
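If a connector registers but documents never show up in Elasticsearch, the Kafka Connect status endpoint is the first place to look; it reports the connector's state along with any task-level stack traces:

```shell
# Inspect one connector's state and the state of its tasks:
curl http://localhost:8083/connectors/test_db-connector/status
```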