├── debezium-server-databend-dist ├── README.md ├── target │ └── classes │ │ ├── distro │ │ ├── run.sh │ │ ├── conf │ │ │ └── application.properties.example │ │ └── debezium.py │ │ └── assemblies │ │ └── server-distribution.xml ├── src │ └── main │ │ └── resources │ │ ├── distro │ │ ├── run.sh │ │ ├── conf │ │ │ └── application.properties.example │ │ └── debezium.py │ │ └── assemblies │ │ └── server-distribution.xml └── pom.xml ├── CONTRIBUTING.md ├── debezium-server-databend-sink ├── target │ ├── classes │ │ ├── META-INF │ │ │ └── beans.xml │ │ └── conf │ │ │ └── application.properties.example │ └── maven-status │ │ └── maven-compiler-plugin │ │ └── compile │ │ └── default-compile │ │ ├── createdFiles.lst │ │ └── inputFiles.lst ├── src │ ├── main │ │ ├── resources │ │ │ ├── META-INF │ │ │ │ └── beans.xml │ │ │ └── conf │ │ │ │ └── application.properties.example │ │ └── java │ │ │ └── io │ │ │ └── debezium │ │ │ └── server │ │ │ └── databend │ │ │ ├── tablewriter │ │ │ ├── TableNotFoundException.java │ │ │ ├── AppendTableWriter.java │ │ │ ├── TableWriterFactory.java │ │ │ ├── BaseTableWriter.java │ │ │ ├── RelationalTable.java │ │ │ └── UpsertTableWriter.java │ │ │ ├── batchsizewait │ │ │ ├── NoBatchSizeWait.java │ │ │ ├── InterfaceBatchSizeWait.java │ │ │ └── MaxBatchSizeWait.java │ │ │ ├── DatabendTypes.java │ │ │ ├── DebeziumMetrics.java │ │ │ └── DatabendChangeEvent.java │ └── test │ │ ├── resources │ │ ├── json │ │ │ ├── serde-update.json │ │ │ ├── unwrap-with-schema.json │ │ │ ├── serde-with-array.json │ │ │ ├── serde-with-schema_geom.json │ │ │ ├── serde-with-schema.json │ │ │ ├── serde-with-schema2.json │ │ │ └── serde-with-array2.json │ │ └── META-INF │ │ │ └── services │ │ │ └── org.eclipse.microprofile.config.spi.ConfigSource │ │ └── java │ │ └── io │ │ └── debezium │ │ └── databend │ │ ├── tablewriter │ │ ├── TableWriterTest.java │ │ └── RelationalTableTest.java │ │ ├── testresources │ │ ├── TestUtil.java │ │ ├── TargetDatabendDB.java │ │ ├── SourceMysqlDB.java │ │ ├── SourcePostgresqlDB.java │ │ ├── BaseDbTest.java │ │ ├── TestChangeEvent.java │ │ └── DatabendChangeEventBuilder.java │ │ ├── DatabendChangeConsumerSimpleTest.java │ │ ├── DatabendConfigSource.java │ │ ├── ConfigSource.java │ │ ├── DatabendChangeConsumerUpsertTest.java │ │ ├── DatabendChangeConsumerDeleteTest.java │ │ └── DatabendChangeConsumerTest.java └── pom.xml ├── .gitignore ├── Dockerfile ├── .github └── workflows │ ├── test_ci.yaml │ └── docker_release.yaml ├── README.md ├── pom.xml ├── LICENSE └── docs └── docs.md /debezium-server-databend-dist/README.md: -------------------------------------------------------------------------------- 1 | A copy of 2 | the Debezium [debezium-server-dist](https://github.com/debezium/debezium/tree/master/debezium-server/debezium-server-dist) 3 | project 4 | 5 | Authors: Debezium Authors -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | Please feel free to send pull requests, report bugs, or open feature requests. 4 | 5 | ## License 6 | 7 | By contributing, you agree that your contributions will be licensed under the Apache 2.0 License. 
8 | -------------------------------------------------------------------------------- /debezium-server-databend-sink/target/classes/META-INF/beans.xml: -------------------------------------------------------------------------------- 1 | 8 | 9 | -------------------------------------------------------------------------------- /debezium-server-databend-sink/src/main/resources/META-INF/beans.xml: -------------------------------------------------------------------------------- 1 | 8 | 9 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled class file 2 | .idea 3 | *.class 4 | *debezium-server-databend-sink/target 5 | *debezium-server-databend-dist/target 6 | */target/* 7 | 8 | # Log file 9 | *.log 10 | 11 | # BlueJ files 12 | *.ctxt 13 | 14 | # Mobile Tools for Java (J2ME) 15 | .mtj.tmp/ 16 | 17 | # Package Files # 18 | *.jar 19 | *.war 20 | *.nar 21 | *.ear 22 | *.zip 23 | *.tar.gz 24 | *.rar 25 | 26 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml 27 | hs_err_pid* 28 | replay_pid* 29 | -------------------------------------------------------------------------------- /debezium-server-databend-sink/src/main/java/io/debezium/server/databend/tablewriter/TableNotFoundException.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright databend Authors. 4 | * * 5 | * * Licensed under the Apache Software License version 2.0, available at http://www.apache.org/licenses/LICENSE-2.0 6 | * 7 | */ 8 | 9 | package io.debezium.server.databend.tablewriter; 10 | 11 | public class TableNotFoundException extends RuntimeException { 12 | public TableNotFoundException(String message) { 13 | super(message); 14 | } 15 | } -------------------------------------------------------------------------------- /debezium-server-databend-sink/src/main/java/io/debezium/server/databend/tablewriter/AppendTableWriter.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright Databend Authors. 4 | * * 5 | * * Licensed under the Apache Software License version 2.0, available at http://www.apache.org/licenses/LICENSE-2.0 6 | * 7 | */ 8 | 9 | package io.debezium.server.databend.tablewriter; 10 | 11 | import java.sql.Connection; 12 | 13 | public class AppendTableWriter extends BaseTableWriter { 14 | public AppendTableWriter(Connection connection, String identifierQuoteCharacter, boolean isSchemaEvolutionEnabled) { 15 | super(connection, identifierQuoteCharacter, isSchemaEvolutionEnabled); 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /debezium-server-databend-dist/target/classes/distro/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # /* 4 | # * Copyright Databend Authors. 
5 | # * 6 | # * Licensed under the Apache Software License version 2.0, available at http://www.apache.org/licenses/LICENSE-2.0 7 | # */ 8 | # 9 | 10 | if [ -z "$JAVA_HOME" ]; then 11 | JAVA_BINARY="java" 12 | else 13 | JAVA_BINARY="$JAVA_HOME/bin/java" 14 | fi 15 | 16 | if [ "$OSTYPE" = "msys" ] || [ "$OSTYPE" = "cygwin" ]; then 17 | PATH_SEP=";" 18 | else 19 | PATH_SEP=":" 20 | fi 21 | 22 | RUNNER=$(ls debezium-server-*runner.jar) 23 | 24 | exec $JAVA_BINARY $DEBEZIUM_OPTS $JAVA_OPTS -cp "$RUNNER"$PATH_SEP"conf"$PATH_SEP"lib/*"$PATH_SEP"/opt/conf" io.debezium.server.Main 25 | -------------------------------------------------------------------------------- /debezium-server-databend-dist/src/main/resources/distro/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # /* 4 | # * Copyright Databend Authors. 5 | # * 6 | # * Licensed under the Apache Software License version 2.0, available at http://www.apache.org/licenses/LICENSE-2.0 7 | # */ 8 | # 9 | 10 | if [ -z "$JAVA_HOME" ]; then 11 | JAVA_BINARY="java" 12 | else 13 | JAVA_BINARY="$JAVA_HOME/bin/java" 14 | fi 15 | 16 | if [ "$OSTYPE" = "msys" ] || [ "$OSTYPE" = "cygwin" ]; then 17 | PATH_SEP=";" 18 | else 19 | PATH_SEP=":" 20 | fi 21 | 22 | RUNNER=$(ls debezium-server-*runner.jar) 23 | 24 | exec $JAVA_BINARY $DEBEZIUM_OPTS $JAVA_OPTS -cp "$RUNNER"$PATH_SEP"conf"$PATH_SEP"lib/*"$PATH_SEP"/opt/conf" io.debezium.server.Main 25 | -------------------------------------------------------------------------------- /debezium-server-databend-sink/src/main/java/io/debezium/server/databend/batchsizewait/NoBatchSizeWait.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright hantmac Authors. 4 | * * 5 | * * Licensed under the Apache Software License version 2.0, available at http://www.apache.org/licenses/LICENSE-2.0 6 | * 7 | */ 8 | 9 | package io.debezium.server.databend.batchsizewait; 10 | 11 | import javax.enterprise.context.Dependent; 12 | import javax.inject.Named; 13 | 14 | /** 15 | * A no-op batch-size wait implementation that returns immediately, adding no delay between batches 16 | * 17 | * @author hantmac 18 | */ 19 | @Dependent 20 | @Named("NoBatchSizeWait") 21 | public class NoBatchSizeWait implements InterfaceBatchSizeWait { 22 | } 23 | -------------------------------------------------------------------------------- /debezium-server-databend-sink/src/main/java/io/debezium/server/databend/batchsizewait/InterfaceBatchSizeWait.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright hantmac Authors. 4 | * * 5 | * * Licensed under the Apache Software License version 2.0, available at http://www.apache.org/licenses/LICENSE-2.0 6 | * 7 | */ 8 | 9 | package io.debezium.server.databend.batchsizewait; 10 | 11 | /** 12 | * Interface for batch-size wait implementations that throttle polling between batches.
13 | * 14 | * @author hantmac 15 | */ 16 | public interface InterfaceBatchSizeWait { 17 | 18 | default void initizalize() { 19 | } 20 | 21 | default void waitMs(Integer numRecordsProcessed, Integer processingTimeMs) throws InterruptedException { 22 | } 23 | 24 | } 25 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM eclipse-temurin:11-jdk as builder 2 | LABEL org.opencontainers.image.source=https://github.com/databendcloud/debezium-server-databend 3 | LABEL org.opencontainers.image.description="Debezium server databend container image" 4 | LABEL org.opencontainers.image.licenses=Apache 5 | RUN apt-get -qq update && apt-get -qq install maven unzip 6 | COPY . /app 7 | WORKDIR /app 8 | RUN mvn clean install -DskipTests 9 | RUN mvn clean package -Passembly -Dmaven.test.skip --quiet 10 | RUN unzip /app/debezium-server-databend-dist/target/debezium-server-databend-dist*.zip -d appdist 11 | 12 | FROM eclipse-temurin:11-jre 13 | COPY --from=builder /app/appdist/debezium-server-databend/ /app/ 14 | 15 | WORKDIR /app 16 | EXPOSE 8080 8083 17 | VOLUME ["/app/conf", "/app/data"] 18 | 19 | ENTRYPOINT ["/app/run.sh"] -------------------------------------------------------------------------------- /debezium-server-databend-sink/src/test/resources/json/serde-update.json: -------------------------------------------------------------------------------- 1 | { 2 | "op": "u", 3 | "ts_ms": 1465491411815, 4 | "before": { 5 | "id": 1004, 6 | "first_name": "Anne-Marie", 7 | "last_name": "Kretchmar", 8 | "email": "annek@noanswer.org" 9 | }, 10 | "after": { 11 | "id": 1004, 12 | "first_name": "Anne", 13 | "last_name": "Kretchmar", 14 | "email": "annek@noanswer.org" 15 | }, 16 | "source": { 17 | "version": "0.10.0.Final", 18 | "connector": "mysql", 19 | "name": "mysql-server-1", 20 | "ts_ms": 0, 21 | "snapshot": false, 22 | "db": "inventory", 23 | "table": "customers", 24 | "server_id": 0, 25 | "gtid": null, 26 | "file": "mysql-bin.000003", 27 | "pos": 154, 28 | "row": 0, 29 | "thread": 7, 30 | "query": "INSERT INTO customers (first_name, last_name, email) VALUES ('Anne', 'Kretchmar', 'annek@noanswer.org')" 31 | } 32 | } -------------------------------------------------------------------------------- /debezium-server-databend-sink/src/test/java/io/debezium/databend/tablewriter/TableWriterTest.java: -------------------------------------------------------------------------------- 1 | package io.debezium.databend.tablewriter; 2 | 3 | import org.junit.Assert; 4 | import org.junit.jupiter.api.Test; 5 | 6 | import static io.debezium.server.databend.tablewriter.BaseTableWriter.replaceFirstWordAfterTable; 7 | 8 | public class TableWriterTest { 9 | @Test 10 | public void testFirstWordAfterTable() throws Exception { 11 | String statement = "alter table products add column a int"; 12 | String newStatement = replaceFirstWordAfterTable(statement, "newTable"); 13 | System.out.println(newStatement); 14 | Assert.assertEquals("alter table newTable add column a int", newStatement); 15 | 16 | statement = "alter table products drop column a"; 17 | newStatement = replaceFirstWordAfterTable(statement, "yyy"); 18 | System.out.println(newStatement); 19 | Assert.assertEquals("alter table yyy drop column a", newStatement); 20 | } 21 | } 22 | --------------------------------------------------------------------------------
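The source of `BaseTableWriter.java` is not included above, so `replaceFirstWordAfterTable` is only visible through this test. Judging from the two assertions, a minimal sketch of the helper could look like the following (an illustrative reconstruction consistent with the test expectations, not necessarily the project's actual implementation):

```java
// Illustrative sketch only: swap the identifier that follows the "table"
// keyword in an ALTER TABLE statement, assuming single-space separated
// tokens as in the test inputs above.
public static String replaceFirstWordAfterTable(String statement, String newTableName) {
    String[] tokens = statement.split(" ");
    for (int i = 0; i < tokens.length - 1; i++) {
        if ("table".equalsIgnoreCase(tokens[i])) {
            tokens[i + 1] = newTableName; // replace the first word after "table"
            break;
        }
    }
    return String.join(" ", tokens);
}
```

With this sketch, `replaceFirstWordAfterTable("alter table products add column a int", "newTable")` yields `"alter table newTable add column a int"`, matching the first assertion.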
/debezium-server-databend-sink/src/test/resources/META-INF/services/org.eclipse.microprofile.config.spi.ConfigSource: -------------------------------------------------------------------------------- 1 | # 2 | # /* 3 | # * Copyright databend Authors. 4 | # * 5 | # * Licensed under the Apache Software License version 2.0, available at http://www.apache.org/licenses/LICENSE-2.0 6 | # */ 7 | # 8 | 9 | io.debezium.databend.ConfigSource 10 | io.debezium.databend.DatabendConfigSource 11 | -------------------------------------------------------------------------------- /debezium-server-databend-sink/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst: -------------------------------------------------------------------------------- 1 | io/debezium/server/databend/tablewriter/UpsertTableWriter.class 2 | io/debezium/server/databend/DatabendChangeEvent$2.class 3 | io/debezium/server/databend/batchsizewait/NoBatchSizeWait.class 4 | io/debezium/server/databend/batchsizewait/MaxBatchSizeWait.class 5 | io/debezium/server/databend/DatabendChangeEvent$Schema.class 6 | io/debezium/server/databend/tablewriter/AppendTableWriter.class 7 | io/debezium/server/databend/batchsizewait/InterfaceBatchSizeWait.class 8 | io/debezium/server/databend/DatabendChangeEvent.class 9 | io/debezium/server/databend/DatabendUtil.class 10 | io/debezium/server/databend/DatabendChangeConsumer.class 11 | io/debezium/server/databend/DatabendTypes.class 12 | io/debezium/server/databend/tablewriter/TableWriterFactory.class 13 | io/debezium/server/databend/DebeziumMetrics.class 14 | io/debezium/server/databend/DatabendChangeEvent$1.class 15 | io/debezium/server/databend/tablewriter/RelationalTable.class 16 | io/debezium/server/databend/tablewriter/BaseTableWriter.class 17 | io/debezium/server/databend/tablewriter/TableNotFoundException.class 18 | META-INF/sisu/javax.inject.Named 19 | -------------------------------------------------------------------------------- /debezium-server-databend-sink/src/main/java/io/debezium/server/databend/tablewriter/TableWriterFactory.java: -------------------------------------------------------------------------------- 1 | package io.debezium.server.databend.tablewriter; 2 | 3 | import java.sql.Connection; 4 | import java.util.Optional; 5 | import javax.enterprise.context.Dependent; 6 | 7 | import org.eclipse.microprofile.config.inject.ConfigProperty; 8 | 9 | @Dependent 10 | public class TableWriterFactory { 11 | @ConfigProperty(name = "debezium.sink.databend.upsert", defaultValue = "true") 12 | boolean upsert; 13 | @ConfigProperty(name = "debezium.sink.databend.upsert-keep-deletes", defaultValue = "false") 14 | boolean upsertKeepDeletes; 15 | 16 | @ConfigProperty(name = "debezium.sink.databend.identifier-quote-char", defaultValue = "") 17 | Optional<String> identifierQuoteCharacter; 18 | 19 | @ConfigProperty(name = "debezium.sink.databend.schema.evolution", defaultValue =
"false") 20 | boolean isSchemaEvolutionEnabled; 21 | 22 | public BaseTableWriter get(final Connection connection) { 23 | if (upsert) { 24 | return new UpsertTableWriter(connection, identifierQuoteCharacter.orElse(""), upsertKeepDeletes, isSchemaEvolutionEnabled); 25 | } else { 26 | return new AppendTableWriter(connection, identifierQuoteCharacter.orElse(""),isSchemaEvolutionEnabled); 27 | } 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /debezium-server-databend-sink/src/main/java/io/debezium/server/databend/DatabendTypes.java: -------------------------------------------------------------------------------- 1 | package io.debezium.server.databend; 2 | public final class DatabendTypes 3 | { 4 | public static final String NULL = "null"; 5 | public static final String NULLABLE = "nullable"; 6 | public static final String BOOLEAN = "boolean"; 7 | public static final String INT8 = "int8"; 8 | public static final String INT16 = "int16"; 9 | public static final String INT32 = "int32"; 10 | public static final String INT64 = "int64"; 11 | public static final String UINT8 = "uint8"; 12 | public static final String UINT16 = "uint16"; 13 | public static final String UINT32 = "uint32"; 14 | public static final String UINT64 = "uint64"; 15 | public static final String FLOAT32 = "float32"; 16 | public static final String FLOAT64 = "float64"; 17 | public static final String DATE = "date"; 18 | public static final String DATETIME = "datetime"; 19 | public static final String DATETIME64 = "datetime64"; 20 | public static final String TIMESTAMP = "timestamp"; 21 | public static final String STRING = "string"; 22 | public static final String STRUCT = "struct"; 23 | public static final String ARRAY = "array"; 24 | public static final String VARIANT = "variant"; 25 | public static final String VARIANT_ARRAY = "variantarray"; 26 | public static final String VARIANT_OBJECT = "variantobject"; 27 | public static final String INTERVAL = "interval"; 28 | } 29 | -------------------------------------------------------------------------------- /debezium-server-databend-sink/src/test/resources/json/unwrap-with-schema.json: -------------------------------------------------------------------------------- 1 | { 2 | "schema": { 3 | "type": "struct", 4 | "fields": [ 5 | { 6 | "type": "int32", 7 | "optional": false, 8 | "field": "id" 9 | }, 10 | { 11 | "type": "int32", 12 | "optional": false, 13 | "name": "io.debezium.time.Date", 14 | "version": 1, 15 | "field": "order_date" 16 | }, 17 | { 18 | "type": "int32", 19 | "optional": false, 20 | "field": "purchaser" 21 | }, 22 | { 23 | "type": "int32", 24 | "optional": false, 25 | "field": "quantity" 26 | }, 27 | { 28 | "type": "int32", 29 | "optional": false, 30 | "field": "product_id" 31 | }, 32 | { 33 | "type": "string", 34 | "optional": true, 35 | "field": "__op" 36 | }, 37 | { 38 | "type": "string", 39 | "optional": true, 40 | "field": "__table" 41 | }, 42 | { 43 | "type": "int64", 44 | "optional": true, 45 | "field": "__lsn" 46 | }, 47 | { 48 | "type": "int64", 49 | "optional": true, 50 | "field": "__source_ts_ms" 51 | }, 52 | { 53 | "type": "string", 54 | "optional": true, 55 | "field": "__deleted" 56 | } 57 | ], 58 | "optional": false, 59 | "name": "testc.inventory.orders.Value" 60 | }, 61 | "payload": { 62 | "id": 10003, 63 | "order_date": 16850, 64 | "purchaser": 1002, 65 | "quantity": 2, 66 | "product_id": 106, 67 | "__op": "r", 68 | "__table": "orders", 69 | "__lsn": 33832960, 70 | "__source_ts_ms": 1596309876678, 71 
| "__deleted": "false" 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /debezium-server-databend-sink/src/test/resources/json/serde-with-array.json: -------------------------------------------------------------------------------- 1 | { 2 | "schema": { 3 | "type": "struct", 4 | "fields": [ 5 | { 6 | "type": "string", 7 | "optional": true, 8 | "field": "name" 9 | }, 10 | { 11 | "type": "array", 12 | "items": { 13 | "type": "int32", 14 | "optional": true 15 | }, 16 | "optional": true, 17 | "field": "pay_by_quarter" 18 | }, 19 | { 20 | "type": "array", 21 | "items": { 22 | "type": "string", 23 | "optional": true 24 | }, 25 | "optional": true, 26 | "field": "schedule" 27 | }, 28 | { 29 | "type": "string", 30 | "optional": true, 31 | "field": "__op" 32 | }, 33 | { 34 | "type": "string", 35 | "optional": true, 36 | "field": "__table" 37 | }, 38 | { 39 | "type": "int64", 40 | "optional": true, 41 | "field": "__source_ts_ms" 42 | }, 43 | { 44 | "type": "string", 45 | "optional": true, 46 | "field": "__db" 47 | }, 48 | { 49 | "type": "string", 50 | "optional": true, 51 | "field": "__deleted" 52 | } 53 | ], 54 | "optional": false, 55 | "name": "testc.inventory.array_data.Value" 56 | }, 57 | "payload": { 58 | "name": "Bill", 59 | "pay_by_quarter": [ 60 | 10000, 61 | 10001, 62 | 10002, 63 | 10003 64 | ], 65 | "schedule": [ 66 | "[Ljava.lang.String;@508917a0", 67 | "[Ljava.lang.String;@7412bd2" 68 | ], 69 | "__op": "c", 70 | "__table": "array_data", 71 | "__source_ts_ms": 1638128893618, 72 | "__db": "postgres", 73 | "__deleted": "false" 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /.github/workflows/test_ci.yaml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | - master 8 | pull_request: 9 | branches: 10 | - main 11 | - master 12 | 13 | jobs: 14 | test: 15 | runs-on: ubuntu-latest 16 | services: 17 | databend: 18 | image: datafuselabs/databend 19 | env: 20 | QUERY_DEFAULT_USER: databend 21 | QUERY_DEFAULT_PASSWORD: databend 22 | MINIO_ENABLED: true 23 | # options: >- 24 | # --health-cmd "curl -fs http://localhost:8000/v1/health || exit 1" 25 | # --health-interval 10s 26 | # --health-timeout 5s 27 | # --health-retries 5 28 | ports: 29 | - 8000:8000 30 | - 9000:9000 31 | steps: 32 | - name: Checkout repository 33 | uses: actions/checkout@v2 34 | with: 35 | ref: ${{ github.ref }} 36 | 37 | - name: Set up JDK 11 38 | uses: actions/setup-java@v2 39 | with: 40 | distribution: 'temurin' 41 | java-version: '11' 42 | cache: 'maven' 43 | gpg-private-key: ${{ secrets.GPG_PRIVATE_KEY }} # Value of the GPG private key to import 44 | gpg-passphrase: MAVEN_GPG_PASSPHRASE # env variable for GPG private key passphrase 45 | 46 | - name: Verify Service Running 47 | run: | 48 | sleep 30 49 | cid=$(docker ps -a | grep databend | cut -d' ' -f1) 50 | docker logs ${cid} 51 | curl -u databend:databend --request POST localhost:8000/v1/query --header 'Content-Type:application/json' --data-raw '{"sql":"select 1"}' 52 | 53 | - name: Run Maven clean deploy with release profile 54 | run: mvn clean test 55 | env: 56 | MAVEN_GPG_PASSPHRASE: ${{ secrets.GPG_PASSPHRASE }} 57 | -------------------------------------------------------------------------------- /debezium-server-databend-sink/src/test/java/io/debezium/databend/testresources/TestUtil.java: -------------------------------------------------------------------------------- 1 | /* 
2 | * 3 | * * Copyright Databend Authors. 4 | * * 5 | * * Licensed under the Apache Software License version 2.0, available at http://www.apache.org/licenses/LICENSE-2.0 6 | * 7 | */ 8 | 9 | package io.debezium.databend.testresources; 10 | 11 | import io.debezium.engine.ChangeEvent; 12 | import io.debezium.engine.DebeziumEngine; 13 | 14 | import java.security.SecureRandom; 15 | 16 | import org.apache.kafka.connect.source.SourceRecord; 17 | 18 | @SuppressWarnings("unchecked") 19 | public class TestUtil { 20 | static final String AB = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; 21 | static final SecureRandom rnd = new SecureRandom(); 22 | 23 | 24 | public static int randomInt(int low, int high) { 25 | return rnd.nextInt(high - low) + low; 26 | } 27 | 28 | public static String randomString(int len) { 29 | StringBuilder sb = new StringBuilder(len); 30 | for (int i = 0; i < len; i++) 31 | sb.append(AB.charAt(rnd.nextInt(AB.length()))); 32 | return sb.toString(); 33 | } 34 | 35 | public static DebeziumEngine.RecordCommitter<ChangeEvent<SourceRecord, SourceRecord>> getCommitter() { 36 | return new DebeziumEngine.RecordCommitter() { 37 | public synchronized void markProcessed(SourceRecord record) { 38 | } 39 | 40 | @Override 41 | public void markProcessed(Object record) { 42 | } 43 | 44 | public synchronized void markBatchFinished() { 45 | } 46 | 47 | @Override 48 | public void markProcessed(Object record, DebeziumEngine.Offsets sourceOffsets) { 49 | } 50 | 51 | @Override 52 | public DebeziumEngine.Offsets buildOffsets() { 53 | return null; 54 | } 55 | }; 56 | } 57 | 58 | } 59 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # debezium-server-databend 2 | This project receives database CDC change events from Debezium Server and writes them to a [Databend](https://github.com/datafuselabs/databend) table in real time. It runs as a standalone service, with no need for auxiliary streaming platforms such as Kafka, Flink, or Spark. 
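For example, a minimal `conf/application.properties` that wires a MySQL source to a Databend sink looks like this (trimmed down from the bundled application.properties.example; host, port, and credentials are placeholders):

```properties
debezium.sink.type=databend
debezium.sink.databend.database.url=jdbc:databend://localhost:8000
debezium.sink.databend.database.username=databend
debezium.sink.databend.database.password=databend
debezium.sink.databend.database.databaseName=debezium
debezium.source.connector.class=io.debezium.connector.mysql.MySqlConnector
debezium.source.offset.storage.file.filename=data/offsets.dat
debezium.source.database.hostname=127.0.0.1
debezium.source.database.port=3306
```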
3 | 4 | # debezium databend consumer 5 | Detailed introduction docs are available on the [docs page](./docs/docs.md) 6 | 7 | # Install from source 8 | 9 | - Requirements: 10 | - JDK 11 11 | - Maven 12 | - Clone from repo: `git clone https://github.com/databendcloud/debezium-server-databend.git` 13 | - From the root of the project: 14 | - Build and package debezium server: `mvn -Passembly -Dmaven.test.skip package` 15 | - After building, unzip your server 16 | distribution: `unzip debezium-server-databend-dist/target/debezium-server-databend-dist*.zip -d databendDist` 17 | - cd into the unzipped folder: `cd databendDist` 18 | - Create an `application.properties` file and configure it: `nano conf/application.properties`; you can check the example 19 | configuration 20 | in [application.properties.example](debezium-server-databend-sink/src/main/resources/conf/application.properties.example) 21 | - Run the server using the provided script: `bash run.sh` 22 | - The Debezium server with the Databend sink will start 23 | 24 | # Install from release 25 | - Download the zip file from [release](https://github.com/databendcloud/debezium-server-databend/releases) 26 | - Unzip it 27 | - Create an `application.properties` file and configure it: `nano conf/application.properties`; you can check the example 28 | configuration 29 | in [application.properties.example](debezium-server-databend-sink/src/main/resources/conf/application.properties.example) 30 | - Run the server using the provided script: `bash run.sh` 31 | - The Debezium server with the Databend sink will start 32 | 33 | # Contributing 34 | 35 | You are warmly welcomed to hack on debezium-server-databend. We have prepared a guide: [CONTRIBUTING.md](./CONTRIBUTING.md). 36 | -------------------------------------------------------------------------------- /debezium-server-databend-sink/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst: -------------------------------------------------------------------------------- 1 | /Users/hanshanjie/git-works/debezium-server-databend/debezium-server-databend-sink/src/main/java/io/debezium/server/databend/batchsizewait/MaxBatchSizeWait.java 2 | /Users/hanshanjie/git-works/debezium-server-databend/debezium-server-databend-sink/src/main/java/io/debezium/server/databend/tablewriter/TableWriterFactory.java 3 | /Users/hanshanjie/git-works/debezium-server-databend/debezium-server-databend-sink/src/main/java/io/debezium/server/databend/DatabendTypes.java 4 | /Users/hanshanjie/git-works/debezium-server-databend/debezium-server-databend-sink/src/main/java/io/debezium/server/databend/tablewriter/BaseTableWriter.java 5 | /Users/hanshanjie/git-works/debezium-server-databend/debezium-server-databend-sink/src/main/java/io/debezium/server/databend/DatabendChangeEvent.java 6 | /Users/hanshanjie/git-works/debezium-server-databend/debezium-server-databend-sink/src/main/java/io/debezium/server/databend/tablewriter/AppendTableWriter.java 7 | /Users/hanshanjie/git-works/debezium-server-databend/debezium-server-databend-sink/src/main/java/io/debezium/server/databend/DebeziumMetrics.java 8 | /Users/hanshanjie/git-works/debezium-server-databend/debezium-server-databend-sink/src/main/java/io/debezium/server/databend/DatabendChangeConsumer.java 9 | /Users/hanshanjie/git-works/debezium-server-databend/debezium-server-databend-sink/src/main/java/io/debezium/server/databend/tablewriter/TableNotFoundException.java 10 |
/Users/hanshanjie/git-works/debezium-server-databend/debezium-server-databend-sink/src/main/java/io/debezium/server/databend/DatabendUtil.java 11 | /Users/hanshanjie/git-works/debezium-server-databend/debezium-server-databend-sink/src/main/java/io/debezium/server/databend/batchsizewait/InterfaceBatchSizeWait.java 12 | /Users/hanshanjie/git-works/debezium-server-databend/debezium-server-databend-sink/src/main/java/io/debezium/server/databend/tablewriter/RelationalTable.java 13 | /Users/hanshanjie/git-works/debezium-server-databend/debezium-server-databend-sink/src/main/java/io/debezium/server/databend/batchsizewait/NoBatchSizeWait.java 14 | /Users/hanshanjie/git-works/debezium-server-databend/debezium-server-databend-sink/src/main/java/io/debezium/server/databend/tablewriter/UpsertTableWriter.java 15 | -------------------------------------------------------------------------------- /debezium-server-databend-sink/src/test/resources/json/serde-with-schema_geom.json: -------------------------------------------------------------------------------- 1 | { 2 | "schema": { 3 | "type": "struct", 4 | "fields": [ 5 | { 6 | "type": "int32", 7 | "optional": false, 8 | "default": 0, 9 | "field": "id" 10 | }, 11 | { 12 | "type": "struct", 13 | "fields": [ 14 | { 15 | "type": "string", 16 | "optional": false, 17 | "field": "wkb" 18 | }, 19 | { 20 | "type": "int32", 21 | "optional": true, 22 | "field": "srid" 23 | } 24 | ], 25 | "optional": true, 26 | "name": "io.debezium.data.geometry.Geometry", 27 | "version": 1, 28 | "doc": "Geometry", 29 | "field": "g" 30 | }, 31 | { 32 | "type": "struct", 33 | "fields": [ 34 | { 35 | "type": "string", 36 | "optional": false, 37 | "field": "wkb" 38 | }, 39 | { 40 | "type": "int32", 41 | "optional": true, 42 | "field": "srid" 43 | } 44 | ], 45 | "optional": true, 46 | "name": "io.debezium.data.geometry.Geometry", 47 | "version": 1, 48 | "doc": "Geometry", 49 | "field": "h" 50 | }, 51 | { 52 | "type": "string", 53 | "optional": true, 54 | "field": "__op" 55 | }, 56 | { 57 | "type": "string", 58 | "optional": true, 59 | "field": "__table" 60 | }, 61 | { 62 | "type": "int64", 63 | "optional": true, 64 | "field": "__source_ts_ms" 65 | }, 66 | { 67 | "type": "string", 68 | "optional": true, 69 | "field": "__db" 70 | }, 71 | { 72 | "type": "string", 73 | "optional": true, 74 | "field": "__deleted" 75 | } 76 | ], 77 | "optional": false, 78 | "name": "testc.inventory.geom.Value" 79 | }, 80 | "payload": { 81 | "id": 1, 82 | "g": { 83 | "wkb": "AQEAAAAAAAAAAADwPwAAAAAAAPA/", 84 | "srid": 123 85 | }, 86 | "h": null, 87 | "__op": "r", 88 | "__table": "geom", 89 | "__source_ts_ms": 1634844424986, 90 | "__db": "postgres", 91 | "__deleted": "false" 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /debezium-server-databend-sink/src/test/java/io/debezium/databend/testresources/TargetDatabendDB.java: -------------------------------------------------------------------------------- 1 | package io.debezium.databend.testresources; 2 | 3 | import io.quarkus.test.common.QuarkusTestResourceLifecycleManager; 4 | import org.slf4j.Logger; 5 | import org.slf4j.LoggerFactory; 6 | 7 | import java.sql.Connection; 8 | import java.sql.DriverManager; 9 | import java.sql.SQLException; 10 | import java.sql.Statement; 11 | import java.util.Map; 12 | import java.util.concurrent.ConcurrentHashMap; 13 | 14 | public class TargetDatabendDB implements QuarkusTestResourceLifecycleManager { 15 | public static final String DB_USER = "databend"; 16 | public static 
final String DB_PASSWORD = "databend"; 17 | public String DB_DATABASE = "public"; 18 | private static final Logger LOGGER = LoggerFactory.getLogger(TargetDatabendDB.class); 19 | 20 | public Connection createConnection() 21 | throws SQLException, ClassNotFoundException { 22 | String url = "jdbc:databend://localhost:8000"; 23 | Class.forName("com.databend.jdbc.DatabendDriver"); 24 | return DriverManager.getConnection(url, DB_USER, DB_PASSWORD); 25 | } 26 | 27 | public static void runSQL(String query) throws SQLException, ClassNotFoundException { 28 | try { 29 | String url = "jdbc:databend://localhost:8000"; 30 | Class.forName("com.databend.jdbc.DatabendDriver"); 31 | Connection con = DriverManager.getConnection(url, DB_USER, DB_PASSWORD); 32 | Statement st = con.createStatement(); 33 | st.execute(query); 34 | con.close(); 35 | } catch (Exception e) { 36 | LOGGER.error(query); 37 | throw e; 38 | } 39 | } 40 | 41 | @Override 42 | public Map<String, String> start() { 43 | Map<String, String> config = new ConcurrentHashMap<>(); 44 | config.put("debezium.sink.databend.database.url", "jdbc:databend://localhost:8000"); 45 | config.put("debezium.sink.databend.database.username", "databend"); 46 | config.put("debezium.sink.databend.database.password", "databend"); 47 | config.put("debezium.sink.databend.database.databaseName", "public"); 48 | config.put("debezium.sink.databend.database.param.xyz", "val"); 49 | return config; 50 | } 51 | 52 | @Override 53 | public void stop() { 54 | 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /.github/workflows/docker_release.yaml: -------------------------------------------------------------------------------- 1 | # 2 | name: Create and publish a Docker image 3 | 4 | # Configures this workflow to run every time a tag starting with 'v' is created. 5 | on: 6 | create: 7 | tags: 8 | - 'v*' # match tags starting with 'v' 9 | 10 | # Defines two custom environment variables for the workflow. These are used for the Container registry domain, and a name for the Docker image that this workflow builds. 11 | env: 12 | REGISTRY: ghcr.io 13 | IMAGE_NAME: ${{ github.repository }} 14 | 15 | # There is a single job in this workflow. It's configured to run on the latest available version of Ubuntu. 16 | jobs: 17 | build-and-push-image: 18 | runs-on: ubuntu-latest 19 | # Sets the permissions granted to the `GITHUB_TOKEN` for the actions in this job. 20 | permissions: 21 | contents: read 22 | packages: write 23 | # 24 | steps: 25 | - name: Checkout repository 26 | uses: actions/checkout@v3 27 | # Uses the `docker/login-action` action to log in to the Container registry using the account and password that will publish the packages. Once published, the packages are scoped to the account defined here. 28 | - name: Log in to the Container registry 29 | uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1 30 | with: 31 | registry: ${{ env.REGISTRY }} 32 | username: ${{ github.actor }} 33 | password: ${{ secrets.GITHUB_TOKEN }} 34 | # This step uses [docker/metadata-action](https://github.com/docker/metadata-action#about) to extract tags and labels that will be applied to the specified image. The `id` "meta" allows the output of this step to be referenced in a subsequent step. The `images` value provides the base name for the tags and labels. 
35 | - name: Extract metadata (tags, labels) for Docker 36 | id: meta 37 | uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7 38 | with: 39 | images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} 40 | # This step uses the `docker/build-push-action` action to build the image, based on your repository's `Dockerfile`. If the build succeeds, it pushes the image to GitHub Packages. 41 | # It uses the `context` parameter to define the build's context as the set of files located in the specified path. For more information, see "[Usage](https://github.com/docker/build-push-action#usage)" in the README of the `docker/build-push-action` repository. 42 | # It uses the `tags` and `labels` parameters to tag and label the image with the output from the "meta" step. 43 | - name: Build and push Docker image 44 | uses: docker/build-push-action@f2a1d5e99d037542a71f64918e516c093c6f3fc4 45 | with: 46 | context: . 47 | push: true 48 | tags: ${{ steps.meta.outputs.tags }} 49 | labels: ${{ steps.meta.outputs.labels }} 50 | -------------------------------------------------------------------------------- /debezium-server-databend-dist/target/classes/distro/conf/application.properties.example: -------------------------------------------------------------------------------- 1 | debezium.sink.type=databend 2 | debezium.sink.databend.upsert=true 3 | debezium.sink.databend.upsert-keep-deletes=false 4 | debezium.sink.databend.database.databaseName=debezium 5 | debezium.sink.databend.database.url=jdbc:databend://localhost:8000 6 | debezium.sink.databend.database.username=databend 7 | debezium.sink.databend.database.password=databend 8 | # debezium.sink.databend.database.primaryKey=id 9 | # debezium.sink.databend.database.tableName=products 10 | # additional databend parameters 11 | debezium.sink.databend.database.param.ssl=false 12 | 13 | # enable event schemas 14 | debezium.format.value.schemas.enable=true 15 | debezium.format.key.schemas.enable=true 16 | debezium.format.value=json 17 | debezium.format.key=json 18 | 19 | # mysql source, related docs: https://docs.confluent.io/kafka-connectors/debezium-mysql-source/current/mysql_source_connector_config.html 20 | debezium.source.connector.class=io.debezium.connector.mysql.MySqlConnector 21 | debezium.source.offset.storage.file.filename=data/offsets.dat 22 | debezium.source.offset.flush.interval.ms=10 23 | 24 | debezium.source.database.hostname=127.0.0.1 25 | debezium.source.database.port=3306 26 | debezium.source.database.user=mysqlUser 27 | debezium.source.database.password=mysqlPassword 28 | debezium.source.database.dbname=mydb 29 | debezium.source.database.server.name=serverName 30 | debezium.source.include.schema.changes=false 31 | # debezium.source.table.include.list=databaseName.tableName 32 | # debezium.source.database.ssl.mode=required 33 | # Run without Kafka, use local file to store checkpoints 34 | debezium.source.database.history=io.debezium.relational.history.FileDatabaseHistory 35 | debezium.source.database.history.file.filename=data/status.dat 36 | # do event flattening. unwrap message! 
37 | # https://debezium.io/documentation/reference/1.2/configuration/event-flattening.html#extract-new-record-state-drop-tombstones 38 | debezium.transforms=unwrap 39 | debezium.transforms.unwrap.type=io.debezium.transforms.ExtractNewRecordState 40 | #debezium.transforms.unwrap.add.fields=op,table,source.ts_ms,db 41 | # soft delete 42 | #debezium.transforms.unwrap.delete.handling.mode=rewrite 43 | #debezium.transforms.unwrap.drop.tombstones=false 44 | # hard delete 45 | debezium.transforms.unwrap.delete.handling.mode=none 46 | debezium.transforms.unwrap.drop.tombstones=false 47 | 48 | # ############ SET LOG LEVELS ############ 49 | quarkus.log.console.json=true 50 | # Ignore messages below warning level from Jetty, because it's a bit verbose 51 | quarkus.log.category."org.eclipse.jetty".level=WARN 52 | quarkus.log.file.path=./logs/debezium.log 53 | quarkus.log.file.rotation.max-file-size=5M 54 | quarkus.log.file.rotation.file-suffix=.yyyy-MM-dd.gz 55 | quarkus.log.file.rotation.max-backup-index=3 56 | quarkus.log.level=WARN 57 | quarkus.log.file.enable=true 58 | quarkus.http.port=8080 59 | -------------------------------------------------------------------------------- /debezium-server-databend-dist/src/main/resources/distro/conf/application.properties.example: -------------------------------------------------------------------------------- 1 | debezium.sink.type=databend 2 | debezium.sink.databend.upsert=true 3 | debezium.sink.databend.upsert-keep-deletes=false 4 | debezium.sink.databend.database.databaseName=debezium 5 | debezium.sink.databend.database.url=jdbc:databend://localhost:8000 6 | debezium.sink.databend.database.username=databend 7 | debezium.sink.databend.database.password=databend 8 | # debezium.sink.databend.database.primaryKey=id 9 | # debezium.sink.databend.database.tableName=products 10 | # additional databend parameters 11 | debezium.sink.databend.database.param.ssl=false 12 | 13 | # enable event schemas 14 | debezium.format.value.schemas.enable=true 15 | debezium.format.key.schemas.enable=true 16 | debezium.format.value=json 17 | debezium.format.key=json 18 | 19 | # mysql source, related docs: https://docs.confluent.io/kafka-connectors/debezium-mysql-source/current/mysql_source_connector_config.html 20 | debezium.source.connector.class=io.debezium.connector.mysql.MySqlConnector 21 | debezium.source.offset.storage.file.filename=data/offsets.dat 22 | debezium.source.offset.flush.interval.ms=10 23 | 24 | debezium.source.database.hostname=127.0.0.1 25 | debezium.source.database.port=3306 26 | debezium.source.database.user=mysqlUser 27 | debezium.source.database.password=mysqlPassword 28 | debezium.source.database.dbname=mydb 29 | debezium.source.database.server.name=serverName 30 | debezium.source.include.schema.changes=false 31 | # debezium.source.table.include.list=databaseName.tableName 32 | # debezium.source.database.ssl.mode=required 33 | # Run without Kafka, use local file to store checkpoints 34 | debezium.source.database.history=io.debezium.relational.history.FileDatabaseHistory 35 | debezium.source.database.history.file.filename=data/status.dat 36 | # do event flattening. unwrap message! 
37 | # https://debezium.io/documentation/reference/1.2/configuration/event-flattening.html#extract-new-record-state-drop-tombstones 38 | debezium.transforms=unwrap 39 | debezium.transforms.unwrap.type=io.debezium.transforms.ExtractNewRecordState 40 | #debezium.transforms.unwrap.add.fields=op,table,source.ts_ms,db 41 | # soft delete 42 | #debezium.transforms.unwrap.delete.handling.mode=rewrite 43 | #debezium.transforms.unwrap.drop.tombstones=false 44 | # hard delete 45 | debezium.transforms.unwrap.delete.handling.mode=none 46 | debezium.transforms.unwrap.drop.tombstones=false 47 | 48 | # ############ SET LOG LEVELS ############ 49 | quarkus.log.console.json=true 50 | # Ignore messages below warning level from Jetty, because it's a bit verbose 51 | quarkus.log.category."org.eclipse.jetty".level=WARN 52 | quarkus.log.file.path=./logs/debezium.log 53 | quarkus.log.file.rotation.max-file-size=5M 54 | quarkus.log.file.rotation.file-suffix=.yyyy-MM-dd.gz 55 | quarkus.log.file.rotation.max-backup-index=3 56 | quarkus.log.level=WARN 57 | quarkus.log.file.enable=true 58 | quarkus.http.port=8080 59 | -------------------------------------------------------------------------------- /debezium-server-databend-sink/src/test/java/io/debezium/databend/DatabendChangeConsumerSimpleTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright Databend Authors. 4 | * * 5 | * * Licensed under the Apache Software License version 2.0, available at http://www.apache.org/licenses/LICENSE-2.0 6 | * 7 | */ 8 | 9 | package io.debezium.databend; 10 | import io.debezium.engine.ChangeEvent; 11 | import io.debezium.databend.testresources.BaseDbTest; 12 | import io.debezium.databend.testresources.TargetDatabendDB; 13 | import io.debezium.databend.testresources.TestChangeEvent; 14 | import io.debezium.databend.testresources.TestUtil; 15 | import io.debezium.server.databend.DatabendChangeConsumer; 16 | import io.quarkus.test.common.QuarkusTestResource; 17 | import io.quarkus.test.junit.QuarkusTest; 18 | import org.junit.jupiter.api.AfterEach; 19 | import org.junit.jupiter.api.BeforeAll; 20 | import org.junit.jupiter.api.Test; 21 | 22 | import javax.inject.Inject; 23 | import java.sql.Connection; 24 | import java.sql.ResultSet; 25 | import java.sql.SQLException; 26 | import java.util.ArrayList; 27 | import java.util.List; 28 | 29 | /** 30 | * @author hantmac 31 | */ 32 | @QuarkusTest 33 | @QuarkusTestResource(TargetDatabendDB.class) 34 | public class DatabendChangeConsumerSimpleTest extends BaseDbTest { 35 | @Inject 36 | DatabendChangeConsumer consumer; 37 | public static Connection connection; 38 | 39 | @BeforeAll 40 | static void beforeAll() throws Exception { 41 | // CREATE TEST DATABASE OVER PLAIN JDBC 42 | TargetDatabendDB targetDatabendDB = new TargetDatabendDB(); 43 | targetDatabendDB.start(); 44 | 45 | connection = targetDatabendDB.createConnection(); 46 | connection.createStatement().execute("CREATE DATABASE if not exists " + targetDatabendDB.DB_DATABASE); 47 | } 48 | 49 | @Test 50 | public void testSimpleUpload() throws Exception { 51 | consumer.connection = new TargetDatabendDB().createConnection(); 52 | 53 | String dest = "customers_append"; 54 | List<ChangeEvent<Object, Object>> records = new ArrayList<>(); 55 | records.add(TestChangeEvent.of(dest, 1, "c")); 56 | records.add(TestChangeEvent.of(dest, 2, "c")); 57 | records.add(TestChangeEvent.of(dest, 3, "c")); 58 | consumer.handleBatch(records, TestUtil.getCommitter()); 59 | 60 | // check that it's consumed! 
61 | // 3 records should be updated, the 4th one should be inserted 62 | records.clear(); 63 | records.add(TestChangeEvent.of(dest, 1, "r")); 64 | records.add(TestChangeEvent.of(dest, 2, "d")); 65 | records.add(TestChangeEvent.of(dest, 3, "u", "UpdatednameV1")); 66 | records.add(TestChangeEvent.of(dest, 4, "c")); 67 | consumer.handleBatch(records, TestUtil.getCommitter()); 68 | } 69 | 70 | @AfterEach 71 | public void clearData() throws SQLException, ClassNotFoundException { 72 | ResultSet rs = select("delete from public.debeziumcdc_customers_append"); 73 | } 74 | } 75 | 76 | -------------------------------------------------------------------------------- /debezium-server-databend-dist/target/classes/assemblies/server-distribution.xml: -------------------------------------------------------------------------------- 1 | 2 | 9 | 10 | 13 | distribution 14 | 15 | 16 | zip 17 | 18 | false 19 | 20 | 21 | ${project.parent.artifactId}/lib 22 | false 23 | runtime 24 | false 25 | true 26 | 27 | org.apache.kafka:kafka-tools:* 28 | javax.ws.rs:javax.ws.rs-api:* 29 | org.apache.kafka:connect-file:* 30 | org.glassfish.jersey.*:*:* 31 | org.eclipse.jetty:*:* 32 | org.apache.maven:*:* 33 | 34 | 35 | 36 | 37 | 38 | 39 | ${project.basedir}/.. 40 | ${project.parent.artifactId} 41 | 42 | README* 43 | CHANGELOG* 44 | CONTRIBUTE* 45 | COPYRIGHT* 46 | LICENSE* 47 | 48 | true 49 | 50 | 51 | 52 | ${project.build.directory} 53 | ${project.parent.artifactId} 54 | 55 | *-runner.jar 56 | 57 | 58 | 59 | src/main/resources/distro 60 | ${project.parent.artifactId} 61 | 62 | **/* 63 | 64 | 65 | 66 | 67 | -------------------------------------------------------------------------------- /debezium-server-databend-dist/src/main/resources/assemblies/server-distribution.xml: -------------------------------------------------------------------------------- 1 | 2 | 9 | 10 | 13 | distribution 14 | 15 | 16 | zip 17 | 18 | false 19 | 20 | 21 | ${project.parent.artifactId}/lib 22 | false 23 | runtime 24 | false 25 | true 26 | 27 | org.apache.kafka:kafka-tools:* 28 | javax.ws.rs:javax.ws.rs-api:* 29 | org.apache.kafka:connect-file:* 30 | org.glassfish.jersey.*:*:* 31 | org.eclipse.jetty:*:* 32 | org.apache.maven:*:* 33 | 34 | 35 | 36 | 37 | 38 | 39 | ${project.basedir}/.. 
40 | ${project.parent.artifactId} 41 | 42 | README* 43 | CHANGELOG* 44 | CONTRIBUTE* 45 | COPYRIGHT* 46 | LICENSE* 47 | 48 | true 49 | 50 | 51 | 52 | ${project.build.directory} 53 | ${project.parent.artifactId} 54 | 55 | *-runner.jar 56 | 57 | 58 | 59 | src/main/resources/distro 60 | ${project.parent.artifactId} 61 | 62 | **/* 63 | 64 | 65 | 66 | 67 | -------------------------------------------------------------------------------- /debezium-server-databend-sink/src/test/java/io/debezium/databend/DatabendConfigSource.java: -------------------------------------------------------------------------------- 1 | package io.debezium.databend; 2 | 3 | import io.debezium.server.TestConfigSource; 4 | import java.util.HashMap; 5 | import java.util.Map; 6 | 7 | public class DatabendConfigSource extends TestConfigSource { 8 | public static final String TARGET_SCHEMA = "public"; 9 | 10 | public DatabendConfigSource() { 11 | config.put("quarkus.profile", "databend"); 12 | // sink conf 13 | config.put("debezium.sink.type", "databend"); 14 | config.put("debezium.sink.databend.upsert", "true"); 15 | config.put("debezium.sink.databend.database.url","jdbc:databend://localhost:8000"); 16 | config.put("debezium.sink.databend.database.password","databend"); 17 | config.put("debezium.sink.databend.database.username","databend"); 18 | config.put("debezium.sink.databend.upsert-keep-deletes", "true"); 19 | config.put("debezium.sink.databend.database.databaseName", TARGET_SCHEMA); 20 | config.put("debezium.sink.databend.table-prefix", "debeziumcdc_"); 21 | // ==== configure batch behaviour/size ==== 22 | // Positive integer value that specifies the maximum size of each batch of events that should be processed during 23 | // each iteration of this connector. Defaults to 2048. 24 | //config.put("debezium.source.max.batch.size", "2048"); 25 | config.put("debezium.source.decimal.handling.mode", "double"); 26 | // enable disable schema 27 | config.put("debezium.format.value.schemas.enable", "true"); 28 | 29 | // debezium unwrap message 30 | config.put("debezium.transforms", "unwrap"); 31 | config.put("debezium.transforms.unwrap.type", "io.debezium.transforms.ExtractNewRecordState"); 32 | config.put("debezium.transforms.unwrap.add.fields", "op,table,source.ts_ms,db"); 33 | config.put("debezium.transforms.unwrap.delete.handling.mode", "rewrite"); 34 | config.put("debezium.transforms.unwrap.drop.tombstones", "true"); 35 | 36 | // DEBEZIUM SOURCE conf 37 | config.put("debezium.source.offset.storage.file.filename", "data/offsets.dat"); 38 | config.put("debezium.source.database.history", "io.debezium.relational.history.MemoryDatabaseHistory"); 39 | config.put("debezium.source.offset.flush.interval.ms", "60000"); 40 | config.put("debezium.source.database.server.name", "testc"); 41 | config.put("%postgresql.debezium.source.schema.whitelist", "inventory"); 42 | config.put("%postgresql.debezium.source.database.whitelist", "inventory"); 43 | config.put("debezium.source.table.whitelist", "inventory.*"); 44 | config.put("debezium.source.include.schema.changes", "false"); 45 | 46 | config.put("quarkus.log.level", "INFO"); 47 | } 48 | 49 | @Override 50 | public int getOrdinal() { 51 | // Configuration property precedence is based on ordinal values and since we override the 52 | // properties in TestConfigSource, we should give this a higher priority. 
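// (MicroProfile Config resolves competing values by ordinal: the source with the higher ordinal wins, so returning the parent's ordinal plus one is enough for these test overrides to take effect.)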
53 | return super.getOrdinal() + 1; 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /debezium-server-databend-sink/target/classes/conf/application.properties.example: -------------------------------------------------------------------------------- 1 | debezium.sink.type=databend 2 | debezium.sink.databend.upsert=false 3 | debezium.sink.databend.upsert-keep-deletes=false 4 | debezium.sink.databend.database.databaseName=debezium 5 | debezium.sink.databend.database.url=jdbc:databend://localhost:8000 6 | debezium.sink.databend.database.username=databend 7 | debezium.sink.databend.database.password=databend 8 | debezium.sink.databend.database.primaryKey=id 9 | #debezium.sink.databend.database.tableName=products 10 | debezium.sink.databend.database.param.ssl=false 11 | # additional databend parameters 12 | 13 | # enable event schemas 14 | debezium.format.value.schemas.enable=true 15 | debezium.format.key.schemas.enable=true 16 | debezium.format.value=json 17 | debezium.format.key=json 18 | 19 | # mysql source 20 | debezium.source.connector.class=io.debezium.connector.mysql.MySqlConnector 21 | debezium.source.offset.storage.file.filename=data/offsets.dat 22 | debezium.source.offset.flush.interval.ms=60000 23 | 24 | debezium.source.database.hostname=localhost 25 | debezium.source.database.port=3306 26 | debezium.source.database.user=root 27 | #debezium.source.database.password=123456 28 | debezium.source.database.server.name=from_mysql 29 | debezium.source.include.schema.changes=false 30 | debezium.source.database.whitelist=mydb 31 | #debezium.source.table.include.list=mydb.products 32 | debezium.source.max.batch.size=200000 33 | debezium.source.max.queue.size=800000 34 | debezium.source.decimal.handling.mode=precise 35 | # debezium.source.database.ssl.mode=required 36 | # Run without Kafka, use local file to store checkpoints 37 | debezium.source.database.history=io.debezium.relational.history.FileDatabaseHistory 38 | debezium.source.database.history.file.filename=data/status.dat 39 | # do event flattening. unwrap message! 
40 | # https://debezium.io/documentation/reference/1.2/configuration/event-flattening.html#extract-new-record-state-drop-tombstones 41 | debezium.transforms=unwrap,a 42 | debezium.transforms.unwrap.type=io.debezium.transforms.ExtractNewRecordState 43 | debezium.transforms.a.type=org.apache.kafka.connect.transforms.TimestampConverter$Value 44 | debezium.transforms.a.target.type=string 45 | debezium.transforms.a.field=a 46 | # datetime format 47 | debezium.transforms.a.format=yyyy-MM-dd 48 | debezium.source.time.precision.mode=connect 49 | #debezium.transforms.unwrap.add.fields=op,table,source.ts_ms,db 50 | # soft delete 51 | #debezium.transforms.unwrap.delete.handling.mode=rewrite 52 | #debezium.transforms.unwrap.drop.tombstones=false 53 | # hard delete 54 | debezium.transforms.unwrap.delete.handling.mode=none 55 | debezium.transforms.unwrap.drop.tombstones=false 56 | 57 | # ############ SET LOG LEVELS ############ 58 | quarkus.log.console.json=true 59 | # Ignore messages below warning level from Jetty, because it's a bit verbose 60 | quarkus.log.category."org.eclipse.jetty".level=WARN 61 | quarkus.log.file.path=./logs/debezium.log 62 | quarkus.log.file.rotation.max-file-size=5M 63 | quarkus.log.file.rotation.file-suffix=.yyyy-MM-dd.gz 64 | quarkus.log.file.rotation.max-backup-index=3 65 | quarkus.log.level=INFO 66 | quarkus.log.file.enable=true -------------------------------------------------------------------------------- /debezium-server-databend-sink/src/main/resources/conf/application.properties.example: -------------------------------------------------------------------------------- 1 | debezium.sink.type=databend 2 | debezium.sink.databend.upsert=false 3 | debezium.sink.databend.upsert-keep-deletes=false 4 | debezium.sink.databend.database.databaseName=debezium 5 | debezium.sink.databend.database.url=jdbc:databend://localhost:8000 6 | debezium.sink.databend.database.username=databend 7 | debezium.sink.databend.database.password=databend 8 | debezium.sink.databend.database.primaryKey=id 9 | #debezium.sink.databend.database.tableName=products 10 | debezium.sink.databend.database.param.ssl=false 11 | # additional databend parameters 12 | 13 | # enable event schemas 14 | debezium.format.value.schemas.enable=true 15 | debezium.format.key.schemas.enable=true 16 | debezium.format.value=json 17 | debezium.format.key=json 18 | 19 | # mysql source 20 | debezium.source.connector.class=io.debezium.connector.mysql.MySqlConnector 21 | debezium.source.offset.storage.file.filename=data/offsets.dat 22 | debezium.source.offset.flush.interval.ms=60000 23 | 24 | debezium.source.database.hostname=localhost 25 | debezium.source.database.port=3306 26 | debezium.source.database.user=root 27 | #debezium.source.database.password=123456 28 | debezium.source.database.server.name=from_mysql 29 | debezium.source.include.schema.changes=false 30 | debezium.source.database.whitelist=mydb 31 | #debezium.source.table.include.list=mydb.products 32 | debezium.source.max.batch.size=200000 33 | debezium.source.max.queue.size=800000 34 | debezium.source.decimal.handling.mode=precise 35 | # debezium.source.database.ssl.mode=required 36 | # Run without Kafka, use local file to store checkpoints 37 | debezium.source.database.history=io.debezium.relational.history.FileDatabaseHistory 38 | debezium.source.database.history.file.filename=data/status.dat 39 | # do event flattening. unwrap message! 
40 | # https://debezium.io/documentation/reference/1.2/configuration/event-flattening.html#extract-new-record-state-drop-tombstones 41 | #debezium.transforms=unwrap,a 42 | #debezium.transforms.unwrap.type=io.debezium.transforms.ExtractNewRecordState 43 | #debezium.transforms.a.type=org.apache.kafka.connect.transforms.TimestampConverter$Value 44 | #debezium.transforms.a.target.type=string 45 | #debezium.transforms.a.field=a 46 | # datetime format 47 | debezium.transforms.a.format=yyyy-MM-dd 48 | debezium.source.time.precision.mode=connect 49 | #debezium.transforms.unwrap.add.fields=op,table,source.ts_ms,db 50 | # soft delete 51 | #debezium.transforms.unwrap.delete.handling.mode=rewrite 52 | #debezium.transforms.unwrap.drop.tombstones=false 53 | # hard delete 54 | debezium.transforms.unwrap.delete.handling.mode=none 55 | debezium.transforms.unwrap.drop.tombstones=false 56 | 57 | # ############ SET LOG LEVELS ############ 58 | quarkus.log.console.json=true 59 | # Ignore messages below warning level from Jetty, because it's a bit verbose 60 | quarkus.log.category."org.eclipse.jetty".level=WARN 61 | quarkus.log.file.path=./logs/debezium.log 62 | quarkus.log.file.rotation.max-file-size=5M 63 | quarkus.log.file.rotation.file-suffix=.yyyy-MM-dd.gz 64 | quarkus.log.file.rotation.max-backup-index=3 65 | quarkus.log.level=INFO 66 | quarkus.log.file.enable=true -------------------------------------------------------------------------------- /debezium-server-databend-sink/src/test/java/io/debezium/databend/ConfigSource.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright Databend Authors. 4 | * * 5 | * * Licensed under the Apache Software License version 2.0, available at http://www.apache.org/licenses/LICENSE-2.0 6 | * 7 | */ 8 | 9 | package io.debezium.databend; 10 | 11 | import io.debezium.server.TestConfigSource; 12 | 13 | public class ConfigSource extends TestConfigSource { 14 | 15 | public static final String TARGET_SCHEMA = "public"; 16 | 17 | public ConfigSource() { 18 | config.put("quarkus.profile", "postgresql"); 19 | // sink conf 20 | config.put("debezium.sink.type", "databend"); 21 | config.put("debezium.sink.databend.upsert", "true"); 22 | config.put("debezium.sink.databend.database.url","jdbc:databend://localhost:8000"); 23 | config.put("debezium.sink.databend.database.password","databend"); 24 | config.put("debezium.sink.databend.database.username","databend"); 25 | config.put("debezium.sink.databend.upsert-keep-deletes", "true"); 26 | config.put("debezium.sink.databend.database.databaseName", TARGET_SCHEMA); 27 | config.put("debezium.sink.databend.table-prefix", "debeziumcdc_"); 28 | // ==== configure batch behaviour/size ==== 29 | // Positive integer value that specifies the maximum size of each batch of events that should be processed during 30 | // each iteration of this connector. Defaults to 2048. 
31 | //config.put("debezium.source.max.batch.size", "2048"); 32 | config.put("debezium.source.decimal.handling.mode", "double"); 33 | // enable disable schema 34 | config.put("debezium.format.value.schemas.enable", "true"); 35 | 36 | // debezium unwrap message 37 | config.put("debezium.transforms", "unwrap"); 38 | config.put("debezium.transforms.unwrap.type", "io.debezium.transforms.ExtractNewRecordState"); 39 | config.put("debezium.transforms.unwrap.add.fields", "op,table,source.ts_ms,db"); 40 | config.put("debezium.transforms.unwrap.delete.handling.mode", "rewrite"); 41 | config.put("debezium.transforms.unwrap.drop.tombstones", "true"); 42 | 43 | // DEBEZIUM SOURCE conf 44 | config.put("debezium.source.offset.storage.file.filename", "data/offsets.dat"); 45 | config.put("debezium.source.database.history", "io.debezium.relational.history.MemoryDatabaseHistory"); 46 | config.put("debezium.source.offset.flush.interval.ms", "60000"); 47 | config.put("debezium.source.database.server.name", "testc"); 48 | config.put("%postgresql.debezium.source.schema.whitelist", "inventory"); 49 | config.put("%postgresql.debezium.source.database.whitelist", "inventory"); 50 | config.put("debezium.source.table.whitelist", "inventory.*"); 51 | config.put("debezium.source.include.schema.changes", "false"); 52 | 53 | config.put("quarkus.log.level", "INFO"); 54 | } 55 | 56 | @Override 57 | public int getOrdinal() { 58 | // Configuration property precedence is based on ordinal values and since we override the 59 | // properties in TestConfigSource, we should give this a higher priority. 60 | return super.getOrdinal() + 1; 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /debezium-server-databend-sink/src/main/java/io/debezium/server/databend/batchsizewait/MaxBatchSizeWait.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright hantmac Authors. 
4 | * *
5 | * * Licensed under the Apache Software License version 2.0, available at http://www.apache.org/licenses/LICENSE-2.0
6 | *
7 | */
8 |
9 | package io.debezium.server.databend.batchsizewait;
10 |
11 | import io.debezium.DebeziumException;
12 | import io.debezium.config.CommonConnectorConfig;
13 | import io.debezium.server.databend.DebeziumMetrics;
14 | import org.eclipse.microprofile.config.inject.ConfigProperty;
15 | import org.slf4j.Logger;
16 | import org.slf4j.LoggerFactory;
17 |
18 | import javax.enterprise.context.Dependent;
19 | import javax.inject.Inject;
20 | import javax.inject.Named;
21 |
22 | /**
23 | * Optimizes the batch size to around 85%-90% of max.batch.size using a dynamically calculated sleep (ms)
24 | *
25 | * @author hantmac
26 | */
27 | @Dependent
28 | @Named("MaxBatchSizeWait")
29 | public class MaxBatchSizeWait implements InterfaceBatchSizeWait {
30 | protected static final Logger LOGGER = LoggerFactory.getLogger(MaxBatchSizeWait.class);
31 |
32 | @ConfigProperty(name = "debezium.source.max.queue.size", defaultValue = CommonConnectorConfig.DEFAULT_MAX_QUEUE_SIZE + "")
33 | int maxQueueSize;
34 | @ConfigProperty(name = "debezium.source.max.batch.size", defaultValue = CommonConnectorConfig.DEFAULT_MAX_BATCH_SIZE + "")
35 | int maxBatchSize;
36 | @ConfigProperty(name = "debezium.sink.batch.batch-size-wait.max-wait-ms", defaultValue = "300000")
37 | int maxWaitMs;
38 | @ConfigProperty(name = "debezium.sink.batch.batch-size-wait.wait-interval-ms", defaultValue = "10000")
39 | int waitIntervalMs;
40 |
41 | @Inject
42 | DebeziumMetrics dbzMetrics;
43 |
44 | @Override
45 | public void initizalize() throws DebeziumException {
46 | assert waitIntervalMs < maxWaitMs : "`wait-interval-ms` cannot be bigger than `max-wait-ms`";
47 | dbzMetrics.initizalize();
48 | }
49 |
50 | @Override
51 | public void waitMs(Integer numRecordsProcessed, Integer processingTimeMs) throws InterruptedException {
52 |
53 | // don't wait if snapshot process is running
54 | if (dbzMetrics.snapshotRunning()) {
55 | return;
56 | }
57 |
58 | LOGGER.debug("Processed {}, QueueCurrentSize:{}, QueueTotalCapacity:{}, SecondsBehindSource:{}, SnapshotCompleted:{}",
59 | numRecordsProcessed,
60 | dbzMetrics.streamingQueueCurrentSize(),
61 | maxQueueSize,
62 | (int) (dbzMetrics.streamingMilliSecondsBehindSource() / 1000),
63 | dbzMetrics.snapshotCompleted()
64 | );
65 |
66 | int totalWaitMs = 0;
67 | while (totalWaitMs < maxWaitMs && dbzMetrics.streamingQueueCurrentSize() < maxBatchSize) {
68 | totalWaitMs += waitIntervalMs;
69 | LOGGER.debug("Sleeping {} Milliseconds, QueueCurrentSize:{} < maxBatchSize:{}",
70 | waitIntervalMs, dbzMetrics.streamingQueueCurrentSize(), maxBatchSize);
71 |
72 | Thread.sleep(waitIntervalMs);
73 | }
74 |
75 | LOGGER.debug("Total wait {} Milliseconds, QueueCurrentSize:{} < maxBatchSize:{}",
76 | totalWaitMs, dbzMetrics.streamingQueueCurrentSize(), maxBatchSize);
77 |
78 | }
79 |
80 | }
-------------------------------------------------------------------------------- /debezium-server-databend-sink/src/test/java/io/debezium/databend/testresources/SourceMysqlDB.java:
1 | /*
2 | *
3 | * * Copyright Databend Authors.
4 | * *
5 | * * Licensed under the Apache Software License version 2.0, available at http://www.apache.org/licenses/LICENSE-2.0
6 | *
7 | */
8 |
9 | package io.debezium.databend.testresources;
10 |
11 | import io.quarkus.test.common.QuarkusTestResourceLifecycleManager;
12 |
13 | import java.sql.Connection;
14 | import java.sql.DriverManager;
15 | import java.sql.SQLException;
16 | import java.sql.Statement;
17 | import java.time.Duration;
18 | import java.util.Map;
19 | import java.util.concurrent.ConcurrentHashMap;
20 |
21 | import org.slf4j.Logger;
22 | import org.slf4j.LoggerFactory;
23 | import org.testcontainers.containers.GenericContainer;
24 | import org.testcontainers.containers.wait.strategy.Wait;
25 |
26 | public class SourceMysqlDB implements QuarkusTestResourceLifecycleManager {
27 |
28 | public static final String MYSQL_ROOT_PASSWORD = "debezium";
29 | public static final String MYSQL_USER = "mysqluser";
30 | public static final String MYSQL_PASSWORD = "mysqlpw";
31 | public static final String MYSQL_DEBEZIUM_USER = "debezium";
32 | public static final String MYSQL_DEBEZIUM_PASSWORD = "dbz";
33 | public static final String MYSQL_IMAGE = "debezium/example-mysql:1.9.2.Final";
34 | public static final String MYSQL_HOST = "127.0.0.1";
35 | public static final String MYSQL_DATABASE = "inventory";
36 | public static final Integer MYSQL_PORT_DEFAULT = 3306;
37 | private static final Logger LOGGER = LoggerFactory.getLogger(SourceMysqlDB.class);
38 |
39 | private static final GenericContainer<?> container = new GenericContainer<>(MYSQL_IMAGE)
40 | .waitingFor(Wait.forLogMessage(".*mysqld: ready for connections.*", 2))
41 | .withEnv("MYSQL_USER", MYSQL_USER)
42 | .withEnv("MYSQL_PASSWORD", MYSQL_PASSWORD)
43 | .withEnv("MYSQL_ROOT_PASSWORD", MYSQL_ROOT_PASSWORD)
44 | .withExposedPorts(MYSQL_PORT_DEFAULT)
45 | .withStartupTimeout(Duration.ofSeconds(30));
46 |
47 | public static void runSQL(String query) throws SQLException, ClassNotFoundException {
48 | try {
49 | String url = "jdbc:mysql://" + MYSQL_HOST + ":" + container.getMappedPort(MYSQL_PORT_DEFAULT) + "/" + MYSQL_DATABASE + "?useSSL=false";
50 | Class.forName("com.mysql.cj.jdbc.Driver");
51 | Connection con = DriverManager.getConnection(url, MYSQL_USER, MYSQL_PASSWORD);
52 | Statement st = con.createStatement();
53 | st.execute(query);
54 | con.close();
55 | } catch (Exception e) {
56 | LOGGER.error("Failed to run SQL: {}", query, e);
57 | throw e;
58 | }
59 | }
60 |
61 | @Override
62 | public Map<String, String> start() {
63 | container.start();
64 |
65 | Map<String, String> params = new ConcurrentHashMap<>();
66 | params.put("%mysql.debezium.source.database.hostname", MYSQL_HOST);
67 | params.put("%mysql.debezium.source.database.port", container.getMappedPort(MYSQL_PORT_DEFAULT).toString());
68 | params.put("%mysql.debezium.source.database.user", MYSQL_DEBEZIUM_USER);
69 | params.put("%mysql.debezium.source.database.password", MYSQL_DEBEZIUM_PASSWORD);
70 | params.put("%mysql.debezium.source.database.dbname", MYSQL_DATABASE);
71 | return params;
72 | }
73 |
74 | @Override
75 | public void stop() {
76 | if (container != null) {
77 | container.stop();
78 | }
79 | }
80 |
81 | }
-------------------------------------------------------------------------------- /debezium-server-databend-sink/src/test/java/io/debezium/databend/testresources/SourcePostgresqlDB.java:
1 | /*
2 | *
3 | * * Copyright Databend Authors.
4 | * *
5 | * * Licensed under the Apache Software License version 2.0, available at http://www.apache.org/licenses/LICENSE-2.0
6 | *
7 | */
8 |
9 | package io.debezium.databend.testresources;
10 |
11 | import io.quarkus.test.common.QuarkusTestResourceLifecycleManager;
12 |
13 | import java.sql.Connection;
14 | import java.sql.DriverManager;
15 | import java.sql.SQLException;
16 | import java.sql.Statement;
17 | import java.time.Duration;
18 | import java.util.Map;
19 | import java.util.concurrent.ConcurrentHashMap;
20 |
21 | import org.slf4j.Logger;
22 | import org.slf4j.LoggerFactory;
23 | import org.testcontainers.containers.GenericContainer;
24 | import org.testcontainers.containers.wait.strategy.Wait;
25 |
26 | public class SourcePostgresqlDB implements QuarkusTestResourceLifecycleManager {
27 |
28 | public static final String POSTGRES_USER = "postgres";
29 | public static final String POSTGRES_PASSWORD = "postgres";
30 | public static final String POSTGRES_DBNAME = "postgres";
31 | public static final String POSTGRES_IMAGE = "debezium/example-postgres:1.9.2.Final";
32 | public static final String POSTGRES_HOST = "localhost";
33 | public static final Integer POSTGRES_PORT_DEFAULT = 5432;
34 | private static final Logger LOGGER = LoggerFactory.getLogger(SourcePostgresqlDB.class);
35 |
36 | private static GenericContainer<?> container = new GenericContainer<>(POSTGRES_IMAGE)
37 | .waitingFor(Wait.forLogMessage(".*database system is ready to accept connections.*", 2))
38 | .withEnv("POSTGRES_USER", POSTGRES_USER)
39 | .withEnv("POSTGRES_PASSWORD", POSTGRES_PASSWORD)
40 | .withEnv("POSTGRES_DB", POSTGRES_DBNAME)
41 | .withEnv("POSTGRES_INITDB_ARGS", "-E UTF8")
42 | .withEnv("LANG", "en_US.utf8")
43 | .withExposedPorts(POSTGRES_PORT_DEFAULT)
44 | .withStartupTimeout(Duration.ofSeconds(30));
45 |
46 | public static void runSQL(String query) throws SQLException, ClassNotFoundException {
47 | try {
48 |
49 | String url = "jdbc:postgresql://" + POSTGRES_HOST + ":" + container.getMappedPort(POSTGRES_PORT_DEFAULT) + "/" + POSTGRES_DBNAME;
50 | Class.forName("org.postgresql.Driver");
51 | Connection con = DriverManager.getConnection(url, POSTGRES_USER, POSTGRES_PASSWORD);
52 | Statement st = con.createStatement();
53 | st.execute(query);
54 | con.close();
55 | } catch (Exception e) {
56 | LOGGER.error("Failed to run SQL: {}", query, e);
57 | throw e;
58 | }
59 | }
60 |
61 | @Override
62 | public Map<String, String> start() {
63 | container.start();
64 |
65 | Map<String, String> params = new ConcurrentHashMap<>();
66 | params.put("debezium.source.connector.class", "io.debezium.connector.postgresql.PostgresConnector");
67 | params.put("debezium.source.database.hostname", POSTGRES_HOST);
68 | params.put("debezium.source.database.port", container.getMappedPort(POSTGRES_PORT_DEFAULT).toString());
69 | params.put("debezium.source.database.user", POSTGRES_USER);
70 | params.put("debezium.source.database.password", POSTGRES_PASSWORD);
71 | params.put("debezium.source.database.dbname", POSTGRES_DBNAME);
72 | return params;
73 | }
74 |
75 | @Override
76 | public void stop() {
77 | if (container != null) {
78 | container.stop();
79 | }
80 | }
81 |
82 | }
-------------------------------------------------------------------------------- /debezium-server-databend-sink/src/main/java/io/debezium/server/databend/DebeziumMetrics.java:
1 | /*
2 | *
3 | * * Copyright hantmac Authors.
4 | * *
5 | * * Licensed under the Apache Software License version 2.0, available at http://www.apache.org/licenses/LICENSE-2.0
6 | *
7 | */
8 |
9 | package io.debezium.server.databend;
10 |
11 | import io.debezium.DebeziumException;
12 | import io.debezium.config.CommonConnectorConfig;
13 |
14 | import java.lang.management.ManagementFactory;
15 | import java.util.Optional;
16 | import javax.enterprise.context.Dependent;
17 | import javax.management.MBeanServer;
18 | import javax.management.ObjectName;
19 |
20 | import org.eclipse.microprofile.config.inject.ConfigProperty;
21 | import org.slf4j.Logger;
22 | import org.slf4j.LoggerFactory;
23 |
24 | /**
25 | * @author hantmac
26 | */
27 | @Dependent
28 | public class DebeziumMetrics {
29 | protected static final Logger LOGGER = LoggerFactory.getLogger(DebeziumMetrics.class);
30 | final MBeanServer mbeanServer = ManagementFactory.getPlatformMBeanServer();
31 | @ConfigProperty(name = "debezium.sink.batch.metrics.snapshot-mbean", defaultValue = "")
32 | Optional<String> snapshotMbean;
33 | @ConfigProperty(name = "debezium.sink.batch.metrics.streaming-mbean", defaultValue = "")
34 | Optional<String> streamingMbean;
35 | @ConfigProperty(name = "debezium.source.max.queue.size", defaultValue = CommonConnectorConfig.DEFAULT_MAX_QUEUE_SIZE + "")
36 | int maxQueueSize;
37 |
38 | ObjectName snapshotMetricsObjectName;
39 | ObjectName streamingMetricsObjectName;
40 |
41 | public void initizalize() throws DebeziumException {
42 | assert snapshotMbean.isPresent() :
43 | "Snapshot metrics Mbean `debezium.sink.batch.metrics.snapshot-mbean` not provided";
44 | assert streamingMbean.isPresent() :
45 | "Streaming metrics Mbean `debezium.sink.batch.metrics.streaming-mbean` not provided";
46 | try {
47 | snapshotMetricsObjectName = new ObjectName(snapshotMbean.get());
48 | streamingMetricsObjectName = new ObjectName(streamingMbean.get());
49 | } catch (Exception e) {
50 | throw new DebeziumException(e);
51 | }
52 | }
53 |
54 | public boolean snapshotRunning() {
55 | try {
56 | return (boolean) mbeanServer.getAttribute(snapshotMetricsObjectName, "SnapshotRunning");
57 | } catch (Exception e) {
58 | throw new DebeziumException(e);
59 | }
60 | }
61 |
62 | public boolean snapshotCompleted() {
63 | try {
64 | return (boolean) mbeanServer.getAttribute(snapshotMetricsObjectName, "SnapshotCompleted");
65 | } catch (Exception e) {
66 | throw new DebeziumException(e);
67 | }
68 | }
69 |
70 | public int streamingQueueRemainingCapacity() {
71 | try {
72 | return (int) mbeanServer.getAttribute(streamingMetricsObjectName, "QueueRemainingCapacity");
73 | } catch (Exception e) {
74 | throw new DebeziumException(e);
75 | }
76 | }
77 |
78 | public int streamingQueueCurrentSize() {
79 | return maxQueueSize - streamingQueueRemainingCapacity();
80 | }
81 |
82 | public long streamingMilliSecondsBehindSource() {
83 | try {
84 | return (long) mbeanServer.getAttribute(streamingMetricsObjectName, "MilliSecondsBehindSource");
85 | } catch (Exception e) {
86 | throw new DebeziumException(e);
87 | }
88 | }
89 |
90 | }
-------------------------------------------------------------------------------- /debezium-server-databend-dist/target/classes/distro/debezium.py:
1 | import argparse
2 | import jnius_config
3 | import logging
4 | import os
5 | import sys
6 | ##### loggger
7 | import threading
8 | from pathlib import Path
9 |
10 | log = logging.getLogger(name="debezium")
11 | log.setLevel(logging.INFO)
12 | handler =
logging.StreamHandler(sys.stdout) 13 | handler.setLevel(logging.INFO) 14 | formatter = logging.Formatter('%(asctime)s %(levelname)s [%(module)s] (%(funcName)s) %(message)s') 15 | handler.setFormatter(formatter) 16 | log.addHandler(handler) 17 | 18 | 19 | ##### 20 | 21 | class Debezium(): 22 | 23 | def __init__(self, debezium_dir: str = None, conf_dir: str = None, java_home: str = None): 24 | if debezium_dir is None: 25 | self.debezium_server_dir: Path = Path(__file__).resolve().parent 26 | else: 27 | if not Path(debezium_dir).is_dir(): 28 | raise Exception("Debezium Server directory '%s' not found" % debezium_dir) 29 | self.debezium_server_dir: Path = Path(debezium_dir) 30 | log.info("Setting Debezium dir to:%s" % self.debezium_server_dir.as_posix()) 31 | 32 | if conf_dir is None: 33 | self.conf_dir = self.debezium_server_dir.joinpath("conf") 34 | else: 35 | if not Path(conf_dir).is_dir(): 36 | raise Exception("Debezium conf directory '%s' not found" % conf_dir) 37 | self.conf_dir: Path = Path(conf_dir) 38 | log.info("Setting conf dir to:%s" % self.conf_dir.as_posix()) 39 | 40 | ##### jnius 41 | if java_home: 42 | self.java_home(java_home=java_home) 43 | 44 | DEBEZIUM_CLASSPATH: list = [ 45 | self.debezium_server_dir.joinpath('*').as_posix(), 46 | self.debezium_server_dir.joinpath("lib/*").as_posix(), 47 | self.conf_dir.as_posix()] 48 | self.add_classpath(*DEBEZIUM_CLASSPATH) 49 | 50 | def add_classpath(self, *claspath): 51 | if jnius_config.vm_running: 52 | raise ValueError( 53 | "VM is already running, can't set classpath/options; VM started at %s" % jnius_config.vm_started_at) 54 | 55 | jnius_config.add_classpath(*claspath) 56 | log.info("VM Classpath: %s" % jnius_config.get_classpath()) 57 | 58 | def java_home(self, java_home: str): 59 | if jnius_config.vm_running: 60 | raise ValueError("VM is already running, can't set java home; VM started at" + jnius_config.vm_started_at) 61 | 62 | os.putenv("JAVA_HOME", java_home) 63 | os.environ["JAVA_HOME"] = java_home 64 | log.info("JAVA_HOME set to %s" % java_home) 65 | 66 | # pylint: disable=no-name-in-module 67 | def run(self, *args: str): 68 | 69 | try: 70 | jnius_config.add_options(*args) 71 | log.info("Configured jvm options:%s" % jnius_config.get_options()) 72 | 73 | from jnius import autoclass 74 | DebeziumServer = autoclass('io.debezium.server.Main') 75 | _dbz = DebeziumServer() 76 | return _dbz.main() 77 | finally: 78 | from jnius import detach 79 | detach() 80 | 81 | 82 | class DebeziumRunAsyn(threading.Thread): 83 | def __init__(self, debezium_dir: str, java_args: list, java_home: str = None): 84 | threading.Thread.__init__(self) 85 | self.debezium_dir = debezium_dir 86 | self.java_args = java_args 87 | self.java_home = java_home 88 | self._dbz: Debezium = None 89 | 90 | def run(self): 91 | self._dbz = Debezium(debezium_dir=self.debezium_dir, java_home=self.java_home) 92 | return self._dbz.run(*self.java_args) 93 | 94 | 95 | def main(): 96 | parser = argparse.ArgumentParser() 97 | parser.add_argument('--debezium_dir', type=str, default=None, 98 | help='Directory of debezium server application') 99 | parser.add_argument('--conf_dir', type=str, default=None, 100 | help='Directory of application.properties') 101 | parser.add_argument('--java_home', type=str, default=None, 102 | help='JAVA_HOME directory') 103 | _args, args = parser.parse_known_args() 104 | ds = Debezium(debezium_dir=_args.debezium_dir, conf_dir=_args.conf_dir, java_home=_args.java_home) 105 | ds.run(*args) 106 | 107 | 108 | if __name__ == '__main__': 109 | main() 110 | 
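# Usage sketch (illustrative only; the directory paths below are assumptions, not shipped defaults):
#
#   from debezium import Debezium, DebeziumRunAsyn
#
#   ds = Debezium(debezium_dir="./debezium-server-databend",
#                 conf_dir="./debezium-server-databend/conf")
#   ds.run("-Xmx1g")  # blocks the calling thread; positional args become JVM options
#
#   # or, to keep the current thread free, run the server on a background thread:
#   runner = DebeziumRunAsyn(debezium_dir="./debezium-server-databend", java_args=["-Xmx1g"])
#   runner.start()
#   runner.join()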
-------------------------------------------------------------------------------- /debezium-server-databend-dist/src/main/resources/distro/debezium.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import jnius_config 3 | import logging 4 | import os 5 | import sys 6 | ##### loggger 7 | import threading 8 | from pathlib import Path 9 | 10 | log = logging.getLogger(name="debezium") 11 | log.setLevel(logging.INFO) 12 | handler = logging.StreamHandler(sys.stdout) 13 | handler.setLevel(logging.INFO) 14 | formatter = logging.Formatter('%(asctime)s %(levelname)s [%(module)s] (%(funcName)s) %(message)s') 15 | handler.setFormatter(formatter) 16 | log.addHandler(handler) 17 | 18 | 19 | ##### 20 | 21 | class Debezium(): 22 | 23 | def __init__(self, debezium_dir: str = None, conf_dir: str = None, java_home: str = None): 24 | if debezium_dir is None: 25 | self.debezium_server_dir: Path = Path(__file__).resolve().parent 26 | else: 27 | if not Path(debezium_dir).is_dir(): 28 | raise Exception("Debezium Server directory '%s' not found" % debezium_dir) 29 | self.debezium_server_dir: Path = Path(debezium_dir) 30 | log.info("Setting Debezium dir to:%s" % self.debezium_server_dir.as_posix()) 31 | 32 | if conf_dir is None: 33 | self.conf_dir = self.debezium_server_dir.joinpath("conf") 34 | else: 35 | if not Path(conf_dir).is_dir(): 36 | raise Exception("Debezium conf directory '%s' not found" % conf_dir) 37 | self.conf_dir: Path = Path(conf_dir) 38 | log.info("Setting conf dir to:%s" % self.conf_dir.as_posix()) 39 | 40 | ##### jnius 41 | if java_home: 42 | self.java_home(java_home=java_home) 43 | 44 | DEBEZIUM_CLASSPATH: list = [ 45 | self.debezium_server_dir.joinpath('*').as_posix(), 46 | self.debezium_server_dir.joinpath("lib/*").as_posix(), 47 | self.conf_dir.as_posix()] 48 | self.add_classpath(*DEBEZIUM_CLASSPATH) 49 | 50 | def add_classpath(self, *claspath): 51 | if jnius_config.vm_running: 52 | raise ValueError( 53 | "VM is already running, can't set classpath/options; VM started at %s" % jnius_config.vm_started_at) 54 | 55 | jnius_config.add_classpath(*claspath) 56 | log.info("VM Classpath: %s" % jnius_config.get_classpath()) 57 | 58 | def java_home(self, java_home: str): 59 | if jnius_config.vm_running: 60 | raise ValueError("VM is already running, can't set java home; VM started at" + jnius_config.vm_started_at) 61 | 62 | os.putenv("JAVA_HOME", java_home) 63 | os.environ["JAVA_HOME"] = java_home 64 | log.info("JAVA_HOME set to %s" % java_home) 65 | 66 | # pylint: disable=no-name-in-module 67 | def run(self, *args: str): 68 | 69 | try: 70 | jnius_config.add_options(*args) 71 | log.info("Configured jvm options:%s" % jnius_config.get_options()) 72 | 73 | from jnius import autoclass 74 | DebeziumServer = autoclass('io.debezium.server.Main') 75 | _dbz = DebeziumServer() 76 | return _dbz.main() 77 | finally: 78 | from jnius import detach 79 | detach() 80 | 81 | 82 | class DebeziumRunAsyn(threading.Thread): 83 | def __init__(self, debezium_dir: str, java_args: list, java_home: str = None): 84 | threading.Thread.__init__(self) 85 | self.debezium_dir = debezium_dir 86 | self.java_args = java_args 87 | self.java_home = java_home 88 | self._dbz: Debezium = None 89 | 90 | def run(self): 91 | self._dbz = Debezium(debezium_dir=self.debezium_dir, java_home=self.java_home) 92 | return self._dbz.run(*self.java_args) 93 | 94 | 95 | def main(): 96 | parser = argparse.ArgumentParser() 97 | parser.add_argument('--debezium_dir', type=str, default=None, 98 | 
help='Directory of debezium server application')
99 | parser.add_argument('--conf_dir', type=str, default=None,
100 | help='Directory of application.properties')
101 | parser.add_argument('--java_home', type=str, default=None,
102 | help='JAVA_HOME directory')
103 | _args, args = parser.parse_known_args()
104 | ds = Debezium(debezium_dir=_args.debezium_dir, conf_dir=_args.conf_dir, java_home=_args.java_home)
105 | ds.run(*args)
106 |
107 |
108 | if __name__ == '__main__':
109 | main()
110 |
-------------------------------------------------------------------------------- /debezium-server-databend-sink/src/test/java/io/debezium/databend/DatabendChangeConsumerUpsertTest.java:
1 | /*
2 | *
3 | * * Copyright Databend Authors.
4 | * *
5 | * * Licensed under the Apache Software License version 2.0, available at http://www.apache.org/licenses/LICENSE-2.0
6 | *
7 | */
8 |
9 | package io.debezium.databend;
10 |
11 | import io.debezium.databend.testresources.BaseDbTest;
12 | import io.debezium.databend.testresources.TargetDatabendDB;
13 | import io.debezium.databend.testresources.TestChangeEvent;
14 | import io.debezium.databend.testresources.TestUtil;
15 | import io.debezium.server.databend.DatabendChangeConsumer;
16 | import io.quarkus.test.common.QuarkusTestResource;
17 | import io.quarkus.test.junit.QuarkusTest;
18 | import io.quarkus.test.junit.QuarkusTestProfile;
19 | import io.quarkus.test.junit.TestProfile;
20 | import org.jooq.meta.derby.sys.Sys;
21 | import org.junit.jupiter.api.AfterEach;
22 | import org.junit.jupiter.api.Assertions;
23 | import org.junit.jupiter.api.Test;
24 | import io.debezium.engine.ChangeEvent;
25 | import javax.inject.Inject;
26 | import java.sql.ResultSet;
27 | import java.sql.SQLException;
28 | import java.util.ArrayList;
29 | import java.util.HashMap;
30 | import java.util.List;
31 | import java.util.Map;
32 |
33 | /**
34 | * @author hantmac
35 | */
36 | @QuarkusTest
37 | @QuarkusTestResource(TargetDatabendDB.class)
38 | @TestProfile(DatabendChangeConsumerUpsertTest.DatabendChangeConsumerUpsertTestProfile.class)
39 | public class DatabendChangeConsumerUpsertTest extends BaseDbTest {
40 | @Inject
41 | DatabendChangeConsumer consumer;
42 |
43 | @Test
44 | public void testSimpleUpload() throws Exception {
45 | consumer.connection = new TargetDatabendDB().createConnection();
46 |
47 | String dest = "customers_upsert";
48 | List<ChangeEvent<Object, Object>> records = new ArrayList<>();
49 | records.add(TestChangeEvent.of(dest, 1, "c"));
50 | records.add(TestChangeEvent.of(dest, 2, "c"));
51 | records.add(TestChangeEvent.of(dest, 3, "c"));
52 | consumer.handleBatch(records, TestUtil.getCommitter());
53 | // check that the batch has been consumed
54 | ResultSet rs = getDatabendTableData("select * from public.debeziumcdc_customers_upsert");
55 | Assertions.assertEquals(getResultSetRowCount(rs), 3);
56 | ResultSet rs1 = getDatabendTableData("select * from public.debeziumcdc_customers_upsert where id =3");
57 | if (rs1.next()) {
58 | int id = rs1.getInt("id");
59 | Assertions.assertEquals(3, id);
60 | } else {
61 | throw new Exception("failed to get correct data");
62 | }
63 |
64 | // the first 3 records should be updated; the 4th one should be inserted
65 | records.clear();
66 | records.add(TestChangeEvent.of(dest, 1, "r"));
67 | records.add(TestChangeEvent.of(dest, 2, "d"));
68 | records.add(TestChangeEvent.of(dest, 3, "u", "UpdatednameV1"));
69 | records.add(TestChangeEvent.of(dest, 4, "c"));
70 | consumer.handleBatch(records, TestUtil.getCommitter());
71 | ResultSet rsR = getDatabendTableData("select * from public.debeziumcdc_customers_upsert where id = 1 AND __op= 'r'");
72 | Assertions.assertEquals(getResultSetRowCount(rsR), 1);
73 | ResultSet rsD = getDatabendTableData("select * from public.debeziumcdc_customers_upsert where id = 2 AND __op= 'd'");
74 | Assertions.assertEquals(getResultSetRowCount(rsD), 1);
75 | ResultSet rsU = getDatabendTableData("select * from public.debeziumcdc_customers_upsert where id = 3 AND __op= 'u'");
76 | Assertions.assertEquals(getResultSetRowCount(rsU), 1);
77 | ResultSet rsUName = getDatabendTableData("select * from public.debeziumcdc_customers_upsert where id = 3 AND first_name= 'UpdatednameV1'");
78 | Assertions.assertEquals(getResultSetRowCount(rsUName), 1);
79 | }
80 |
81 | // @AfterEach
82 | // public void clearData() throws SQLException, ClassNotFoundException {
83 | //// ResultSet rs = select("delete from public.debeziumcdc_customers_upsert");
84 | // ResultSet rs = select("Drop database if exists public");
85 | // }
86 |
87 | public static class DatabendChangeConsumerUpsertTestProfile implements QuarkusTestProfile {
88 |
89 | @Override
90 | public Map<String, String> getConfigOverrides() {
91 | Map<String, String> config = new HashMap<>();
92 |
93 | config.put("debezium.sink.databend.upsert", "true");
94 | config.put("debezium.sink.databend.upsert-keep-deletes", "true");
95 | return config;
96 | }
97 | }
98 | }
99 |
-------------------------------------------------------------------------------- /debezium-server-databend-sink/src/test/java/io/debezium/databend/DatabendChangeConsumerDeleteTest.java:
1 | /*
2 | *
3 | * * Copyright Databend Authors.
4 | * *
5 | * * Licensed under the Apache Software License version 2.0, available at http://www.apache.org/licenses/LICENSE-2.0
6 | *
7 | */
8 |
9 | package io.debezium.databend;
10 |
11 | import io.debezium.databend.testresources.BaseDbTest;
12 | import io.debezium.databend.testresources.TargetDatabendDB;
13 | import io.debezium.databend.testresources.TestChangeEvent;
14 | import io.debezium.databend.testresources.TestUtil;
15 | import io.debezium.server.databend.DatabendChangeConsumer;
16 | import io.quarkus.test.common.QuarkusTestResource;
17 | import io.quarkus.test.junit.QuarkusTest;
18 | import io.quarkus.test.junit.QuarkusTestProfile;
19 | import io.quarkus.test.junit.TestProfile;
20 | import org.jooq.meta.derby.sys.Sys;
21 | import org.junit.jupiter.api.*;
22 | import io.debezium.engine.ChangeEvent;
23 | import javax.inject.Inject;
24 | import java.sql.Connection;
25 | import java.sql.ResultSet;
26 | import java.sql.SQLException;
27 | import java.util.ArrayList;
28 | import java.util.HashMap;
29 | import java.util.List;
30 | import java.util.Map;
31 |
32 | /**
33 | * @author hantmac
34 | */
35 | @QuarkusTest
36 | @QuarkusTestResource(TargetDatabendDB.class)
37 | @TestProfile(DatabendChangeConsumerDeleteTest.DatabendChangeConsumerDeleteProfile.class)
38 | public class DatabendChangeConsumerDeleteTest extends BaseDbTest {
39 | @Inject
40 | DatabendChangeConsumer consumer;
41 | public static Connection connection;
42 | @BeforeAll
43 | static void beforeAll() throws Exception {
44 | // create the target test database over JDBC before the consumer runs
45 | TargetDatabendDB targetDatabendDB = new TargetDatabendDB();
46 | targetDatabendDB.start();
47 |
48 | connection = targetDatabendDB.createConnection();
49 | connection.createStatement().execute("CREATE DATABASE if not exists " + targetDatabendDB.DB_DATABASE);
50 | }
51 |
52 | @Test
53 | public void testSimpleUpload() throws Exception {
54 | consumer.connection = new TargetDatabendDB().createConnection();
55 |
56 | String dest = "customers_delete";
57 | List<ChangeEvent<Object, Object>> records = new ArrayList<>();
58 | records.add(TestChangeEvent.of(dest, 1, "c"));
59 | records.add(TestChangeEvent.of(dest, 2, "c"));
60 | records.add(TestChangeEvent.of(dest, 3, "c"));
61 | consumer.handleBatch(records, TestUtil.getCommitter());
62 | // check that the batch has been consumed
63 | ResultSet rs = getDatabendTableData("select * from public.debeziumcdc_customers_delete"); 64 | Assertions.assertEquals(getResultSetRowCount(rs), 3); 65 | ResultSet rs1 = getDatabendTableData("select * from public.debeziumcdc_customers_delete where id =3"); 66 | if (rs1.next()) { 67 | int id = rs1.getInt("id"); 68 | Assertions.assertEquals(3, id); 69 | } else { 70 | throw new Exception("failed to get correct data"); 71 | } 72 | 73 | // 3 records should be updated 4th one should be inserted 74 | records.clear(); 75 | records.add(TestChangeEvent.of(dest, 1, "r")); 76 | records.add(TestChangeEvent.of(dest, 2, "d")); 77 | records.add(TestChangeEvent.of(dest, 3, "u", "UpdatednameV1")); 78 | records.add(TestChangeEvent.of(dest, 4, "c")); 79 | consumer.handleBatch(records, TestUtil.getCommitter()); 80 | ResultSet rsR = getDatabendTableData("select * from public.debeziumcdc_customers_delete where id = 1 AND __op= 'r'"); 81 | Assertions.assertEquals(getResultSetRowCount(rsR), 1); 82 | ResultSet rsD = getDatabendTableData("select * from public.debeziumcdc_customers_delete where id = 2 AND __op= 'd'"); 83 | Assertions.assertEquals(getResultSetRowCount(rsD), 1); 84 | ResultSet rsU = getDatabendTableData("select * from public.debeziumcdc_customers_delete where id = 3 AND __op= 'u'"); 85 | Assertions.assertEquals(getResultSetRowCount(rsU), 1); 86 | ResultSet rsUName = getDatabendTableData("select * from public.debeziumcdc_customers_delete where id = 3 AND first_name= 'UpdatednameV1'"); 87 | Assertions.assertEquals(getResultSetRowCount(rsUName), 1); 88 | } 89 | 90 | // @AfterEach 91 | // public void clearData() throws SQLException, ClassNotFoundException { 92 | // ResultSet rs = select("Drop database if exists public"); 93 | // } 94 | 95 | public static class DatabendChangeConsumerDeleteProfile implements QuarkusTestProfile { 96 | 97 | @Override 98 | public Map getConfigOverrides() { 99 | Map config = new HashMap<>(); 100 | 101 | config.put("debezium.sink.databend.upsert", "true"); 102 | config.put("debezium.sink.databend.upsert-keep-deletes", "false"); 103 | return config; 104 | } 105 | } 106 | } 107 | -------------------------------------------------------------------------------- /debezium-server-databend-sink/src/test/resources/json/serde-with-schema.json: -------------------------------------------------------------------------------- 1 | { 2 | "schema": { 3 | "type": "struct", 4 | "fields": [ 5 | { 6 | "type": "struct", 7 | "fields": [ 8 | { 9 | "type": "int32", 10 | "optional": false, 11 | "field": "id" 12 | }, 13 | { 14 | "type": "string", 15 | "optional": false, 16 | "field": "first_name" 17 | }, 18 | { 19 | "type": "string", 20 | "optional": false, 21 | "field": "last_name" 22 | }, 23 | { 24 | "type": "string", 25 | "optional": false, 26 | "field": "email" 27 | } 28 | ], 29 | "optional": true, 30 | "name": "mysql-server-1.inventory.customers.Value", 31 | "field": "before" 32 | }, 33 | { 34 | "type": "struct", 35 | "fields": [ 36 | { 37 | "type": "int32", 38 | "optional": false, 39 | "field": "id" 40 | }, 41 | { 42 | "type": "string", 43 | "optional": false, 44 | "field": "first_name" 45 | }, 46 | { 47 | "type": "string", 48 | "optional": false, 49 | "field": "last_name" 50 | }, 51 | { 52 | "type": "string", 53 | "optional": false, 54 | "field": "email" 55 | } 56 | ], 57 | "optional": true, 58 | "name": "mysql-server-1.inventory.customers.Value", 59 | "field": "after" 60 | }, 61 | { 62 | "type": "struct", 63 | "fields": [ 64 | { 65 | "type": "string", 66 | "optional": false, 67 | "field": 
"version" 68 | }, 69 | { 70 | "type": "string", 71 | "optional": false, 72 | "field": "connector" 73 | }, 74 | { 75 | "type": "string", 76 | "optional": false, 77 | "field": "name" 78 | }, 79 | { 80 | "type": "int64", 81 | "optional": false, 82 | "field": "ts_ms" 83 | }, 84 | { 85 | "type": "boolean", 86 | "optional": true, 87 | "default": false, 88 | "field": "snapshot" 89 | }, 90 | { 91 | "type": "string", 92 | "optional": false, 93 | "field": "db" 94 | }, 95 | { 96 | "type": "string", 97 | "optional": true, 98 | "field": "table" 99 | }, 100 | { 101 | "type": "int64", 102 | "optional": false, 103 | "field": "server_id" 104 | }, 105 | { 106 | "type": "string", 107 | "optional": true, 108 | "field": "gtid" 109 | }, 110 | { 111 | "type": "string", 112 | "optional": false, 113 | "field": "file" 114 | }, 115 | { 116 | "type": "int64", 117 | "optional": false, 118 | "field": "pos" 119 | }, 120 | { 121 | "type": "int32", 122 | "optional": false, 123 | "field": "row" 124 | }, 125 | { 126 | "type": "int64", 127 | "optional": true, 128 | "field": "thread" 129 | }, 130 | { 131 | "type": "string", 132 | "optional": true, 133 | "field": "query" 134 | } 135 | ], 136 | "optional": false, 137 | "name": "io.debezium.connector.mysql.Source", 138 | "field": "source" 139 | }, 140 | { 141 | "type": "string", 142 | "optional": false, 143 | "field": "op" 144 | }, 145 | { 146 | "type": "int64", 147 | "optional": true, 148 | "field": "ts_ms" 149 | } 150 | ], 151 | "optional": false, 152 | "name": "mysql-server-1.inventory.customers.Envelope" 153 | }, 154 | "payload": { 155 | "op": "c", 156 | "ts_ms": 1465491411815, 157 | "before": null, 158 | "after": { 159 | "id": 1004, 160 | "first_name": "Anne", 161 | "last_name": "Kretchmar", 162 | "email": "annek@noanswer.org" 163 | }, 164 | "source": { 165 | "version": "0.10.0.Final", 166 | "connector": "mysql", 167 | "name": "mysql-server-1", 168 | "ts_ms": 0, 169 | "snapshot": false, 170 | "db": "inventory", 171 | "table": "customers", 172 | "server_id": 0, 173 | "gtid": null, 174 | "file": "mysql-bin.000003", 175 | "pos": 154, 176 | "row": 0, 177 | "thread": 7, 178 | "query": "INSERT INTO customers (first_name, last_name, email) VALUES ('Anne', 'Kretchmar', 'annek@noanswer.org')" 179 | } 180 | } 181 | } -------------------------------------------------------------------------------- /debezium-server-databend-sink/src/main/java/io/debezium/server/databend/tablewriter/BaseTableWriter.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright Databend Authors. 
4 | * *
5 | * * Licensed under the Apache Software License version 2.0, available at http://www.apache.org/licenses/LICENSE-2.0
6 | *
7 | */
8 |
9 | package io.debezium.server.databend.tablewriter;
10 |
11 | import com.fasterxml.jackson.databind.JsonNode;
12 | import io.debezium.server.databend.DatabendChangeConsumer;
13 | import io.debezium.server.databend.DatabendChangeEvent;
14 | import io.debezium.server.databend.DatabendUtil;
15 | import org.eclipse.microprofile.config.inject.ConfigProperty;
16 | import org.slf4j.Logger;
17 | import org.slf4j.LoggerFactory;
18 |
19 | import javax.enterprise.context.Dependent;
20 | import java.math.BigDecimal;
21 | import java.math.BigInteger;
22 | import java.sql.Connection;
23 | import java.sql.*;
24 | import java.sql.SQLException;
25 | import java.util.*;
26 | import java.util.regex.Matcher;
27 | import java.util.regex.Pattern;
28 |
29 | import static io.debezium.server.databend.DatabendUtil.addParametersToStatement;
30 |
31 | public abstract class BaseTableWriter {
32 |
33 | protected static final Logger LOGGER = LoggerFactory.getLogger(BaseTableWriter.class);
34 | final Connection connection;
35 | final String identifierQuoteCharacter;
36 | final boolean isSchemaEvolutionEnabled;
37 |
38 | public BaseTableWriter(final Connection connection, String identifierQuoteCharacter, boolean isSchemaEvolutionEnabled) {
39 | this.connection = connection;
40 | this.identifierQuoteCharacter = identifierQuoteCharacter;
41 | this.isSchemaEvolutionEnabled = isSchemaEvolutionEnabled;
42 | }
43 |
44 | public void addToTable(final RelationalTable table, final List<DatabendChangeEvent> events) {
45 | final String sql = table.prepareInsertStatement(this.identifierQuoteCharacter);
46 | int inserts = 0;
47 | List<DatabendChangeEvent> schemaEvolutionEvents = new ArrayList<>();
48 | try (PreparedStatement statement = connection.prepareStatement(sql)) {
49 | connection.setAutoCommit(false);
50 | for (DatabendChangeEvent event : events) {
51 | if (DatabendUtil.isSchemaChanged(event.schema()) && isSchemaEvolutionEnabled) {
52 | schemaEvolutionEvents.add(event);
53 | } else {
54 | addParametersToStatement(statement, event);
55 | statement.addBatch();
56 | }
57 | }
58 |
59 | // Each batch needs to have the same schemas, so get the buffered records out
60 | int[] batchResult = statement.executeBatch();
61 | inserts = Arrays.stream(batchResult).sum();
62 | LOGGER.debug("Inserted {} rows", inserts);
63 | } catch (SQLException e) {
64 | throw new RuntimeException(e);
65 | }
66 | // handle schema evolution
67 | try {
68 | schemaEvolution(table, schemaEvolutionEvents);
69 | } catch (Exception e) {
70 | throw new RuntimeException(e);
71 | }
72 | }
73 |
74 | public void schemaEvolution(RelationalTable table, List<DatabendChangeEvent> events) {
75 | for (DatabendChangeEvent event : events) {
76 | Map<String, Object> values = event.valueAsMap();
77 | for (Map.Entry<String, Object> entry : values.entrySet()) {
78 | // String key = entry.getKey();
79 | // Object value = entry.getValue();
80 | // System.out.println("Key: " + key + ", Value: " + value);
81 | if (entry.getKey().contains("ddl") && entry.getValue().toString().toLowerCase().contains("alter table")) {
82 | String tableName = getFirstWordAfterAlterTable(entry.getValue().toString());
83 | String ddlSql = replaceFirstWordAfterTable(entry.getValue().toString(), table.databaseName + "."
+ tableName);
84 | try (PreparedStatement statement = connection.prepareStatement(ddlSql)) {
85 | LOGGER.debug("Executing DDL: {}", ddlSql);
86 | statement.execute();
87 | } catch (SQLException e) {
88 | throw new RuntimeException(e);
89 | }
90 | }
91 | }
92 | }
93 | }
94 |
95 | public static String replaceFirstWordAfterTable(String statement, String newTableName) {
96 | if (statement == null || newTableName == null) {
97 | return statement;
98 | }
99 | Pattern pattern = Pattern.compile("(?<=table )\\w+");
100 | Matcher matcher = pattern.matcher(statement);
101 | return matcher.replaceFirst(newTableName);
102 | }
103 |
104 | public static String getFirstWordAfterAlterTable(String alterStatement) {
105 | if (alterStatement == null) {
106 | return null;
107 | }
108 | String[] parts = alterStatement.split(" ");
109 | if (parts.length >= 3) {
110 | return parts[2];
111 | }
112 | return null;
113 | }
114 | }
115 |
116 |
-------------------------------------------------------------------------------- /debezium-server-databend-sink/src/test/java/io/debezium/databend/testresources/BaseDbTest.java:
1 | /*
2 | *
3 | * * Copyright Databend Authors.
4 | * *
5 | * * Licensed under the Apache Software License version 2.0, available at http://www.apache.org/licenses/LICENSE-2.0
6 | *
7 | */
8 |
9 | package io.debezium.databend.testresources;
10 |
11 | import com.databend.jdbc.DatabendResultSet;
12 | import org.apache.spark.sql.Dataset;
13 | import org.apache.spark.sql.Row;
14 | import org.apache.spark.sql.SparkSession;
15 | import org.eclipse.microprofile.config.inject.ConfigProperty;
16 |
17 | import java.sql.ResultSet;
18 | import java.sql.SQLException;
19 | import java.sql.Statement;
20 |
21 | /**
22 | * @author hantmac
23 | */
24 | public class BaseDbTest {
25 | @ConfigProperty(name = "debezium.sink.databend.table-prefix", defaultValue = "debezium")
26 | String tablePrefix;
27 | public static SparkSession spark = SparkSession.builder().appName("unittest").master("local[2]").getOrCreate();
28 |
29 | public static void PGCreateTestDataTable() throws Exception {
30 | String sql = "" +
31 | " CREATE TABLE IF NOT EXISTS inventory.test_data (\n" +
32 | " c_id INTEGER ,\n" +
33 | " c_text TEXT,\n" +
34 | " c_varchar VARCHAR" +
35 | " );";
36 | SourcePostgresqlDB.runSQL(sql);
37 | }
38 |
39 | public static ResultSet select(String sql)
40 | throws SQLException, ClassNotFoundException {
41 | Statement statement = (new TargetDatabendDB()).createConnection().createStatement();
42 | DatabendResultSet resultSet;
43 | try {
44 | resultSet = (DatabendResultSet) statement.executeQuery(sql);
45 | } catch (Throwable e) {
46 | try {
47 | statement.close();
48 | } catch (Throwable closeException) {
49 | if (closeException != e) {
50 | e.addSuppressed(closeException);
51 | }
52 | }
53 |
54 | throw e;
55 | }
56 | return resultSet;
57 | }
58 |
59 | public static ResultSet getDatabendTableData(String sql) throws SQLException, ClassNotFoundException {
60 | return select(sql);
61 | }
62 |
63 | public static int getResultSetRowCount(ResultSet rs) throws SQLException {
64 | int num = 0;
65 | while (rs.next()) {
66 | num += 1;
67 | }
68 | return num;
69 | }
70 |
71 |
72 | public static int PGLoadTestDataTable(int numRows) {
73 | return PGLoadTestDataTable(numRows, false);
74 | }
75 |
76 | public static int PGLoadTestDataTable(int numRows, boolean addRandomDelay) {
77 | int numInsert = 0;
78 | do {
79 |
80 | new Thread(() -> {
81 | try {
82 | if
(addRandomDelay) { 83 | Thread.sleep(TestUtil.randomInt(20000, 100000)); 84 | } 85 | String sql = "INSERT INTO inventory.test_data (c_id, c_text, c_varchar ) " + 86 | "VALUES "; 87 | StringBuilder values = new StringBuilder("\n(" + TestUtil.randomInt(15, 32) + ", '" + TestUtil.randomString(524) + "', '" + TestUtil.randomString(524) + "')"); 88 | for (int i = 0; i < 100; i++) { 89 | values.append("\n,(").append(TestUtil.randomInt(15, 32)).append(", '").append(TestUtil.randomString(524)).append("', '").append(TestUtil.randomString(524)).append("')"); 90 | } 91 | SourcePostgresqlDB.runSQL(sql + values); 92 | SourcePostgresqlDB.runSQL("COMMIT;"); 93 | } catch (Exception e) { 94 | Thread.currentThread().interrupt(); 95 | } 96 | }).start(); 97 | 98 | numInsert += 100; 99 | } while (numInsert <= numRows); 100 | return numInsert; 101 | } 102 | 103 | public static void mysqlCreateTestDataTable() throws Exception { 104 | String sql = "\n" + 105 | " CREATE TABLE IF NOT EXISTS inventory.test_data (\n" + 106 | " c_id INTEGER ,\n" + 107 | " c_text TEXT,\n" + 108 | " c_varchar TEXT\n" + 109 | " );"; 110 | SourceMysqlDB.runSQL(sql); 111 | } 112 | 113 | public static int mysqlLoadTestDataTable(int numRows) throws Exception { 114 | int numInsert = 0; 115 | do { 116 | String sql = "INSERT INTO inventory.test_data (c_id, c_text, c_varchar ) " + 117 | "VALUES "; 118 | StringBuilder values = new StringBuilder("\n(" + TestUtil.randomInt(15, 32) + ", '" + TestUtil.randomString(524) + "', '" + TestUtil.randomString(524) + "')"); 119 | for (int i = 0; i < 10; i++) { 120 | values.append("\n,(").append(TestUtil.randomInt(15, 32)).append(", '").append(TestUtil.randomString(524)).append("', '").append(TestUtil.randomString(524)).append("')"); 121 | } 122 | SourceMysqlDB.runSQL(sql + values); 123 | numInsert += 10; 124 | } while (numInsert <= numRows); 125 | return numInsert; 126 | } 127 | } 128 | -------------------------------------------------------------------------------- /debezium-server-databend-sink/src/test/resources/json/serde-with-schema2.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "struct", 3 | "fields": [ 4 | { 5 | "type": "struct", 6 | "fields": [ 7 | { 8 | "type": "struct", 9 | "fields": [ 10 | { 11 | "type": "int32", 12 | "optional": false, 13 | "field": "id" 14 | }, 15 | { 16 | "type": "int32", 17 | "optional": false, 18 | "name": "io.debezium.time.Date", 19 | "version": 1, 20 | "field": "order_date" 21 | }, 22 | { 23 | "type": "int32", 24 | "optional": false, 25 | "field": "purchaser" 26 | }, 27 | { 28 | "type": "int32", 29 | "optional": false, 30 | "field": "quantity" 31 | }, 32 | { 33 | "type": "int32", 34 | "optional": false, 35 | "field": "product_id" 36 | } 37 | ], 38 | "optional": true, 39 | "name": "testc.inventory.orders.Value", 40 | "field": "before" 41 | }, 42 | { 43 | "type": "struct", 44 | "fields": [ 45 | { 46 | "type": "int32", 47 | "optional": false, 48 | "field": "id" 49 | }, 50 | { 51 | "type": "int32", 52 | "optional": false, 53 | "name": "io.debezium.time.Date", 54 | "version": 1, 55 | "field": "order_date" 56 | }, 57 | { 58 | "type": "int32", 59 | "optional": false, 60 | "field": "purchaser" 61 | }, 62 | { 63 | "type": "int32", 64 | "optional": false, 65 | "field": "quantity" 66 | }, 67 | { 68 | "type": "int32", 69 | "optional": false, 70 | "field": "product_id" 71 | } 72 | ], 73 | "optional": true, 74 | "name": "testc.inventory.orders.Value", 75 | "field": "after" 76 | }, 77 | { 78 | "type": "struct", 79 | "fields": [ 80 
| { 81 | "type": "string", 82 | "optional": false, 83 | "field": "version" 84 | }, 85 | { 86 | "type": "string", 87 | "optional": false, 88 | "field": "connector" 89 | }, 90 | { 91 | "type": "string", 92 | "optional": false, 93 | "field": "name" 94 | }, 95 | { 96 | "type": "int64", 97 | "optional": false, 98 | "field": "ts_ms" 99 | }, 100 | { 101 | "type": "string", 102 | "optional": true, 103 | "name": "io.debezium.data.Enum", 104 | "version": 1, 105 | "parameters": { 106 | "allowed": "true,last,false" 107 | }, 108 | "default": "false", 109 | "field": "snapshot" 110 | }, 111 | { 112 | "type": "string", 113 | "optional": false, 114 | "field": "db" 115 | }, 116 | { 117 | "type": "string", 118 | "optional": false, 119 | "field": "schema" 120 | }, 121 | { 122 | "type": "string", 123 | "optional": false, 124 | "field": "table" 125 | }, 126 | { 127 | "type": "int64", 128 | "optional": true, 129 | "field": "txId" 130 | }, 131 | { 132 | "type": "int64", 133 | "optional": true, 134 | "field": "lsn" 135 | }, 136 | { 137 | "type": "int64", 138 | "optional": true, 139 | "field": "xmin" 140 | } 141 | ], 142 | "optional": false, 143 | "name": "io.debezium.connector.postgresql.Source", 144 | "field": "source" 145 | }, 146 | { 147 | "type": "string", 148 | "optional": false, 149 | "field": "op" 150 | }, 151 | { 152 | "type": "int64", 153 | "optional": true, 154 | "field": "ts_ms" 155 | }, 156 | { 157 | "type": "struct", 158 | "fields": [ 159 | { 160 | "type": "string", 161 | "optional": false, 162 | "field": "id" 163 | }, 164 | { 165 | "type": "int64", 166 | "optional": false, 167 | "field": "total_order" 168 | }, 169 | { 170 | "type": "int64", 171 | "optional": false, 172 | "field": "data_collection_order" 173 | } 174 | ], 175 | "optional": true, 176 | "field": "transaction" 177 | } 178 | ], 179 | "optional": false, 180 | "name": "testc.inventory.orders.Envelope", 181 | "field": "line" 182 | } 183 | ], 184 | "optional": false 185 | } -------------------------------------------------------------------------------- /debezium-server-databend-sink/src/test/java/io/debezium/databend/testresources/TestChangeEvent.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright Databend Authors. 
4 | * *
5 | * * Licensed under the Apache Software License version 2.0, available at http://www.apache.org/licenses/LICENSE-2.0
6 | *
7 | */
8 |
9 | package io.debezium.databend.testresources;
10 |
11 | import io.debezium.engine.ChangeEvent;
12 | import io.debezium.engine.RecordChangeEvent;
13 | import io.debezium.server.databend.DatabendChangeEvent;
14 |
15 | import java.time.Instant;
16 |
17 | /**
18 | * helper class used to generate test customer change events
19 | *
20 | * @author hantmac
21 | */
22 | public class TestChangeEvent<K, V> implements ChangeEvent<K, V>, RecordChangeEvent<V> {
23 |
24 | private final K key;
25 | private final V value;
26 | private final String destination;
27 |
28 | public TestChangeEvent(K key, V value, String destination) {
29 | this.key = key;
30 | this.value = value;
31 | this.destination = destination;
32 | }
33 |
34 | public TestChangeEvent(V value) {
35 | this(null, value, null);
36 | }
37 |
38 | public static TestChangeEvent<Object, Object> of(String destination, Integer id, String operation, String name,
39 | Long epoch) {
40 | final DatabendChangeEvent t = new DatabendChangeEventBuilder()
41 | .destination(destination)
42 | .addKeyField("id", id)
43 | .addField("first_name", name)
44 | .addField("__op", operation)
45 | .addField("__source_ts_ms", epoch)
46 | .addField("__deleted", operation.equals("d"))
47 | .build();
48 | final DatabendChangeEvent tk = new DatabendChangeEventBuilder()
49 | .destination(destination)
50 | .addKeyField("id", id)
51 | .build();
52 |
53 | final String key = "{" +
54 | "\"schema\":" + tk.schema().keySchema() + "," +
55 | "\"payload\":" + tk.key() +
56 | "} ";
57 | final String val = "{" +
58 | "\"schema\":" + t.schema().valueSchema() + "," +
59 | "\"payload\":" + t.value() +
60 | "} ";
61 | return new TestChangeEvent<>(key, val, destination);
62 | }
63 |
64 | public static TestChangeEvent<Object, Object> ofCompositeKey(String destination, Integer id, String operation, String name,
65 | Long epoch) {
66 | final DatabendChangeEvent t = new DatabendChangeEventBuilder()
67 | .destination(destination)
68 | .addKeyField("id", id)
69 | .addKeyField("first_name", name)
70 | .addField("__op", operation)
71 | .addField("__source_ts_ms", epoch)
72 | .addField("__deleted", operation.equals("d"))
73 | .build();
74 | final DatabendChangeEvent tk = new DatabendChangeEventBuilder()
75 | .destination(destination)
76 | .addKeyField("id", id)
77 | .addKeyField("first_name", name)
78 | .build();
79 |
80 | final String key = "{" +
81 | "\"schema\":" + tk.schema().keySchema() + "," +
82 | "\"payload\":" + tk.key() +
83 | "} ";
84 | final String val = "{" +
85 | "\"schema\":" + t.schema().valueSchema() + "," +
86 | "\"payload\":" + t.value() +
87 | "} ";
88 |
89 | return new TestChangeEvent<>(key, val, destination);
90 | }
91 |
92 | public static TestChangeEvent<Object, Object> of(String destination, Integer id, String operation) {
93 | return of(destination, id, operation, TestUtil.randomString(12), Instant.now().toEpochMilli());
94 | }
95 |
96 | public static TestChangeEvent<Object, Object> of(String destination, Integer id, String operation, String name) {
97 | return of(destination, id, operation, name, Instant.now().toEpochMilli());
98 | }
99 |
100 | public static TestChangeEvent<Object, Object> of(String destination, Integer id, String operation, Long epoch) {
101 | return of(destination, id, operation, TestUtil.randomString(12), epoch);
102 | }
103 |
104 | public static TestChangeEvent<Object, Object> ofNoKey(String destination, Integer id, String operation, String name,
105 | Long epoch) {
106 | final DatabendChangeEvent t = new
DatabendChangeEventBuilder()
107 | .destination(destination)
108 | .addField("id", id)
109 | .addField("first_name", name)
110 | .addField("__op", operation)
111 | .addField("__source_ts_ms", epoch)
112 | .addField("__deleted", operation.equals("d"))
113 | .build();
114 |
115 | final String val = "{" +
116 | "\"schema\":" + t.schema().valueSchema() + "," +
117 | "\"payload\":" + t.value() +
118 | "} ";
119 | return new TestChangeEvent<>(null, val, destination);
120 | }
121 |
122 | @Override
123 | public K key() {
124 | return key;
125 | }
126 |
127 | @Override
128 | public V value() {
129 | return value;
130 | }
131 |
132 | @Override
133 | public V record() {
134 | return value;
135 | }
136 |
137 | @Override
138 | public String destination() {
139 | return destination;
140 | }
141 |
142 | @Override
143 | public String toString() {
144 | return "EmbeddedEngineChangeEvent [key=" + key + ", value=" + value + ", sourceRecord=" + destination + "]";
145 | }
146 |
147 | }
-------------------------------------------------------------------------------- /debezium-server-databend-sink/src/test/java/io/debezium/databend/tablewriter/RelationalTableTest.java:
1 | /*
2 | *
3 | * * Copyright Databend Authors.
4 | * *
5 | * * Licensed under the Apache Software License version 2.0, available at http://www.apache.org/licenses/LICENSE-2.0
6 | *
7 | */
8 |
9 | package io.debezium.databend.tablewriter;
10 |
11 | import com.fasterxml.jackson.core.JsonProcessingException;
12 | import io.debezium.DebeziumException;
13 |
14 | import java.sql.*;
15 | import java.util.Arrays;
16 | import java.util.LinkedHashMap;
17 |
18 | import io.debezium.databend.testresources.TargetDatabendDB;
19 | import com.mongodb.assertions.Assertions;
20 | import io.debezium.server.databend.tablewriter.RelationalTable;
21 |
22 | import static io.debezium.server.databend.DatabendChangeConsumer.mapper;
23 |
24 | import org.junit.Assert;
25 | import org.junit.jupiter.api.AfterAll;
26 | import org.junit.jupiter.api.BeforeAll;
27 | import org.junit.jupiter.api.Test;
28 |
29 | class RelationalTableTest {
30 | public static Connection connection;
31 |
32 | @BeforeAll
33 | static void beforeAll() throws Exception {
34 | // create the test database and tables over JDBC
35 | TargetDatabendDB targetDatabendDB = new TargetDatabendDB();
36 | targetDatabendDB.start();
37 |
38 | connection = targetDatabendDB.createConnection();
39 | connection.createStatement().execute("CREATE DATABASE if not exists " + targetDatabendDB.DB_DATABASE);
40 |
41 | String createTableWithPkSql = "CREATE TABLE IF NOT EXISTS " + targetDatabendDB.DB_DATABASE + ".tbl_with_pk (id BIGINT,coll1 VARCHAR, coll2 INT, coll3 FLOAT, coll4 INT);";
42 | connection.createStatement().execute(createTableWithPkSql);
43 |
44 | String createTableWithoutPkSql = "CREATE TABLE IF NOT EXISTS " + targetDatabendDB.DB_DATABASE + ".tbl_without_pk (id BIGINT,coll1 VARCHAR, coll2 INT, coll3 FLOAT, coll4 INT);";
45 | connection.createStatement().execute(createTableWithoutPkSql);
46 | }
47 |
48 | @AfterAll
49 | static void tearDown() throws SQLException {
50 | Statement stmt = connection.createStatement();
51 | stmt.execute("DROP DATABASE public");
52 | }
53 |
54 | @Test
55 | void complexTypeBinding() throws SQLException {
56 | String withPK = "INSERT INTO \"public\".\"tbl_with_pk\" (\"id\", \"coll1\",\"coll2\",\"coll3\",\"coll4\") values (?, ?, ?, ?, ?)";
57 | LinkedHashMap<Integer, String> testhashmap = new LinkedHashMap<>();
58 | testhashmap.put(100,
"Amit"); 59 | 60 | try (PreparedStatement statement = connection.prepareStatement(withPK)) { 61 | (statement).setInt(1, 1); 62 | (statement).setString(2, mapper.writeValueAsString(testhashmap)); 63 | statement.setInt(3, 1); 64 | statement.setFloat(4, 1); 65 | statement.setInt(5, 1); 66 | statement.addBatch(); 67 | int[] ans = statement.executeBatch(); 68 | 69 | System.out.println("Rows inserted=" + Arrays.stream(ans).sum()); 70 | } catch (JsonProcessingException e) { 71 | throw new RuntimeException(e); 72 | } 73 | } 74 | 75 | 76 | @Test 77 | void experiment() throws SQLException { 78 | try (Statement statement = connection.createStatement()) { 79 | ResultSet resultSet = statement.executeQuery("SELECT * FROM public.tbl_with_pk"); 80 | 81 | // 处理结果集 82 | while (resultSet.next()) { 83 | // 读取每行数据的具体字段值 84 | int id = resultSet.getInt("id"); 85 | String coll1 = resultSet.getString("coll1"); 86 | 87 | System.out.println("ID: " + id + ", coll1: " + coll1); 88 | } 89 | 90 | resultSet.close(); 91 | } 92 | } 93 | 94 | 95 | @Test 96 | void hasPK() { 97 | RelationalTable tbl_without_pk = new RelationalTable("", "public", "tbl_without_pk", connection); 98 | RelationalTable tbl_with_pk = new RelationalTable("id", "public", "tbl_with_pk", connection); 99 | Assertions.assertTrue(tbl_with_pk.hasPK()); 100 | Assertions.assertFalse(tbl_without_pk.hasPK()); 101 | } 102 | 103 | @Test 104 | void preparedInsertStatement() { 105 | String withPK = "REPLACE INTO public.tbl_with_pk on(id)\n" + 106 | "VALUES (?, ?, ?, ?, ?)"; 107 | String withoutPK = "INSERT INTO \"public\".\"tbl_without_pk\" \n" + 108 | "VALUES (?, ?, ?, ?, ?)"; 109 | RelationalTable tbl_without_pk = new RelationalTable("", "public", "tbl_without_pk", connection); 110 | RelationalTable tbl_with_pk = new RelationalTable("id", "public", "tbl_with_pk", connection); 111 | System.out.println(tbl_with_pk.preparedUpsertStatement("")); 112 | Assert.assertEquals(withPK, tbl_with_pk.preparedUpsertStatement("")); 113 | Assert.assertEquals(withoutPK, tbl_without_pk.prepareInsertStatement("\"")); 114 | } 115 | 116 | @Test 117 | void preparedDeleteStatement() { 118 | String withPK = "DELETE FROM public.tbl_with_pk \n" + 119 | "WHERE id = :id"; 120 | RelationalTable tbl_without_pk = new RelationalTable("", "public", "tbl_without_pk", connection); 121 | RelationalTable tbl_with_pk = new RelationalTable("id", "public", "tbl_with_pk", connection); 122 | System.out.println(tbl_with_pk.preparedDeleteStatement("", ":id")); 123 | Assert.assertEquals(withPK, tbl_with_pk.preparedDeleteStatement("", ":id")); 124 | Assert.assertThrows(DebeziumException.class, () -> tbl_without_pk.preparedDeleteStatement("", ":id")); 125 | } 126 | } -------------------------------------------------------------------------------- /debezium-server-databend-sink/src/test/java/io/debezium/databend/testresources/DatabendChangeEventBuilder.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright Databend Authors. 
4 | * * 5 | * * Licensed under the Apache Software License version 2.0, available at http://www.apache.org/licenses/LICENSE-2.0 6 | * 7 | */ 8 | 9 | package io.debezium.databend.testresources; 10 | 11 | import io.debezium.databend.DatabendChangeConsumerTest; 12 | import io.debezium.server.databend.DatabendChangeEvent; 13 | 14 | import java.util.Iterator; 15 | import java.util.Map; 16 | 17 | import com.fasterxml.jackson.databind.JsonNode; 18 | import com.fasterxml.jackson.databind.node.ArrayNode; 19 | import com.fasterxml.jackson.databind.node.JsonNodeFactory; 20 | import com.fasterxml.jackson.databind.node.ObjectNode; 21 | import org.slf4j.Logger; 22 | import org.slf4j.LoggerFactory; 23 | 24 | /** 25 | * helper class used to generate test change events 26 | * 27 | * @author hantmac 28 | */ 29 | public class DatabendChangeEventBuilder { 30 | 31 | protected static final Logger LOGGER = LoggerFactory.getLogger(DatabendChangeConsumerTest.class); 32 | ObjectNode payload = JsonNodeFactory.instance.objectNode(); 33 | ObjectNode keyPayload = JsonNodeFactory.instance.objectNode(); 34 | String destination = "test"; 35 | 36 | public DatabendChangeEventBuilder() { 37 | } 38 | 39 | public DatabendChangeEventBuilder destination(String destination) { 40 | this.destination = destination; 41 | return this; 42 | } 43 | 44 | public DatabendChangeEventBuilder addField(String parentFieldName, String name, String val) { 45 | ObjectNode nestedField = JsonNodeFactory.instance.objectNode(); 46 | nestedField.put(name, val); 47 | this.payload.set(parentFieldName, nestedField); 48 | return this; 49 | } 50 | 51 | public DatabendChangeEventBuilder addField(String parentFieldName, String name, int val) { 52 | ObjectNode nestedField = JsonNodeFactory.instance.objectNode(); 53 | nestedField.put(name, val); 54 | this.payload.set(parentFieldName, nestedField); 55 | return this; 56 | } 57 | 58 | public DatabendChangeEventBuilder addField(String parentFieldName, String name, boolean val) { 59 | 60 | ObjectNode nestedField = JsonNodeFactory.instance.objectNode(); 61 | if (this.payload.has(parentFieldName)) { 62 | nestedField = (ObjectNode) this.payload.get(parentFieldName); 63 | } 64 | nestedField.put(name, val); 65 | this.payload.set(parentFieldName, nestedField); 66 | return this; 67 | } 68 | 69 | public DatabendChangeEventBuilder addField(String name, int val) { 70 | payload.put(name, val); 71 | return this; 72 | } 73 | 74 | public DatabendChangeEventBuilder addField(String name, String val) { 75 | payload.put(name, val); 76 | return this; 77 | } 78 | 79 | public DatabendChangeEventBuilder addField(String name, long val) { 80 | payload.put(name, val); 81 | return this; 82 | } 83 | 84 | public DatabendChangeEventBuilder addField(String name, double val) { 85 | payload.put(name, val); 86 | return this; 87 | } 88 | 89 | public DatabendChangeEventBuilder addField(String name, boolean val) { 90 | payload.put(name, val); 91 | return this; 92 | } 93 | 94 | public DatabendChangeEventBuilder addKeyField(String name, int val) { 95 | keyPayload.put(name, val); 96 | payload.put(name, val); 97 | return this; 98 | } 99 | 100 | public DatabendChangeEventBuilder addKeyField(String name, String val) { 101 | keyPayload.put(name, val); 102 | payload.put(name, val); 103 | return this; 104 | } 105 | 106 | public DatabendChangeEvent build() { 107 | return new DatabendChangeEvent( 108 | this.destination, 109 | payload, 110 | keyPayload, 111 | this.valueSchema(), 112 | this.keySchema() 113 | ); 114 | } 115 | 116 | private ObjectNode valueSchema() 
{ 117 | return getSchema(payload); 118 | } 119 | 120 | private ObjectNode keySchema() { 121 | return getSchema(keyPayload); 122 | } 123 | 124 | private ObjectNode getSchema(ObjectNode node) { 125 | ObjectNode schema = JsonNodeFactory.instance.objectNode(); 126 | 127 | ArrayNode fs = getSchemaFields(node); 128 | if (fs.isEmpty()) { 129 | return null; 130 | } else { 131 | schema.put("type", "struct"); 132 | schema.set("fields", fs); 133 | return schema; 134 | } 135 | } 136 | 137 | private ArrayNode getSchemaFields(ObjectNode node) { 138 | ArrayNode fields = JsonNodeFactory.instance.arrayNode(); 139 | Iterator<Map.Entry<String, JsonNode>> iter = node.fields(); 140 | while (iter.hasNext()) { 141 | Map.Entry<String, JsonNode> field = iter.next(); 142 | 143 | ObjectNode schemaField = JsonNodeFactory.instance.objectNode(); 144 | if (field.getValue().isContainerNode()) { 145 | schemaField.put("type", "struct"); 146 | schemaField.set("fields", getSchemaFields((ObjectNode) field.getValue())); 147 | } else if (field.getValue().isInt()) { 148 | schemaField.put("type", "int32"); 149 | } else if (field.getValue().isLong()) { 150 | schemaField.put("type", "int64"); 151 | } else if (field.getValue().isBoolean()) { 152 | schemaField.put("type", "boolean"); 153 | } else if (field.getValue().isTextual()) { 154 | schemaField.put("type", "string"); 155 | } else if (field.getValue().isFloat()) { 156 | schemaField.put("type", "float64"); 157 | } 158 | if (keyPayload.has(field.getKey())) { 159 | schemaField.put("optional", false); 160 | } else { 161 | schemaField.put("optional", true); 162 | } 163 | schemaField.put("field", field.getKey()); 164 | fields.add(schemaField); 165 | } 166 | 167 | return fields; 168 | } 169 | 170 | } -------------------------------------------------------------------------------- /debezium-server-databend-sink/src/main/java/io/debezium/server/databend/tablewriter/RelationalTable.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright Databend Authors. 
4 | * * 5 | * * Licensed under the Apache Software License version 2.0, available at http://www.apache.org/licenses/LICENSE-2.0 6 | * 7 | */ 8 | 9 | package io.debezium.server.databend.tablewriter; 10 | 11 | import com.databend.client.data.DatabendRawType; 12 | import io.debezium.DebeziumException; 13 | 14 | import java.sql.*; 15 | import java.util.HashMap; 16 | import java.util.Map; 17 | import java.util.Set; 18 | import java.util.stream.Collectors; 19 | 20 | import org.slf4j.Logger; 21 | import org.slf4j.LoggerFactory; 22 | 23 | public class RelationalTable { 24 | protected static final Logger LOGGER = LoggerFactory.getLogger(RelationalTable.class); 25 | 26 | public final String tableName; 27 | public final String databaseName; 28 | public final Map<String, DatabendRawType> columns = new HashMap<>(); 29 | public final Map<String, Integer> primaryKeysMap = new HashMap<>(); 30 | public final String primaryKey; 31 | 32 | 33 | public RelationalTable(String primaryKey, String databaseName, String tableName, Connection conn) throws DebeziumException { 34 | this.databaseName = databaseName; 35 | this.tableName = tableName; 36 | this.primaryKey = primaryKey; 37 | 38 | try { 39 | DatabaseMetaData meta = conn.getMetaData(); 40 | try (ResultSet tables = meta.getColumns(null, this.databaseName, this.tableName, null)) { 41 | 42 | int numTablesFound = 0; 43 | if (tables != null && tables.next()) { 44 | numTablesFound++; 45 | // String catalog = tables.getString("TABLE_CAT"); 46 | String schema = tables.getString("TABLE_SCHEM"); 47 | String table = tables.getString("TABLE_NAME"); 48 | 49 | // get table Columns 50 | try (ResultSet tColumns = meta.getColumns(null, schema, tableName, null)) { 51 | while (tColumns.next()) { 52 | String columnName = tColumns.getString("COLUMN_NAME"); 53 | DatabendRawType databendRawType = new DatabendRawType(tColumns.getString("TYPE_NAME")); 54 | columns.put(columnName, databendRawType); 55 | } 56 | } 57 | 58 | // get table PK 59 | if (!primaryKey.isEmpty()) { 60 | primaryKeysMap.put(primaryKey, 1); 61 | } 62 | LOGGER.warn("Loaded Databend table {}.{} \nColumns:{} \nPK:{}", schema, table, columns, primaryKeysMap); 63 | } 64 | 65 | if (numTablesFound == 0) { 66 | throw new TableNotFoundException(String.format("RelationalTable %s.%s not found", databaseName, tableName)); 67 | } 68 | } 69 | 70 | } catch (SQLException e) { 71 | throw new DebeziumException("Failed to read table from database", e); 72 | } 73 | } 74 | 75 | public boolean hasPK() { 76 | return !primaryKeysMap.isEmpty(); 77 | } 78 | 79 | public String tableId() { 80 | return String.format("%s.%s", databaseName, tableName); 81 | } 82 | 83 | public String preparedUpsertStatement(String identifierQuoteCharacter) { 84 | StringBuilder sql = new StringBuilder(); 85 | sql.append(String.format("REPLACE INTO %s%s%s.%s%s%s on(%s%s%s)\n", identifierQuoteCharacter, databaseName, identifierQuoteCharacter, identifierQuoteCharacter, tableName, identifierQuoteCharacter, identifierQuoteCharacter, primaryKey, identifierQuoteCharacter)); 86 | Set<String> fields = this.columns.keySet(); 87 | // sql.append(String.format("(%s) \n", fields.stream().map(f -> String.format("%s%s%s ", identifierQuoteCharacter, f, identifierQuoteCharacter)).collect(Collectors.joining(", ")))); 88 | 89 | sql.append(String.format("VALUES (%s)\n", fields.stream().map(f -> "?").collect(Collectors.joining(", ")))); 90 | 91 | return sql.toString().trim(); 92 | } 93 | 94 | public String prepareInsertStatement(String identifierQuoteCharacter) { 95 | StringBuilder sql = new StringBuilder(); 96 | sql.append(String.format("INSERT INTO 
%s%s%s.%s%s%s \n", identifierQuoteCharacter, databaseName, identifierQuoteCharacter, identifierQuoteCharacter, tableName, identifierQuoteCharacter)); 97 | Set fields = this.columns.keySet(); 98 | // sql.append(String.format("(%s) \n", fields.stream().map(f -> String.format("%s%s%s ", identifierQuoteCharacter, f, identifierQuoteCharacter)).collect(Collectors.joining(", ")))); 99 | 100 | sql.append(String.format("VALUES (%s)\n", fields.stream().map(f -> "?").collect(Collectors.joining(", ")))); 101 | 102 | return sql.toString().trim(); 103 | } 104 | 105 | public String preparedDeleteStatement(String identifierQuoteCharacter, String deleteVal) { 106 | 107 | if (!hasPK()) { 108 | throw new DebeziumException("Cant delete from a table without primary key!"); 109 | } 110 | 111 | StringBuilder sql = new StringBuilder(); 112 | sql.append(String.format("DELETE FROM %s%s%s.%s%s%s \nWHERE ", identifierQuoteCharacter, databaseName, identifierQuoteCharacter, identifierQuoteCharacter, tableName, identifierQuoteCharacter)); 113 | 114 | Set fields = this.primaryKeysMap.keySet(); 115 | 116 | sql.append(String.format("%s \n", fields.stream().map(f -> String.format("%s%s%s = %s ", identifierQuoteCharacter, f, identifierQuoteCharacter, deleteVal)).collect(Collectors.joining("\n AND ")))); 117 | 118 | return sql.toString().trim(); 119 | } 120 | 121 | } -------------------------------------------------------------------------------- /debezium-server-databend-sink/src/main/java/io/debezium/server/databend/tablewriter/UpsertTableWriter.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright Databend Authors. 4 | * * 5 | * * Licensed under the Apache Software License version 2.0, available at http://www.apache.org/licenses/LICENSE-2.0 6 | * 7 | */ 8 | 9 | package io.debezium.server.databend.tablewriter; 10 | 11 | import io.debezium.server.databend.DatabendChangeEvent; 12 | 13 | import java.math.BigDecimal; 14 | import java.math.BigInteger; 15 | import java.sql.Connection; 16 | import java.sql.*; 17 | import java.sql.SQLException; 18 | import java.util.*; 19 | import java.util.concurrent.ConcurrentHashMap; 20 | 21 | import com.fasterxml.jackson.databind.JsonNode; 22 | import com.google.common.collect.ImmutableMap; 23 | import io.debezium.server.databend.DatabendUtil; 24 | import org.slf4j.Logger; 25 | import org.slf4j.LoggerFactory; 26 | 27 | import static io.debezium.server.databend.DatabendUtil.addParametersToStatement; 28 | 29 | 30 | public class UpsertTableWriter extends BaseTableWriter { 31 | static final ImmutableMap cdcOperations = ImmutableMap.of("c", 1, "r", 2, "u", 3, "d", 4); 32 | private final AppendTableWriter appendTableWriter; 33 | final String sourceTsMsColumn = "__source_ts_ms"; 34 | final String opColumn = "__op"; 35 | final String deleteColumn = "__delete"; 36 | final boolean upsertKeepDeletes; 37 | protected static final Logger LOGGER = LoggerFactory.getLogger(UpsertTableWriter.class); 38 | 39 | public UpsertTableWriter(Connection connection, String identifierQuoteCharacter, boolean upsertKeepDeletes, boolean isSchemaEvolutionEnabled) { 40 | super(connection, identifierQuoteCharacter, isSchemaEvolutionEnabled); 41 | this.upsertKeepDeletes = upsertKeepDeletes; 42 | appendTableWriter = new AppendTableWriter(connection, identifierQuoteCharacter, isSchemaEvolutionEnabled); 43 | } 44 | 45 | @Override 46 | public void addToTable(final RelationalTable table, final List events) { 47 | if (table.hasPK()) { 48 | this.deleteUpsert(table, 
deduplicateBatch(events)); 49 | } else { 50 | // table has no primary key, fall back to append mode 51 | appendTableWriter.addToTable(table, events); 52 | } 53 | } 54 | 55 | public void deleteUpsert(final RelationalTable table, final List<DatabendChangeEvent> events) { 56 | final String upsertSql = table.preparedUpsertStatement(this.identifierQuoteCharacter); 57 | int inserts = 0; 58 | List<DatabendChangeEvent> deleteEvents = new ArrayList<>(); 59 | List<DatabendChangeEvent> schemaEvolutionEvents = new ArrayList<>(); 60 | 61 | try (PreparedStatement statement = connection.prepareStatement(upsertSql)) { 62 | connection.setAutoCommit(false); 63 | 64 | for (DatabendChangeEvent event : events) { 65 | if (event.valueAsMap() == null) { 66 | deleteEvents.add(event); 67 | } else if (DatabendUtil.isSchemaChanged(event.schema()) && isSchemaEvolutionEnabled) { 68 | // collect schema change events first; the data branches below would otherwise swallow them 69 | schemaEvolutionEvents.add(event); 70 | } else if (upsertKeepDeletes || !event.operation().equals("d")) { 71 | // NOTE: if upsertKeepDeletes = true, delete event data will insert into target table 72 | addParametersToStatement(statement, event); 73 | statement.addBatch(); 74 | } else if (event.operation().equals("d")) { 75 | // here use soft delete; for a true delete, we can use the condition event.keyAsMap().containsKey(deleteColumn) 76 | deleteEvents.add(event); 77 | } 78 | } 79 | 80 | // Each batch needs to have the same schemas, so get the buffered records out 81 | int[] batchResult = statement.executeBatch(); 82 | inserts = Arrays.stream(batchResult).sum(); 83 | 84 | } catch (SQLException e) { 85 | LOGGER.error("Failed to execute upsert batch", e); 86 | throw new RuntimeException(e); 87 | } 88 | 89 | // handle delete events 90 | try { 91 | deleteFromTable(table, deleteEvents); 92 | } catch (Exception e) { 93 | throw new RuntimeException(e); 94 | } 95 | 96 | // handle schema change events 97 | try { 98 | schemaEvolution(table, schemaEvolutionEvents); 99 | } catch (Exception e) { 100 | throw new RuntimeException(e); 101 | } 102 | } 103 | 104 | public void deleteFromTable(final RelationalTable table, final List<DatabendChangeEvent> events) throws Exception { 105 | for (DatabendChangeEvent event : events) { 106 | Map<String, Object> values = event.keyAsMap(); 107 | String deleteSql = table.preparedDeleteStatement(this.identifierQuoteCharacter, getPrimaryKeyValue(table.primaryKey, values)); 108 | try (PreparedStatement statement = connection.prepareStatement(deleteSql)) { 109 | LOGGER.debug("Executing delete statement: {}", deleteSql); 110 | statement.execute(); 111 | } catch (SQLException e) { 112 | throw new RuntimeException(e); 113 | } 114 | } 115 | } 116 | 117 | private String getPrimaryKeyValue(String primaryKey, Map<String, Object> parameters) throws Exception { 118 | String primaryValue = ""; 119 | for (Map.Entry<String, Object> entry : parameters.entrySet()) { 120 | if (Objects.equals(primaryKey, entry.getKey())) { 121 | primaryValue = String.valueOf(entry.getValue()); 122 | return primaryValue; 123 | } 124 | } 125 | if (primaryValue.equals("")) { 126 | throw new Exception("No primary key set"); 127 | } 128 | 129 | return primaryValue; 130 | } 131 | 132 | private List<DatabendChangeEvent> deduplicateBatch(List<DatabendChangeEvent> events) { 133 | ConcurrentHashMap<JsonNode, DatabendChangeEvent> deduplicatedEvents = new ConcurrentHashMap<>(); 134 | events.stream() 135 | .filter(Objects::nonNull) // filter out null events 136 | .forEach(e -> { 137 | deduplicatedEvents.merge(e.key(), e, (oldValue, newValue) -> { 138 | if (oldValue != null && newValue != null && compareByTsThenOp(oldValue.value(), newValue.value()) <= 0) { 139 | return newValue; 140 | } else { 141 | return oldValue; 142 | } 143 | }); 144 | }); 145 | return new ArrayList<>(deduplicatedEvents.values()); 146 
| } 147 | 148 | private int compareByTsThenOp(JsonNode lhs, JsonNode rhs) { 149 | if (lhs == null || rhs == null) { 150 | return 0; 151 | } 152 | if (lhs.get(sourceTsMsColumn) == null || rhs.get(sourceTsMsColumn) == null) { 153 | return 0; 154 | } 155 | int result = Long.compare(lhs.get(sourceTsMsColumn).asLong(0), rhs.get(sourceTsMsColumn).asLong(0)); 156 | 157 | if (result == 0) { 158 | // return (x < y) ? -1 : ((x == y) ? 0 : 1); 159 | result = cdcOperations.getOrDefault(lhs.get(opColumn).asText("c"), -1) 160 | .compareTo( 161 | cdcOperations.getOrDefault(rhs.get(opColumn).asText("c"), -1) 162 | ); 163 | } 164 | 165 | return result; 166 | } 167 | 168 | } 169 | -------------------------------------------------------------------------------- /debezium-server-databend-sink/src/main/java/io/debezium/server/databend/DatabendChangeEvent.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright Databend Authors. 4 | * * 5 | * * Licensed under the Apache Software License version 2.0, available at http://www.apache.org/licenses/LICENSE-2.0 6 | * 7 | */ 8 | 9 | package io.debezium.server.databend; 10 | 11 | import java.util.HashMap; 12 | import java.util.Map; 13 | import java.util.Objects; 14 | 15 | import com.databend.client.data.DatabendRawType; 16 | import com.databend.jdbc.DatabendColumnInfo; 17 | import com.fasterxml.jackson.core.type.TypeReference; 18 | import com.fasterxml.jackson.databind.JsonNode; 19 | import org.slf4j.Logger; 20 | import org.slf4j.LoggerFactory; 21 | 22 | /** 23 | * @author hantmac 24 | */ 25 | public class DatabendChangeEvent { 26 | 27 | protected static final Logger LOGGER = LoggerFactory.getLogger(DatabendChangeEvent.class); 28 | protected final String destination; 29 | protected final JsonNode value; 30 | protected final JsonNode key; 31 | public final Schema schema; 32 | 33 | public DatabendChangeEvent(String destination, JsonNode value, JsonNode key, JsonNode valueSchema, JsonNode keySchema) { 34 | this.destination = destination; 35 | this.value = value; 36 | this.key = key; 37 | this.schema = new Schema(valueSchema, keySchema); 38 | } 39 | 40 | public JsonNode key() { 41 | return key; 42 | } 43 | 44 | public JsonNode value() { 45 | return value; 46 | } 47 | 48 | public Map<String, Object> valueAsMap() { 49 | return DatabendChangeConsumer.mapper.convertValue(value(), new TypeReference<>() { 50 | }); 51 | } 52 | 53 | public Map<String, Object> keyAsMap() { 54 | return DatabendChangeConsumer.mapper.convertValue(key(), new TypeReference<>() { 55 | }); 56 | } 57 | 58 | public String operation() { 59 | if (value == null || value.get("__op") == null) { 60 | return "c"; 61 | } 62 | return value().get("__op").textValue(); 63 | } 64 | 65 | public Schema schema() { 66 | return schema; 67 | } 68 | 69 | public String destination() { 70 | return destination; 71 | } 72 | 73 | 74 | public static class Schema { 75 | private final JsonNode valueSchema; 76 | private final JsonNode keySchema; 77 | 78 | public Schema(JsonNode valueSchema, JsonNode keySchema) { 79 | this.valueSchema = valueSchema; 80 | this.keySchema = keySchema; 81 | } 82 | 83 | public JsonNode valueSchema() { 84 | return valueSchema; 85 | } 86 | 87 | public JsonNode keySchema() { 88 | return keySchema; 89 | } 90 | 91 | public Map<String, DatabendColumnInfo> valueSchemaFields() { 92 | if (valueSchema != null && valueSchema.has("fields") && valueSchema.get("fields").isArray()) { 93 | LOGGER.debug(valueSchema.toString()); 94 | return fields(valueSchema, "", 0); 95 | } 96 | LOGGER.trace("Event schema not found!"); 97 | 
return new HashMap<>(); 98 | } 99 | 100 | public Map keySchemaFields() { 101 | if (keySchema != null && keySchema.has("fields") && keySchema.get("fields").isArray()) { 102 | LOGGER.debug(keySchema.toString()); 103 | return fields(keySchema, "", 0); 104 | } 105 | LOGGER.trace("Key schema not found!"); 106 | return new HashMap<>(); 107 | } 108 | 109 | private Map fields(JsonNode eventSchema, String schemaName, int columnId) { 110 | Map fields = new HashMap<>(); 111 | String schemaType = eventSchema.get("type").textValue(); 112 | LOGGER.debug("Converting Schema of: {}::{}", schemaName, schemaType); 113 | for (JsonNode jsonSchemaFieldNode : eventSchema.get("fields")) { 114 | columnId++; 115 | String fieldName = jsonSchemaFieldNode.get("field").textValue(); 116 | String fieldType = jsonSchemaFieldNode.get("type").textValue(); 117 | LOGGER.debug("Processing Field: [{}] {}.{}::{}", columnId, schemaName, fieldName, fieldType); 118 | DatabendRawType databendStrType = new DatabendRawType(DatabendTypes.STRING); 119 | switch (fieldType) { 120 | case "array": 121 | JsonNode items = jsonSchemaFieldNode.get("items"); 122 | if (items != null && items.has("type")) { 123 | fields.put(fieldName, DatabendColumnInfo.newBuilder(DatabendTypes.STRING, databendStrType).build()); 124 | } else { 125 | throw new RuntimeException("Unexpected Array type for field " + fieldName); 126 | } 127 | break; 128 | case "map": 129 | case "struct": 130 | // create it as struct, nested type 131 | fields.put(fieldName, DatabendColumnInfo.newBuilder("String", databendStrType).build()); 132 | break; 133 | default: //primitive types 134 | fields.put(fieldName, fieldType(fieldType)); 135 | break; 136 | } 137 | } 138 | 139 | return fields; 140 | } 141 | 142 | private DatabendColumnInfo fieldType(String fieldType) { 143 | switch (fieldType) { 144 | case "int8": 145 | return DatabendColumnInfo.of(DatabendTypes.INT8, new DatabendRawType(DatabendTypes.INT8)); 146 | case "int16": 147 | return DatabendColumnInfo.of(DatabendTypes.INT16, new DatabendRawType(DatabendTypes.INT16)); 148 | case "int32": // int 4 bytes 149 | return DatabendColumnInfo.of(DatabendTypes.INT32, new DatabendRawType(DatabendTypes.INT32)); 150 | case "int64": // long 8 bytes 151 | return DatabendColumnInfo.of(DatabendTypes.INT64, new DatabendRawType(DatabendTypes.INT64)); 152 | case "float32": // float is represented in 32 bits, 153 | return DatabendColumnInfo.of(DatabendTypes.FLOAT32, new DatabendRawType(DatabendTypes.FLOAT32)); 154 | case "float64": // double is represented in 64 bits 155 | return DatabendColumnInfo.of(DatabendTypes.FLOAT64, new DatabendRawType(DatabendTypes.FLOAT64)); 156 | case "boolean": 157 | return DatabendColumnInfo.of(DatabendTypes.BOOLEAN, new DatabendRawType(DatabendTypes.BOOLEAN)); 158 | case "string": 159 | return DatabendColumnInfo.of(DatabendTypes.STRING, new DatabendRawType(DatabendTypes.STRING)); 160 | default: 161 | // default to String type 162 | return DatabendColumnInfo.of(DatabendTypes.STRING, new DatabendRawType(DatabendTypes.STRING)); 163 | //throw new RuntimeException("'" + fieldName + "' has "+fieldType+" type, "+fieldType+" not supported!"); 164 | } 165 | } 166 | 167 | @Override 168 | public boolean equals(Object o) { 169 | if (this == o) return true; 170 | if (o == null || getClass() != o.getClass()) return false; 171 | Schema that = (Schema) o; 172 | return Objects.equals(valueSchema, that.valueSchema) && Objects.equals(keySchema, that.keySchema); 173 | } 174 | 175 | @Override 176 | public int hashCode() { 177 | return 
Objects.hash(valueSchema, keySchema); 178 | } 179 | } 180 | 181 | } 182 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 10 | 11 | 13 | 4.0.0 14 | io.debezium 15 | debezium-server-databend 16 | Debezium Server Parent 17 | 18 | 19 | io.debezium 20 | debezium-api 21 | 1.9.7.Final 22 | compile 23 | 24 | 25 | io.debezium 26 | debezium-core 27 | 1.9.7.Final 28 | compile 29 | 30 | 31 | jakarta.enterprise 32 | jakarta.enterprise.cdi-api 33 | 34 | 35 | org.eclipse.microprofile.config 36 | microprofile-config-api 37 | 38 | 39 | com.databend 40 | databend-jdbc 41 | 0.1.3 42 | 43 | 44 | org.jdbi 45 | jdbi3-core 46 | 47 | 48 | io.debezium 49 | debezium-server-core 50 | 1.9.7.Final 51 | compile 52 | 53 | 54 | org.apache.commons 55 | commons-dbcp2 56 | 2.9.0 57 | compile 58 | 59 | 60 | org.apache.commons 61 | commons-dbcp2 62 | 2.9.0 63 | compile 64 | 65 | 66 | org.jooq 67 | jooq 68 | 3.16.0 69 | compile 70 | 71 | 72 | org.jooq 73 | jooq 74 | 3.16.0 75 | compile 76 | 77 | 78 | io.quarkus 79 | quarkus-test-devtools 80 | 1.13.7.Final 81 | 82 | 83 | ${revision} 84 | pom 85 | 86 | 87 | 0.1.0-SNAPSHOT 88 | 89 | 90 | UTF-8 91 | 11 92 | 11 93 | 8 94 | true 95 | 96 | 3.0.15 97 | 3.4.2 98 | 1.17.6 99 | 100 | 1.9.7.Final 101 | 8.0.30 102 | 103 | 2.16.2.Final 104 | 105 | 106 | 4.8 107 | 108 | 109 | 110 | 111 | 112 | org.antlr 113 | antlr4-runtime 114 | ${version.antlr} 115 | 116 | 117 | com.databend 118 | databend-jdbc 119 | 0.1.3 120 | 121 | 122 | io.quarkus 123 | quarkus-bom 124 | ${version.quarkus} 125 | pom 126 | import 127 | 128 | 129 | org.jdbi 130 | jdbi3-bom 131 | pom 132 | 3.38.0 133 | import 134 | 135 | 136 | 137 | mysql 138 | mysql-connector-java 139 | ${version.mysql.driver} 140 | 141 | 142 | 143 | io.debezium 144 | debezium-server 145 | ${version.debezium} 146 | pom 147 | import 148 | 149 | 150 | io.debezium 151 | debezium-server-batch 152 | ${project.version} 153 | 154 | 155 | 156 | org.codehaus.groovy 157 | groovy 158 | ${version.groovy} 159 | 160 | 161 | org.codehaus.groovy 162 | groovy-json 163 | ${version.groovy} 164 | 165 | 166 | org.codehaus.groovy 167 | groovy-jsr223 168 | ${version.groovy} 169 | 170 | 171 | 172 | com.squareup.okhttp3 173 | okhttp 174 | 4.10.0 175 | test 176 | 177 | 178 | org.mockito 179 | mockito-core 180 | 3.12.4 181 | test 182 | 183 | 184 | 185 | 186 | 187 | debezium-server-databend-sink 188 | debezium-server-databend-dist 189 | 190 | 191 | 192 | -------------------------------------------------------------------------------- /debezium-server-databend-sink/src/test/resources/json/serde-with-array2.json: -------------------------------------------------------------------------------- 1 | { 2 | "schema": { 3 | "type": "struct", 4 | "fields": [ 5 | { 6 | "type": "struct", 7 | "fields": [ 8 | { 9 | "type": "string", 10 | "optional": false, 11 | "field": "version" 12 | }, 13 | { 14 | "type": "string", 15 | "optional": false, 16 | "field": "connector" 17 | }, 18 | { 19 | "type": "string", 20 | "optional": false, 21 | "field": "name" 22 | }, 23 | { 24 | "type": "int64", 25 | "optional": false, 26 | "field": "ts_ms" 27 | }, 28 | { 29 | "type": "string", 30 | "optional": true, 31 | "name": "io.debezium.data.Enum", 32 | "version": 1, 33 | "parameters": { 34 | "allowed": "true,last,false" 35 | }, 36 | "default": "false", 37 | "field": "snapshot" 38 | }, 39 | { 40 | "type": "string", 41 | "optional": false, 42 | "field": "db" 43 | }, 44 | { 45 | 
"type": "string", 46 | "optional": true, 47 | "field": "sequence" 48 | }, 49 | { 50 | "type": "string", 51 | "optional": true, 52 | "field": "table" 53 | }, 54 | { 55 | "type": "int64", 56 | "optional": false, 57 | "field": "server_id" 58 | }, 59 | { 60 | "type": "string", 61 | "optional": true, 62 | "field": "gtid" 63 | }, 64 | { 65 | "type": "string", 66 | "optional": false, 67 | "field": "file" 68 | }, 69 | { 70 | "type": "int64", 71 | "optional": false, 72 | "field": "pos" 73 | }, 74 | { 75 | "type": "int32", 76 | "optional": false, 77 | "field": "row" 78 | }, 79 | { 80 | "type": "int64", 81 | "optional": true, 82 | "field": "thread" 83 | }, 84 | { 85 | "type": "string", 86 | "optional": true, 87 | "field": "query" 88 | } 89 | ], 90 | "optional": false, 91 | "name": "io.debezium.connector.mysql.Source", 92 | "field": "source" 93 | }, 94 | { 95 | "type": "string", 96 | "optional": true, 97 | "field": "databaseName" 98 | }, 99 | { 100 | "type": "string", 101 | "optional": true, 102 | "field": "schemaName" 103 | }, 104 | { 105 | "type": "string", 106 | "optional": true, 107 | "field": "ddl" 108 | }, 109 | { 110 | "type": "array", 111 | "items": { 112 | "type": "struct", 113 | "fields": [ 114 | { 115 | "type": "string", 116 | "optional": false, 117 | "field": "type" 118 | }, 119 | { 120 | "type": "string", 121 | "optional": false, 122 | "field": "id" 123 | }, 124 | { 125 | "type": "struct", 126 | "fields": [ 127 | { 128 | "type": "string", 129 | "optional": true, 130 | "field": "defaultCharsetName" 131 | }, 132 | { 133 | "type": "array", 134 | "items": { 135 | "type": "string", 136 | "optional": false 137 | }, 138 | "optional": true, 139 | "field": "primaryKeyColumnNames" 140 | }, 141 | { 142 | "type": "array", 143 | "items": { 144 | "type": "struct", 145 | "fields": [ 146 | { 147 | "type": "string", 148 | "optional": false, 149 | "field": "name" 150 | }, 151 | { 152 | "type": "int32", 153 | "optional": false, 154 | "field": "jdbcType" 155 | }, 156 | { 157 | "type": "int32", 158 | "optional": true, 159 | "field": "nativeType" 160 | }, 161 | { 162 | "type": "string", 163 | "optional": false, 164 | "field": "typeName" 165 | }, 166 | { 167 | "type": "string", 168 | "optional": true, 169 | "field": "typeExpression" 170 | }, 171 | { 172 | "type": "string", 173 | "optional": true, 174 | "field": "charsetName" 175 | }, 176 | { 177 | "type": "int32", 178 | "optional": true, 179 | "field": "length" 180 | }, 181 | { 182 | "type": "int32", 183 | "optional": true, 184 | "field": "scale" 185 | }, 186 | { 187 | "type": "int32", 188 | "optional": false, 189 | "field": "position" 190 | }, 191 | { 192 | "type": "boolean", 193 | "optional": true, 194 | "field": "optional" 195 | }, 196 | { 197 | "type": "boolean", 198 | "optional": true, 199 | "field": "autoIncremented" 200 | }, 201 | { 202 | "type": "boolean", 203 | "optional": true, 204 | "field": "generated" 205 | } 206 | ], 207 | "optional": false, 208 | "name": "io.debezium.connector.schema.Column" 209 | }, 210 | "optional": false, 211 | "field": "columns" 212 | } 213 | ], 214 | "optional": false, 215 | "name": "io.debezium.connector.schema.Table", 216 | "field": "table" 217 | } 218 | ], 219 | "optional": false, 220 | "name": "io.debezium.connector.schema.Change" 221 | }, 222 | "optional": false, 223 | "field": "tableChanges" 224 | } 225 | ], 226 | "optional": false, 227 | "name": "io.debezium.connector.mysql.SchemaChangeValue" 228 | }, 229 | "payload": { 230 | "source": { 231 | "version": "1.7.0.Final", 232 | "connector": "mysql", 233 | "name": 
"testc", 234 | "ts_ms": 1638187055631, 235 | "snapshot": "true", 236 | "db": "inventory", 237 | "sequence": null, 238 | "table": "geom", 239 | "server_id": 0, 240 | "gtid": null, 241 | "file": "mysql-bin.000003", 242 | "pos": 154, 243 | "row": 0, 244 | "thread": null, 245 | "query": null 246 | }, 247 | "databaseName": "inventory", 248 | "schemaName": null, 249 | "ddl": "CREATE TABLE `geom` (\n `id` int(11) NOT NULL AUTO_INCREMENT,\n `g` geometry NOT NULL,\n `h` geometry DEFAULT NULL,\n PRIMARY KEY (`id`)\n) ENGINE=InnoDB AUTO_INCREMENT=4 DEFAULT CHARSET=latin1", 250 | "tableChanges": [] 251 | } 252 | } 253 | -------------------------------------------------------------------------------- /debezium-server-databend-dist/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 9 | 10 | 12 | 13 | io.debezium 14 | debezium-server-databend 15 | ${revision} 16 | ../pom.xml 17 | 18 | 4.0.0 19 | debezium-server-databend-dist 20 | Debezium Server Distribution 21 | jar 22 | 23 | 24 | server-distribution 25 | legacy-jar 26 | 27 | 28 | 29 | 30 | io.quarkus 31 | quarkus-micrometer 32 | 33 | 34 | io.quarkus 35 | quarkus-micrometer-registry-prometheus 36 | 37 | 38 | io.quarkus 39 | quarkus-resteasy-jackson 40 | 41 | 42 | io.debezium 43 | debezium-server-core 44 | test 45 | 46 | 47 | io.quarkus 48 | quarkus-test-devtools 49 | 1.13.7.Final 50 | 51 | 52 | 53 | 54 | 55 | io.quarkus 56 | quarkus-maven-plugin 57 | ${version.quarkus} 58 | 59 | 60 | 61 | build 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | assembly 72 | 73 | false 74 | 75 | 76 | 77 | io.debezium 78 | debezium-connector-mysql 79 | 80 | 81 | io.debezium 82 | debezium-connector-postgres 83 | runtime 84 | 85 | 86 | io.debezium 87 | debezium-connector-mongodb 88 | 89 | 90 | io.debezium 91 | debezium-connector-sqlserver 92 | 93 | 94 | io.debezium 95 | debezium-connector-oracle 96 | 97 | 98 | io.debezium 99 | debezium-connector-oracle 100 | 101 | 102 | io.debezium 103 | debezium-connector-db2 104 | 105 | 106 | io.debezium 107 | debezium-server-core 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | io.quarkus 143 | quarkus-logging-json 144 | 145 | 146 | io.debezium 147 | debezium-server-databend-sink 148 | ${revision} 149 | 150 | 151 | 152 | 153 | 154 | org.apache.maven.plugins 155 | maven-assembly-plugin 156 | ${version.assembly.plugin} 157 | 158 | 159 | default 160 | package 161 | 162 | single 163 | 164 | 165 | false 166 | true 167 | 168 | src/main/resources/assemblies/${assembly.descriptor}.xml 169 | 170 | 171 | posix 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | -------------------------------------------------------------------------------- /debezium-server-databend-sink/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 9 | 10 | 12 | 13 | io.debezium 14 | debezium-server-databend 15 | ${revision} 16 | ../pom.xml 17 | 18 | 4.0.0 19 | debezium-server-databend-sink 20 | Debezium Server Databend Consumer 21 | jar 22 | 23 | 24 | 25 | 26 | io.debezium 27 | debezium-server-core 28 | 29 | 30 | org.apache.kafka 31 | kafka-clients 32 | 33 | 34 | io.debezium 35 | debezium-scripting 36 | 37 | 38 | org.codehaus.groovy 39 | groovy 40 | 41 | 42 | org.codehaus.groovy 43 | groovy-json 44 | 45 | 46 | com.databend 47 | databend-jdbc 48 | 0.1.0 49 | 50 | 51 | org.codehaus.groovy 52 | groovy-jsr223 53 | 54 | 
55 | 56 | org.apache.commons 57 | commons-dbcp2 58 | 2.9.0 59 | 60 | 61 | 62 | org.jdbi 63 | jdbi3-core 64 | 65 | 66 | org.jdbi 67 | jdbi3-jackson2 68 | 69 | 70 | 71 | org.jooq 72 | jooq 73 | 3.16.0 74 | 75 | 76 | org.jooq 77 | jooq-meta 78 | 3.16.0 79 | 80 | 81 | 82 | io.quarkus 83 | quarkus-junit5 84 | ${version.quarkus} 85 | test 86 | 87 | 88 | org.postgresql 89 | postgresql 90 | test 91 | 42.6.0 92 | 93 | 94 | mysql 95 | mysql-connector-java 96 | test 97 | 98 | 99 | org.apache.spark 100 | spark-core_2.13 101 | 3.3.1 102 | test 103 | 104 | 105 | org.apache.spark 106 | spark-sql_2.13 107 | 3.3.1 108 | test 109 | 110 | 111 | org.easytesting 112 | fest-assert 113 | 1.4 114 | test 115 | 116 | 117 | org.awaitility 118 | awaitility 119 | 4.2.0 120 | test 121 | 122 | 123 | io.debezium 124 | debezium-core 125 | ${version.debezium} 126 | test-jar 127 | test 128 | 129 | 130 | io.debezium 131 | debezium-server-core 132 | test-jar 133 | test 134 | 135 | 136 | org.testcontainers 137 | testcontainers 138 | ${version.testcontainers} 139 | test 140 | 141 | 142 | org.testcontainers 143 | mysql 144 | ${version.testcontainers} 145 | test 146 | 147 | 148 | org.testcontainers 149 | postgresql 150 | ${version.testcontainers} 151 | test 152 | 153 | 154 | io.debezium 155 | debezium-connector-postgres 156 | test 157 | 158 | 159 | io.debezium 160 | debezium-connector-mysql 161 | test 162 | 163 | 164 | io.debezium 165 | debezium-connector-mongodb 166 | ${version.debezium} 167 | test 168 | 169 | 170 | io.quarkus 171 | quarkus-test-devtools 172 | 1.13.7.Final 173 | 174 | 175 | 176 | 177 | 178 | io.quarkus 179 | quarkus-maven-plugin 180 | ${version.quarkus} 181 | 182 | 183 | 184 | build 185 | 186 | 187 | 188 | 189 | 190 | org.jboss.jandex 191 | jandex-maven-plugin 192 | 1.2.3 193 | 194 | 195 | make-index 196 | 197 | jandex 198 | 199 | 200 | 201 | 202 | 203 | org.apache.maven.plugins 204 | maven-surefire-plugin 205 | 3.0.0 206 | 207 | 208 | org.jboss.logmanager.LogManager 209 | ${maven.home} 210 | ${session.request.userSettingsFile.path} 211 | 212 | 213 | 214 | 215 | org.apache.maven.plugins 216 | maven-failsafe-plugin 217 | 3.0.0-M8 218 | 219 | 220 | integration-test 221 | 222 | integration-test 223 | 224 | 225 | 226 | verify 227 | 228 | verify 229 | 230 | 231 | 232 | 233 | ${skipITs} 234 | true 235 | 236 | IT 237 | 238 | 239 | 240 | 241 | 242 | 243 | -------------------------------------------------------------------------------- /debezium-server-databend-sink/src/test/java/io/debezium/databend/DatabendChangeConsumerTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright Databend Authors. 4 | * * 5 | * * Licensed under the Apache Software License version 2.0, available at http://www.apache.org/licenses/LICENSE-2.0 6 | * 7 | */ 8 | 9 | package io.debezium.databend; 10 | 11 | import io.debezium.databend.testresources.BaseDbTest; 12 | import io.debezium.databend.testresources.SourcePostgresqlDB; 13 | import io.debezium.server.databend.DatabendChangeConsumer; 14 | import io.quarkus.test.common.QuarkusTestResource; 15 | import io.quarkus.test.junit.QuarkusTest; 16 | 17 | import java.time.Duration; 18 | 19 | import org.awaitility.Awaitility; 20 | import org.junit.jupiter.api.Assertions; 21 | import org.junit.jupiter.api.Test; 22 | 23 | import javax.inject.Inject; 24 | 25 | /** 26 | * Integration test that verifies basic reading from PostgreSQL database and writing to databend destination. 
27 | * 28 | * @author hantmac 29 | */ 30 | @QuarkusTest 31 | @QuarkusTestResource(SourcePostgresqlDB.class) 32 | public class DatabendChangeConsumerTest extends BaseDbTest { 33 | @Inject 34 | DatabendChangeConsumer consumer; 35 | 36 | // @Test 37 | // public void testSchemaChanges() throws Exception { 38 | // // TEST add new columns, drop not null constraint 39 | // SourcePostgresqlDB.runSQL("UPDATE inventory.customers SET first_name='George__UPDATE1' WHERE ID = 1002 ;"); 40 | // SourcePostgresqlDB.runSQL("ALTER TABLE inventory.customers ADD test_varchar_column varchar(255);"); 41 | // SourcePostgresqlDB.runSQL("ALTER TABLE inventory.customers ADD test_boolean_column boolean;"); 42 | // SourcePostgresqlDB.runSQL("ALTER TABLE inventory.customers ADD test_date_column date;"); 43 | // 44 | // SourcePostgresqlDB.runSQL("UPDATE inventory.customers SET first_name='George__UPDATE1' WHERE id = 1002 ;"); 45 | // SourcePostgresqlDB.runSQL("ALTER TABLE inventory.customers ALTER COLUMN email DROP NOT NULL;"); 46 | // SourcePostgresqlDB.runSQL("INSERT INTO inventory.customers VALUES " + 47 | // "(default,'SallyUSer2','Thomas',null,'value1',false, '2020-01-01');"); 48 | // SourcePostgresqlDB.runSQL("ALTER TABLE inventory.customers ALTER COLUMN last_name DROP NOT NULL;"); 49 | // SourcePostgresqlDB.runSQL("UPDATE inventory.customers SET last_name = NULL WHERE id = 1002 ;"); 50 | // SourcePostgresqlDB.runSQL("DELETE FROM inventory.customers WHERE id = 1004 ;"); 51 | // 52 | // Awaitility.await().atMost(Duration.ofSeconds(180)).until(() -> { 53 | // try { 54 | // Dataset ds = getTableData("testc.inventory.customers"); 55 | // //ds.show(); 56 | // return 57 | // ds.where("__op == 'r'").count() == 4 // snapshot rows. initial table data 58 | // && ds.where("__op == 'u'").count() == 3 // 3 update 59 | // && ds.where("__op == 'c'").count() == 1 // 1 insert 60 | // && ds.where("__op == 'd'").count() == 1 // 1 insert 61 | // && ds.where("first_name == 'George__UPDATE1'").count() == 3 62 | // && ds.where("first_name == 'SallyUSer2'").count() == 1 63 | // && ds.where("last_name is null").count() == 1 64 | // && ds.where("id == '1004'").where("__op == 'd'").count() == 1; 65 | // } catch (Exception e) { 66 | // return false; 67 | // } 68 | // }); 69 | // 70 | // // added columns are not recognized by iceberg 71 | // getTableData("testc.inventory.customers").show(); 72 | // // insert row after defining new column in target iceberg table 73 | // SourcePostgresqlDB.runSQL("INSERT INTO inventory.customers VALUES " + 74 | // "(default,'After-Defining-Iceberg-fields','Thomas',null,'value1',false, '2020-01-01');"); 75 | // 76 | // // remove column from source 77 | // SourcePostgresqlDB.runSQL("ALTER TABLE inventory.customers DROP COLUMN email;"); 78 | // SourcePostgresqlDB.runSQL("INSERT INTO inventory.customers VALUES " + 79 | // "(default,'User3','lastname_value3','after-dropping-email-column-from-source',true, '2020-01-01'::DATE);"); 80 | // 81 | // Awaitility.await().atMost(Duration.ofSeconds(180)).until(() -> { 82 | // try { 83 | // Dataset ds = getTableData("testc.inventory.customers"); 84 | // ds.show(); 85 | // return ds.where("first_name == 'User3'").count() == 1 86 | // && ds.where("first_name == 'After-Defining-Iceberg-fields'").count() == 1 87 | // && ds.where("test_varchar_column == 'after-dropping-email-column-from-source' AND email is null").count() == 1; 88 | // } catch (Exception e) { 89 | // return false; 90 | // } 91 | // }); 92 | // getTableData("testc.inventory.customers").show(); 93 | // 94 | // } 95 | 
96 | @Test 97 | public void testConsumingVariousDataTypes() throws Exception { 98 | String sql = "\n" + 99 | " DROP TABLE IF EXISTS inventory.data_types;\n" + 100 | " CREATE TABLE IF NOT EXISTS inventory.data_types (\n" + 101 | " c_id INTEGER ,\n" + 102 | " c_text TEXT,\n" + 103 | " c_varchar VARCHAR,\n" + 104 | " c_int INTEGER,\n" + 105 | " c_date DATE,\n" + 106 | " c_timestamp TIMESTAMP,\n" + 107 | " c_timestamptz TIMESTAMPTZ,\n" + 108 | " c_float FLOAT,\n" + 109 | " c_decimal DECIMAL(18,4),\n" + 110 | " c_numeric NUMERIC(18,4),\n" + 111 | " c_interval INTERVAL,\n" + 112 | " c_boolean BOOLEAN,\n" + 113 | " c_uuid UUID,\n" + 114 | " c_json JSON,\n" + 115 | " c_jsonb JSONB\n" + 116 | " );"; 117 | SourcePostgresqlDB.runSQL(sql); 118 | sql = "INSERT INTO inventory.data_types (" + 119 | "c_id, " + 120 | "c_text, c_varchar, c_int, c_date, c_timestamp, c_timestamptz, " + 121 | "c_float, c_decimal,c_numeric,c_interval,c_boolean,c_uuid," + 122 | "c_json, c_jsonb) " + 123 | "VALUES (1, null, null, null,null,null,null," + 124 | "null,null,null,null,null,null," + 125 | "null,null)," + 126 | "(2, 'val_text', 'A', 123, current_date , current_timestamp, current_timestamp," + 127 | "'1.23'::float,'1234566.34456'::decimal,'345672123.452'::numeric, interval '1 day',false," + 128 | "'3f207ac6-5dba-11eb-ae93-0242ac130002'::UUID," + 129 | "'{\"reading\": 1123}'::json, '{\"reading\": 1123}'::jsonb" + 130 | ")"; 131 | SourcePostgresqlDB.runSQL(sql); 132 | } 133 | 134 | @Test 135 | public void testTargetTableName() throws Exception { 136 | String destination = "server_name.databaseName.realTableName"; 137 | String realTableName = consumer.mapDestination(destination); 138 | Assertions.assertEquals("debeziumcdc_realTableName", realTableName); 139 | } 140 | 141 | @Test 142 | public void testConsumingArrayDataType() throws Exception { 143 | String sql = " DROP TABLE IF EXISTS inventory.array_data;\n" + 144 | " CREATE TABLE IF NOT EXISTS inventory.array_data (\n" + 145 | " name text,\n" + 146 | " pay_by_quarter integer[],\n" + 147 | " schedule text[][]\n" + 148 | " );\n" + 149 | " INSERT INTO inventory.array_data\n" + 150 | " VALUES " + 151 | "('Carol2',\n" + 152 | " ARRAY[20000, 25000, 25000, 25000],\n" + 153 | " ARRAY[['breakfast', 'consulting'], ['meeting', 'lunch']]),\n" + 154 | "('Bill',\n" + 155 | " '{10000, 10000, 10000, 10000}',\n" + 156 | " '{{\"meeting\", \"lunch\"}, {\"training\", \"presentation\"}}'),\n" + 157 | " ('Carol1',\n" + 158 | " '{20000, 25000, 25000, 25000}',\n" + 159 | " '{{\"breakfast\", \"consulting\"}, {\"meeting\", \"lunch\"}}')" + 160 | ";"; 161 | SourcePostgresqlDB.runSQL(sql); 162 | } 163 | 164 | 165 | // @Test 166 | // @Disabled 167 | // public void testDataTypeChanges() throws Exception { 168 | // String sql = "\n" + 169 | // " DROP TABLE IF EXISTS inventory.data_type_changes;\n" + 170 | // " CREATE TABLE IF NOT EXISTS inventory.data_type_changes (\n" + 171 | // " c_id INTEGER ,\n" + 172 | // " c_varchar VARCHAR,\n" + 173 | // " c_int2string INTEGER,\n" + 174 | // " c_date2string DATE,\n" + 175 | // " c_timestamp2string TIMESTAMP,\n" + 176 | // " string2int VARCHAR,\n" + 177 | // " string2timestamp VARCHAR,\n" + 178 | // " string2boolean VARCHAR\n" + 179 | // " );"; 180 | // SourcePostgresqlDB.runSQL(sql); 181 | // sql = "INSERT INTO inventory.data_type_changes " + 182 | // " (c_id, c_varchar, c_int2string, c_date2string, c_timestamp2string, string2int, string2timestamp, string2boolean) " + 183 | // " VALUES (1, 'STRING-DATA-1', 123, current_date , current_timestamp, 111, 
current_timestamp, false)"; 184 | // SourcePostgresqlDB.runSQL(sql); 185 | // sql = "INSERT INTO inventory.data_type_changes " + 186 | // " (c_id, c_varchar, c_int2string, c_date2string, c_timestamp2string, string2int, string2timestamp, string2boolean) " + 187 | // " VALUES (2, 'STRING-DATA-2', 222, current_date , current_timestamp, 222, current_timestamp, true)"; 188 | // SourcePostgresqlDB.runSQL(sql); 189 | // 190 | // SourcePostgresqlDB.runSQL("ALTER TABLE inventory.data_type_changes " + 191 | // "ALTER COLUMN c_int2string TYPE VARCHAR(555), " + 192 | // "ALTER COLUMN c_date2string TYPE VARCHAR(555), " + 193 | // "ALTER COLUMN c_timestamp2string TYPE VARCHAR(555), " + 194 | // "ALTER COLUMN string2int TYPE INTEGER USING string2int::integer, " + 195 | // "ALTER COLUMN string2timestamp TYPE TIMESTAMP USING string2timestamp::TIMESTAMP, " + 196 | // "ALTER COLUMN string2boolean TYPE boolean USING string2boolean::boolean" 197 | // ); 198 | // sql = "INSERT INTO inventory.data_type_changes " + 199 | // " (c_id, c_varchar, c_int2string, c_date2string, c_timestamp2string, string2int, string2timestamp, string2boolean) " + 200 | // " VALUES (3, 'STRING-DATA-3', '333', 'current_date-3' , 'current_timestamp-3', 333, current_timestamp, false)"; 201 | // SourcePostgresqlDB.runSQL(sql); 202 | // 203 | // Awaitility.await().atMost(Duration.ofSeconds(180)).until(() -> { 204 | // try { 205 | // Dataset ds = getTableData("testc.inventory.data_type_changes"); 206 | // ds.printSchema(); 207 | // ds.show(); 208 | // return ds.where("__op == 'r'").count() == 19; 209 | // } catch (Exception e) { 210 | // return false; 211 | // } 212 | // }); 213 | // } 214 | 215 | 216 | } 217 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 
34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /docs/docs.md: -------------------------------------------------------------------------------- 1 | # Debezium Databend Consumer 2 | 3 | Replicates database CDC events to a databend database 4 | 5 | ## Databend Consumer 6 | 7 | Databend debezium server consumer replicates debezium CDC events to destination databend tables. It is possible to 8 | replicate source database one 9 | to one or run it with `append` mode or `upsert` mode to keep all change events in databend table. 
#### Configuration properties

| Config | Default | Description |
|----------------------------------------------------------|-------------------|-------------|
| `debezium.sink.databend.database.url` | `` | Databend database JDBC URL, for example `jdbc:databend://localhost:8000`. |
| `debezium.sink.databend.database.username` | `` | Databend database user name. |
| `debezium.sink.databend.database.password` | `` | Databend database user password. |
| `debezium.sink.databend.table-prefix` | `` | Prefix added to destination table names. |
| `debezium.sink.databend.upsert` | `true` | Run in upsert mode, overwriting updated rows; explained below. |
| `debezium.sink.databend.upsert-keep-deletes` | `true` | With upsert mode, keep deleted rows in the target table. |
| `debezium.sink.databend.destination-regexp` | `` | Regexp to modify the destination table name. With this it is possible to map `table_ptt1`, `table_ptt2` to `table_combined`. |
| `debezium.sink.databend.destination-regexp-replace` | `` | Replacement part of the destination-table regexp. |
| `debezium.sink.batch.batch-size-wait` | `NoBatchSizeWait` | Batch size wait strategy to optimize data files and upload interval; explained below. |
| `debezium.sink.databend.database.param.{jdbc.prop.name}` | | Additional JDBC connection parameters for the destination database. For example, to get `?ssl=true` on the URL, set `debezium.sink.databend.database.param.ssl=true`. |
| `debezium.sink.databend.database.primaryKey` | `` | Primary key of the Databend table for upsert mode; required when upsert is enabled. |

### Upsert mode

By default, the Debezium Databend consumer runs in upsert mode (`debezium.sink.databend.upsert=true`). Upsert mode requires a primary key (`debezium.sink.databend.database.primaryKey`) and performs upserts on the target table. For tables without a primary key the consumer falls back to append mode.

> NOTE: In upsert mode, `debezium-server-databend` supports two kinds of delete modes:
> 1. `hard delete`: add `debezium.transforms.unwrap.delete.handling.mode=none` and `debezium.transforms.unwrap.drop.tombstones=false` to the config file. Debezium generates a tombstone record for each DELETE operation, and by default `ExtractNewRecordState` removes tombstone records from the stream; specify `drop.tombstones=false` to keep them.
> 2. `soft delete`: add a `__deleted` field to the target Databend table, and set `debezium.transforms.unwrap.delete.handling.mode=rewrite` and `debezium.transforms.unwrap.drop.tombstones=true` in the config file. Rows are then deleted softly by setting `__deleted` to true (see the combined sketch below).

### Append mode

Setting `debezium.sink.databend.upsert=false` switches the operation mode to append. In append mode no data deduplication is done and all received records are appended to the destination table.

> Note: If the primary key config is not set, the operation mode falls back to append even when the configuration asks for upsert mode.
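Putting the upsert and soft-delete settings above together, a minimal sketch might look like this (the `id` primary-key column is a hypothetical example):

```properties
# upsert on a hypothetical id column; primaryKey is required whenever upsert is enabled
debezium.sink.databend.upsert=true
debezium.sink.databend.database.primaryKey=id
# soft delete: rewrite DELETE events into a __deleted=true flag and drop tombstones
debezium.transforms.unwrap.delete.handling.mode=rewrite
debezium.transforms.unwrap.drop.tombstones=true
```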
#### Keeping Deleted Records

By default (`debezium.sink.databend.upsert-keep-deletes=true`) deleted records are kept in the Databend table; setting it to false soft-deletes them from the destination Databend table by setting the `__deleted` field to true.

#### debezium.transforms.unwrap.add.fields

If you set `debezium.transforms.unwrap.add.fields=op,table,source.ts_ms,db`, make sure your target table has the `__op`, `__table`, `__source.ts_ms`, and `__db` fields, or that the target table was created by Debezium Server.

### Optimizing batch size

Debezium extracts database events in real time, which can cause commits that are too frequent; this is not optimal for batch processing, especially when a near-real-time data feed is sufficient. To avoid this problem the following batch-size-wait classes are used.

Batch size wait adds a delay between consumer calls to increase the total number of events received per call while events are collected in memory. This setting should be configured together with the `debezium.source.max.queue.size` and `debezium.source.max.batch.size` Debezium properties.

#### NoBatchSizeWait

This is the default configuration; the consumer does not apply any wait and all events are consumed immediately.

#### MaxBatchSizeWait

MaxBatchSizeWait uses Debezium metrics to optimize the batch size; this strategy is more precise than DynamicBatchSizeWait. It periodically reads the current size of the streaming queue and waits until it reaches `max.batch.size`. The maximum wait and the check interval are controlled by the `debezium.sink.batch.batch-size-wait.max-wait-ms` and `debezium.sink.batch.batch-size-wait.wait-interval-ms` properties.

Example setup to receive ~2048 events per commit, with the maximum wait set to 30 seconds and the current size of the streaming queue checked every 5 seconds:

```properties
debezium.sink.batch.batch-size-wait=MaxBatchSizeWait
debezium.sink.batch.metrics.snapshot-mbean=debezium.postgres:type=connector-metrics,context=snapshot,server=debezium
debezium.sink.batch.metrics.streaming-mbean=debezium.postgres:type=connector-metrics,context=streaming,server=debezium
debezium.source.connector.class=io.debezium.connector.postgresql.PostgresConnector
debezium.source.max.batch.size=2048
debezium.source.max.queue.size=16000
debezium.sink.batch.batch-size-wait.max-wait-ms=30000
debezium.sink.batch.batch-size-wait.wait-interval-ms=5000
```

### include databases and include tables

Use `debezium.source.database.include.list=databaseName1,databaseName2` to select the source databases to monitor, and `debezium.source.table.include.list=databaseName.tableName1,databaseName.tableName2` to select the source tables to monitor.
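For instance, to capture only two tables from a single database (the database and table names here are made-up examples):

```properties
# monitor one source database and two of its tables
debezium.source.database.include.list=inventory
debezium.source.table.include.list=inventory.customers,inventory.orders
```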
## Debezium Event Flattening

The Debezium Databend consumer requires event flattening:

```properties
debezium.transforms=unwrap
debezium.transforms.unwrap.type=io.debezium.transforms.ExtractNewRecordState
# debezium.transforms.unwrap.add.fields=op,table,source.ts_ms,db  (make sure the target table has these fields)
debezium.transforms.unwrap.add.headers=db
debezium.transforms.unwrap.delete.handling.mode=rewrite
debezium.transforms.unwrap.drop.tombstones=true
```

### Databend JDBC Configuring

All properties starting with `debezium.sink.databend.database.param.` are passed to the Databend JDBC connection:

```properties
debezium.sink.databend.database.param.key=value
```

### Table Name Mapping

Target Databend tables are named by the rule `table-prefix` + `database.server.name`_`database`_`table`.

For example:

```properties
database.server.name=databend
debezium.sink.databend.table-prefix=cdc_
```

With the above config, the source table `database.table` is replicated to `databend_cdc_database_table`.

If you prefer not to use this table name mapping, use `debezium.sink.databend.database.tableName` to point to a specific table name instead.

[Detailed documentation](https://github.com/databendcloud/databend-jdbc/blob/main/docs/Connection.md) explains how to use connection parameters in a Databend JDBC connection.

### Special type convert

#### Decimal types

Debezium connectors handle decimals according to the setting of the `decimal.handling.mode` connector configuration property, which specifies how the connector handles values of DECIMAL and NUMERIC columns:

```properties
# precise (the default) represents them precisely using java.math.BigDecimal values,
# carried in change events in a binary form
decimal.handling.mode=precise
```

```properties
# string encodes values as formatted strings, which are easy to consume,
# although semantic information about the real type is lost
decimal.handling.mode=string
```

```properties
# double converts values to approximate double-precision floating-point values
decimal.handling.mode=double
```

#### DateTime types

`TimestampConverter` converts timestamps between different formats such as Unix epoch, strings, and Connect Date/Timestamp types. It applies to individual fields or to the entire value. Use the concrete transformation type designed for the record key (`org.apache.kafka.connect.transforms.TimestampConverter$Key`) or value (`org.apache.kafka.connect.transforms.TimestampConverter$Value`).

##### Examples

This configuration snippet shows how to use `TimestampConverter` to transform a Unix epoch (represented as an int64 value) into a formatted date string:

```properties
debezium.transforms.unwrap.type=io.debezium.transforms.ExtractNewRecordState
debezium.transforms.a.type=org.apache.kafka.connect.transforms.TimestampConverter$Value
debezium.transforms.a.target.type=string
debezium.transforms.a.field=a
debezium.transforms.a.format=yyyy-MM-dd hh:mm:ss
```

**NOTE:**
If your table has a `timestamp` or `datetime` column, you need to create your target table in Databend manually, because Debezium converts the `timestamp` and `datetime` types to `string`.
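Under Debezium Server, connector-level options such as `decimal.handling.mode` take the `debezium.source.` prefix, and a transform such as `a` above must also be registered in the `debezium.transforms` list (as the earlier flattening example does for `unwrap`). A sketch combining the conversions above under those assumptions:

```properties
# connector options take the debezium.source. prefix under Debezium Server
debezium.source.decimal.handling.mode=string
# register both transforms: unwrap runs first, then the timestamp converter
debezium.transforms=unwrap,a
debezium.transforms.unwrap.type=io.debezium.transforms.ExtractNewRecordState
debezium.transforms.a.type=org.apache.kafka.connect.transforms.TimestampConverter$Value
debezium.transforms.a.target.type=string
debezium.transforms.a.field=a
# SimpleDateFormat pattern; note hh is 12-hour, use HH if 24-hour output is intended
debezium.transforms.a.format=yyyy-MM-dd hh:mm:ss
```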
### Example Configuration

Read [application.properties.example](../debezium-server-databend-sink/src/main/resources/conf/application.properties.example).

### Problems and Points for improvement

- Because of how the Debezium Engine is integrated, the full-load reading phase does not support checkpoints; after a failure the data needs to be re-read.
- Currently only single concurrency is supported; horizontal scaling is not supported.
- To ensure data consistency, Debezium needs to apply locks to the databases or tables it reads. Global locks may cause the database to hang, while table-level locks restrict table reads.

A possible next step is to draw inspiration from Netflix DBLog's lock-free algorithm to improve on these issues.

### Related docs

[debezium-mysql-connector](https://debezium.io/documentation/reference/2.0/connectors/mysql.html#mysql-property-table-include-list)
--------------------------------------------------------------------------------