├── docs ├── index.md ├── images │ ├── debezium-iceberg.png │ ├── rdbms-debezium-iceberg.png │ ├── rdbms-debezium-iceberg_white.png │ ├── debezium-iceberg-architecture.drawio.png │ └── debezium-iceberg.drawio ├── contributing.md ├── icebergevents.md ├── python-runner.md ├── faq.md └── migration.md ├── .dockerignore ├── examples ├── lakekeeper │ ├── notebooks │ │ └── .gitignore │ ├── config │ │ └── application.properties │ └── produce_data.py └── nessie │ ├── config │ └── application.properties │ ├── produce_data.py │ └── docker-compose.yaml ├── debezium-server-iceberg-dist ├── src │ └── main │ │ └── resources │ │ ├── distro │ │ ├── jmx │ │ │ ├── jmxremote.access │ │ │ ├── jmxremote.password │ │ │ └── enable_jmx.sh │ │ ├── lib_metrics │ │ │ └── enable_exporter.sh │ │ ├── run.sh │ │ ├── config │ │ │ └── metrics.yml │ │ └── debezium.py │ │ └── assemblies │ │ └── server-distribution.xml └── README.md ├── .github ├── dependabot.yml └── workflows │ ├── deploy-documentation.yml │ ├── build.yml │ ├── stale.yml │ ├── codeql-analysis.yml │ └── release.yml ├── debezium-server-iceberg-sink └── src │ ├── main │ ├── resources │ │ └── META-INF │ │ │ └── beans.xml │ └── java │ │ └── io │ │ └── debezium │ │ └── server │ │ └── iceberg │ │ ├── mapper │ │ ├── IcebergTableMapper.java │ │ └── DefaultIcebergTableMapper.java │ │ ├── converter │ │ ├── SchemaConverter.java │ │ ├── AbstractVariantObject.java │ │ ├── IcebergSchemaInfo.java │ │ ├── EventConverter.java │ │ └── DateTimeUtils.java │ │ ├── batchsizewait │ │ ├── NoBatchSizeWait.java │ │ ├── BatchSizeWait.java │ │ └── MaxBatchSizeWait.java │ │ ├── GlobalConfig.java │ │ ├── history │ │ └── IcebergSchemaHistoryConfig.java │ │ ├── offset │ │ └── IcebergOffsetBackingStoreConfig.java │ │ ├── tableoperator │ │ ├── Operation.java │ │ ├── PartitionedAppendWriter.java │ │ ├── UnpartitionedDeltaWriter.java │ │ ├── PartitionedDeltaWriter.java │ │ ├── RecordWrapper.java │ │ ├── BaseDeltaTaskWriter.java │ │ └── IcebergTableWriterFactory.java │ │ ├── BatchConfig.java │ │ ├── storage │ │ └── BaseIcebergStorageConfig.java │ │ └── IcebergConfig.java │ └── test │ ├── resources │ ├── json │ │ ├── serde-unnested-delete-key-withschema.json │ │ ├── serde-unnested-order-key-withschema.json │ │ ├── serde-update.json │ │ ├── unwrap-with-schema.json │ │ ├── serde-with-array.json │ │ └── serde-with-schema_geom.json │ ├── mongodb │ │ └── Dockerfile │ └── META-INF │ │ └── services │ │ └── org.eclipse.microprofile.config.spi.ConfigSource │ └── java │ └── io │ └── debezium │ └── server │ └── iceberg │ ├── GlobalConfigProducer.java │ ├── IcebergConfigProducer.java │ ├── DebeziumConfigProducer.java │ ├── mapper │ ├── CustomMapper.java │ └── CustomMapperTest.java │ ├── converter │ ├── JsonEventConverterSchemaDataTest.java │ └── JsonEventConverterBuilderTest.java │ ├── testresources │ ├── CatalogJdbc.java │ ├── TestUtil.java │ ├── CatalogRest.java │ ├── SourceMongoDB.java │ ├── CatalogNessie.java │ ├── SourceMysqlDB.java │ ├── SourcePostgresqlDB.java │ └── S3Minio.java │ ├── tableoperator │ ├── UnpartitionedDeltaWriterTest.java │ └── BaseWriterTest.java │ ├── IcebergChangeConsumerJdbcCatalogTest.java │ ├── GlobalConfigTest.java │ ├── IcebergChangeConsumerRestCatalogTest.java │ ├── IcebergChangeConsumerConnectTest.java │ ├── IcebergChangeConsumerNessieCatalogTest.java │ ├── IcebergChangeConsumerDecimalTest.java │ ├── IcebergEventsChangeConsumerTest.java │ ├── IcebergChangeConsumerMongodbTest.java │ ├── IcebergChangeConsumerExcludedColumnsTest.java │ ├── batchsizewait │ └── 
MaxBatchSizeWaitTest.java │ ├── history │ └── IcebergSchemaHistoryTest.java │ ├── IcebergChangeConsumerMysqlTest.java │ ├── IcebergChangeConsumerTestUnwraapped.java │ └── IcebergChangeConsumerTemporalIsoStringTest.java ├── python ├── debezium │ ├── __main__.py │ └── __init__.py └── pyproject.toml ├── .run ├── IcebergChangeConsumerTest.run.xml ├── IcebergChangeConsumerTest.testSimpleUpload.run.xml ├── All in debezium-server-iceberg-sink.run.xml ├── package.run.xml ├── dependency_tree.run.xml └── clean,install.run.xml ├── mkdocs.yml ├── Dockerfile ├── README.md └── .gitignore /docs/index.md: -------------------------------------------------------------------------------- 1 | --8<-- "README.md" -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | Dockerfile 2 | **/target/ 3 | 4 | .idea/ 5 | .github/ 6 | .run/ -------------------------------------------------------------------------------- /examples/lakekeeper/notebooks/.gitignore: -------------------------------------------------------------------------------- 1 | spark-warehouse 2 | .ipynb_checkpoints -------------------------------------------------------------------------------- /debezium-server-iceberg-dist/src/main/resources/distro/jmx/jmxremote.access: -------------------------------------------------------------------------------- 1 | monitor readonly 2 | admin readwrite -------------------------------------------------------------------------------- /debezium-server-iceberg-dist/src/main/resources/distro/jmx/jmxremote.password: -------------------------------------------------------------------------------- 1 | admin admin123 2 | monitor monitor123 -------------------------------------------------------------------------------- /docs/images/debezium-iceberg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/memiiso/debezium-server-iceberg/HEAD/docs/images/debezium-iceberg.png -------------------------------------------------------------------------------- /docs/images/rdbms-debezium-iceberg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/memiiso/debezium-server-iceberg/HEAD/docs/images/rdbms-debezium-iceberg.png -------------------------------------------------------------------------------- /docs/images/rdbms-debezium-iceberg_white.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/memiiso/debezium-server-iceberg/HEAD/docs/images/rdbms-debezium-iceberg_white.png -------------------------------------------------------------------------------- /docs/images/debezium-iceberg-architecture.drawio.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/memiiso/debezium-server-iceberg/HEAD/docs/images/debezium-iceberg-architecture.drawio.png -------------------------------------------------------------------------------- /debezium-server-iceberg-dist/README.md: -------------------------------------------------------------------------------- 1 | Copy of 2 | Debezium [debezium-server-dist](https://github.com/debezium/debezium/tree/master/debezium-server/debezium-server-dist) 3 | project 4 | 5 | Authors : Debezium Authors -------------------------------------------------------------------------------- /.github/dependabot.yml: 
-------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "github-actions" 4 | directory: "/" 5 | schedule: 6 | interval: "weekly" 7 | - package-ecosystem: "maven" 8 | directory: "/" 9 | schedule: 10 | interval: "weekly" 11 | -------------------------------------------------------------------------------- /debezium-server-iceberg-sink/src/main/resources/META-INF/beans.xml: -------------------------------------------------------------------------------- 1 | 8 | 9 | -------------------------------------------------------------------------------- /debezium-server-iceberg-sink/src/main/java/io/debezium/server/iceberg/mapper/IcebergTableMapper.java: -------------------------------------------------------------------------------- 1 | package io.debezium.server.iceberg.mapper; 2 | 3 | import org.apache.iceberg.catalog.TableIdentifier; 4 | 5 | public interface IcebergTableMapper { 6 | TableIdentifier mapDestination(String destination); 7 | } 8 | -------------------------------------------------------------------------------- /debezium-server-iceberg-sink/src/test/resources/json/serde-unnested-delete-key-withschema.json: -------------------------------------------------------------------------------- 1 | { 2 | "schema": { 3 | "type": "struct", 4 | "fields": [ 5 | { 6 | "type": "int32", 7 | "optional": false, 8 | "field": "id" 9 | } 10 | ], 11 | "optional": false, 12 | "name": "testc.inventory.customers.Key" 13 | }, 14 | "payload": { 15 | "id": 1004 16 | } 17 | } -------------------------------------------------------------------------------- /debezium-server-iceberg-sink/src/test/resources/json/serde-unnested-order-key-withschema.json: -------------------------------------------------------------------------------- 1 | { 2 | "schema": { 3 | "type": "struct", 4 | "fields": [ 5 | { 6 | "type": "int32", 7 | "optional": false, 8 | "field": "order_number" 9 | } 10 | ], 11 | "optional": false, 12 | "name": "testc.inventory.orders.Key" 13 | }, 14 | "payload": { 15 | "order_number": 10004 16 | } 17 | } -------------------------------------------------------------------------------- /docs/contributing.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | The Debezium Iceberg consumer is a young project looking for new maintainers. There are definitely many small and big 4 | improvements to make, ranging from documentation and new features to bug reports. 5 | 6 | Please feel free to send pull requests, report bugs, or open feature requests. 7 | 8 | ## License 9 | 10 | By contributing, you agree that your contributions will be licensed under the Apache 2.0 License.
11 | -------------------------------------------------------------------------------- /debezium-server-iceberg-dist/src/main/resources/distro/lib_metrics/enable_exporter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # To enable the Prometheus JMX exporter, set the JMX_EXPORTER_PORT environment variable 3 | 4 | if [ -n "${JMX_EXPORTER_PORT}" ]; then 5 | JMX_EXPORTER_CONFIG=${JMX_EXPORTER_CONFIG:-"config/metrics.yml"} 6 | JMX_EXPORTER_AGENT_JAR=$(find lib_metrics -name "jmx_prometheus_javaagent-*.jar") 7 | export JAVA_OPTS="-javaagent:${JMX_EXPORTER_AGENT_JAR}=0.0.0.0:${JMX_EXPORTER_PORT}:${JMX_EXPORTER_CONFIG} ${JAVA_OPTS}" 8 | fi -------------------------------------------------------------------------------- /debezium-server-iceberg-sink/src/main/java/io/debezium/server/iceberg/converter/SchemaConverter.java: -------------------------------------------------------------------------------- 1 | package io.debezium.server.iceberg.converter; 2 | 3 | import org.apache.iceberg.Schema; 4 | import org.apache.iceberg.SortOrder; 5 | 6 | public interface SchemaConverter { 7 | @Override 8 | int hashCode(); 9 | 10 | @Override 11 | boolean equals(Object o); 12 | 13 | Schema icebergSchema(boolean withIdentifierFields); 14 | 15 | default Schema icebergSchema() { 16 | return icebergSchema(true); 17 | } 18 | 19 | SortOrder sortOrder(Schema schema); 20 | } 21 | -------------------------------------------------------------------------------- /debezium-server-iceberg-sink/src/test/resources/mongodb/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM mongo:6.0 2 | 3 | LABEL maintainer="Debezium Community" 4 | 5 | ENV REPLICA_SET_HOSTS="localhost" 6 | 7 | # Starting with MongoDB 4.4, authentication-enabled MongoDB requires a key 8 | # for intra-replica set communication 9 | RUN openssl rand -base64 756 > /etc/mongodb.keyfile &&\ 10 | chown mongodb:mongodb /etc/mongodb.keyfile &&\ 11 | chmod 400 /etc/mongodb.keyfile 12 | 13 | COPY start-mongodb.sh /usr/local/bin/ 14 | RUN chmod +x /usr/local/bin/start-mongodb.sh 15 | 16 | ENTRYPOINT ["start-mongodb.sh"] 17 | -------------------------------------------------------------------------------- /debezium-server-iceberg-sink/src/main/java/io/debezium/server/iceberg/batchsizewait/NoBatchSizeWait.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright memiiso Authors. 4 | * * 5 | * * Licensed under the Apache Software License version 2.0, available at http://www.apache.org/licenses/LICENSE-2.0 6 | * 7 | */ 8 | 9 | package io.debezium.server.iceberg.batchsizewait; 10 | 11 | import jakarta.enterprise.context.Dependent; 12 | import jakarta.inject.Named; 13 | 14 | /** 15 | * Default no-op implementation of {@link BatchSizeWait}; it adds no wait between batches. 16 | * 17 | * @author Ismail Simsek 18 | */ 19 | @Dependent 20 | @Named("NoBatchSizeWait") 21 | public class NoBatchSizeWait implements BatchSizeWait { 22 | } 23 | -------------------------------------------------------------------------------- /debezium-server-iceberg-sink/src/main/java/io/debezium/server/iceberg/batchsizewait/BatchSizeWait.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright memiiso Authors.
4 | * * 5 | * * Licensed under the Apache Software License version 2.0, available at http://www.apache.org/licenses/LICENSE-2.0 6 | * 7 | */ 8 | 9 | package io.debezium.server.iceberg.batchsizewait; 10 | 11 | /** 12 | * When enabled, adds a wait to the consumer to control the batch size, effectively turning the processing into batch processing. 13 | * 14 | * @author Ismail Simsek 15 | */ 16 | public interface BatchSizeWait { 17 | 18 | default void initizalize() { 19 | } 20 | 21 | default void waitMs(Integer numRecordsProcessed, Integer processingTimeMs) throws InterruptedException { 22 | } 23 | 24 | } 25 | -------------------------------------------------------------------------------- /debezium-server-iceberg-sink/src/main/java/io/debezium/server/iceberg/GlobalConfig.java: -------------------------------------------------------------------------------- 1 | package io.debezium.server.iceberg; 2 | 3 | import io.quarkus.runtime.annotations.ConfigRoot; 4 | import io.smallrye.config.ConfigMapping; 5 | import io.smallrye.config.WithDefault; 6 | import io.smallrye.config.WithName; 7 | import io.smallrye.config.WithParentName; 8 | import org.jboss.logging.Logger; 9 | 10 | @ConfigRoot 11 | @ConfigMapping 12 | public interface GlobalConfig { 13 | 14 | @WithParentName 15 | IcebergConfig iceberg(); 16 | 17 | @WithParentName 18 | DebeziumConfig debezium(); 19 | 20 | @WithParentName 21 | BatchConfig batch(); 22 | 23 | @WithName("quarkus.log.level") 24 | @WithDefault("INFO") 25 | Logger.Level quarkusLogLevel(); 26 | 27 | } -------------------------------------------------------------------------------- /.github/workflows/deploy-documentation.yml: -------------------------------------------------------------------------------- 1 | name: deploy-mkdocs-documentation 2 | on: 3 | push: 4 | branches: 5 | - master 6 | - main 7 | - docs 8 | permissions: 9 | contents: write 10 | jobs: 11 | deploy: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v4 15 | - name: Configure Git Credentials 16 | run: | 17 | git config user.name github-actions[bot] 18 | git config user.email 41898282+github-actions[bot]@users.noreply.github.com 19 | - uses: actions/setup-python@v6 20 | with: 21 | python-version: 3.x 22 | - run: echo "cache_id=$(date --utc '+%V')" >> $GITHUB_ENV 23 | - run: pip install mkdocs-material 24 | - run: mkdocs gh-deploy --force -------------------------------------------------------------------------------- /python/debezium/__main__.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | from debezium import Debezium 4 | 5 | 6 | def main(): 7 | parser = argparse.ArgumentParser() 8 | parser.add_argument('--debezium_dir', type=str, default=None, 9 | help='Directory of debezium server application') 10 | parser.add_argument('--conf_dir', type=str, default=None, 11 | help='Directory of application.properties') 12 | parser.add_argument('--java_home', type=str, default=None, 13 | help='JAVA_HOME directory') 14 | _args, args = parser.parse_known_args() 15 | ds = Debezium(debezium_dir=_args.debezium_dir, conf_dir=_args.conf_dir, java_home=_args.java_home) 16 | ds.run(*args) 17 | 18 | 19 | if __name__ == '__main__': 20 | main() 21 | -------------------------------------------------------------------------------- /debezium-server-iceberg-sink/src/main/java/io/debezium/server/iceberg/history/IcebergSchemaHistoryConfig.java: -------------------------------------------------------------------------------- 1 | package io.debezium.server.iceberg.history; 2 | 3 |
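/* Configures the Iceberg table used to store Debezium schema history; the backing table name defaults to debezium_database_history_storage (see tableName() below). */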
import io.debezium.config.Configuration; 4 | import io.debezium.server.iceberg.storage.BaseIcebergStorageConfig; 5 | 6 | 7 | public class IcebergSchemaHistoryConfig extends BaseIcebergStorageConfig { 8 | public IcebergSchemaHistoryConfig(Configuration config, String configuration_field_prefix) { 9 | super(config, configuration_field_prefix); 10 | } 11 | 12 | @Override 13 | public String tableName() { 14 | return this.config.getProperty("table-name", "debezium_database_history_storage"); 15 | } 16 | public String getMigrateHistoryFile() { 17 | return config.getProperty("migrate-history-file", ""); 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /debezium-server-iceberg-sink/src/main/java/io/debezium/server/iceberg/offset/IcebergOffsetBackingStoreConfig.java: -------------------------------------------------------------------------------- 1 | package io.debezium.server.iceberg.offset; 2 | 3 | import io.debezium.config.Configuration; 4 | import io.debezium.server.iceberg.storage.BaseIcebergStorageConfig; 5 | 6 | 7 | public class IcebergOffsetBackingStoreConfig extends BaseIcebergStorageConfig { 8 | public IcebergOffsetBackingStoreConfig(Configuration config, String configuration_field_prefix) { 9 | super(config, configuration_field_prefix); 10 | } 11 | 12 | @Override 13 | public String tableName() { 14 | return this.config.getProperty("table-name", "debezium_offset_storage"); 15 | } 16 | 17 | public String getMigrateOffsetFile() { 18 | return this.config.getProperty("migrate-offset-file",""); 19 | } 20 | 21 | } 22 | -------------------------------------------------------------------------------- /.run/IcebergChangeConsumerTest.run.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 9 | 10 | 18 | -------------------------------------------------------------------------------- /debezium-server-iceberg-sink/src/test/resources/json/serde-update.json: -------------------------------------------------------------------------------- 1 | { 2 | "op": "u", 3 | "ts_ms": 1465491411815, 4 | "before": { 5 | "id": 1004, 6 | "first_name": "Anne-Marie", 7 | "last_name": "Kretchmar", 8 | "email": "annek@noanswer.org" 9 | }, 10 | "after": { 11 | "id": 1004, 12 | "first_name": "Anne", 13 | "last_name": "Kretchmar", 14 | "email": "annek@noanswer.org" 15 | }, 16 | "source": { 17 | "version": "0.10.0.Final", 18 | "connector": "mysql", 19 | "name": "mysql-server-1", 20 | "ts_ms": 0, 21 | "snapshot": false, 22 | "db": "inventory", 23 | "table": "customers", 24 | "server_id": 0, 25 | "gtid": null, 26 | "file": "mysql-bin.000003", 27 | "pos": 154, 28 | "row": 0, 29 | "thread": 7, 30 | "query": "INSERT INTO customers (first_name, last_name, email) VALUES ('Anne', 'Kretchmar', 'annek@noanswer.org')" 31 | } 32 | } -------------------------------------------------------------------------------- /.run/IcebergChangeConsumerTest.testSimpleUpload.run.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 9 | 10 | 18 | -------------------------------------------------------------------------------- /.run/All in debezium-server-iceberg-sink.run.xml: -------------------------------------------------------------------------------- 1 | 8 | 9 | 10 | 12 | 13 | 22 | -------------------------------------------------------------------------------- /python/pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | 
requires = ["setuptools", "setuptools-scm"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "debezium" 7 | version = "0.1.0" 8 | authors = [ 9 | { name = "Memiiso Organization" }, 10 | ] 11 | description = "Debezium Server Python runner" 12 | # readme = "README.md" 13 | requires-python = ">=3.8" 14 | keywords = ["Debezium", "Replication", "Apache", "Iceberg"] 15 | license = { text = "Apache License 2.0" } 16 | classifiers = [ 17 | "Development Status :: 5 - Production/Stable", 18 | "Programming Language :: Python :: 3", 19 | ] 20 | dependencies = [ 21 | "pyjnius==1.6.1" 22 | ] 23 | [project.scripts] 24 | debezium = "debezium.__main__:main" 25 | 26 | [project.urls] 27 | Homepage = "https://github.com/memiiso/debezium-server-iceberg" 28 | Documentation = "https://github.com/memiiso/debezium-server-iceberg" 29 | Repository = "https://github.com/memiiso/debezium-server-iceberg" 30 | -------------------------------------------------------------------------------- /debezium-server-iceberg-sink/src/test/resources/META-INF/services/org.eclipse.microprofile.config.spi.ConfigSource: -------------------------------------------------------------------------------- 1 | # 2 | # /* 3 | # * Copyright memiiso Authors. 4 | # * 5 | # * Licensed under the Apache Software License version 2.0, available at http://www.apache.org/licenses/LICENSE-2.0 6 | # */ 7 | # 8 | 9 | io.debezium.server.iceberg.TestConfigSource -------------------------------------------------------------------------------- /debezium-server-iceberg-dist/src/main/resources/distro/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # /* 4 | # * Copyright memiiso Authors.
5 | # * 6 | # * Licensed under the Apache Software License version 2.0, available at http://www.apache.org/licenses/LICENSE-2.0 7 | # */ 8 | # 9 | 10 | LIB_PATH="lib/*" 11 | 12 | if [ "$OSTYPE" = "msys" ] || [ "$OSTYPE" = "cygwin" ]; then 13 | PATH_SEP=";" 14 | else 15 | PATH_SEP=":" 16 | fi 17 | 18 | if [ -z "$JAVA_HOME" ]; then 19 | JAVA_BINARY="java" 20 | else 21 | JAVA_BINARY="$JAVA_HOME/bin/java" 22 | fi 23 | 24 | RUNNER=$(ls debezium-server-*runner.jar) 25 | 26 | ENABLE_DEBEZIUM_SCRIPTING=${ENABLE_DEBEZIUM_SCRIPTING:-false} 27 | if [[ "${ENABLE_DEBEZIUM_SCRIPTING}" == "true" ]]; then 28 | LIB_PATH=$LIB_PATH$PATH_SEP"lib_opt/*" 29 | fi 30 | 31 | source ./jmx/enable_jmx.sh 32 | source ./lib_metrics/enable_exporter.sh 33 | 34 | exec "$JAVA_BINARY" $DEBEZIUM_OPTS $JAVA_OPTS -cp \ 35 | $RUNNER$PATH_SEP$LIB_PATH io.debezium.server.Main -------------------------------------------------------------------------------- /debezium-server-iceberg-sink/src/test/java/io/debezium/server/iceberg/GlobalConfigProducer.java: -------------------------------------------------------------------------------- 1 | package io.debezium.server.iceberg; 2 | 3 | import io.smallrye.config.SmallRyeConfig; 4 | import jakarta.enterprise.context.ApplicationScoped; 5 | import jakarta.enterprise.inject.Produces; 6 | import jakarta.inject.Inject; 7 | import org.eclipse.microprofile.config.Config; 8 | import org.mockito.Mockito; 9 | 10 | /** 11 | * This class provides a mocked instance of GlobalConfig for testing purposes, 12 | * allowing selective overriding of configuration values while preserving the original 13 | * configuration. 14 | */ 15 | public class GlobalConfigProducer { 16 | @Inject 17 | Config config; 18 | 19 | @Produces 20 | @ApplicationScoped 21 | @io.quarkus.test.Mock 22 | GlobalConfig appConfig() { 23 | GlobalConfig appConfig = config.unwrap(SmallRyeConfig.class).getConfigMapping(GlobalConfig.class); 24 | GlobalConfig appConfigSpy = Mockito.spy(appConfig); 25 | return appConfigSpy; 26 | } 27 | 28 | } -------------------------------------------------------------------------------- /debezium-server-iceberg-sink/src/test/java/io/debezium/server/iceberg/IcebergConfigProducer.java: -------------------------------------------------------------------------------- 1 | package io.debezium.server.iceberg; 2 | 3 | import io.smallrye.config.SmallRyeConfig; 4 | import jakarta.enterprise.context.ApplicationScoped; 5 | import jakarta.enterprise.inject.Produces; 6 | import jakarta.inject.Inject; 7 | import org.eclipse.microprofile.config.Config; 8 | import org.mockito.Mockito; 9 | 10 | /** 11 | * This class provides a mocked instance of IcebergConfig for testing purposes, 12 | * allowing selective overriding of configuration values while preserving the original 13 | * configuration. 
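 * For example (accessor is illustrative), a test can stub a single value with Mockito.when(config.upsert()).thenReturn(true); every other value still comes from the real configuration.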
14 | */ 15 | public class IcebergConfigProducer { 16 | @Inject 17 | Config config; 18 | 19 | @Produces 20 | @ApplicationScoped 21 | @io.quarkus.test.Mock 22 | IcebergConfig appConfig() { 23 | IcebergConfig appConfig = config.unwrap(SmallRyeConfig.class).getConfigMapping(IcebergConfig.class); 24 | IcebergConfig appConfigSpy = Mockito.spy(appConfig); 25 | return appConfigSpy; 26 | } 27 | 28 | } -------------------------------------------------------------------------------- /debezium-server-iceberg-sink/src/test/java/io/debezium/server/iceberg/DebeziumConfigProducer.java: -------------------------------------------------------------------------------- 1 | package io.debezium.server.iceberg; 2 | 3 | import io.smallrye.config.SmallRyeConfig; 4 | import jakarta.enterprise.context.ApplicationScoped; 5 | import jakarta.enterprise.inject.Produces; 6 | import jakarta.inject.Inject; 7 | import org.eclipse.microprofile.config.Config; 8 | import org.mockito.Mockito; 9 | 10 | /** 11 | * This class provides a mocked instance of DebeziumConfig for testing purposes, 12 | * allowing selective overriding of configuration values while preserving the original 13 | * configuration. 14 | */ 15 | public class DebeziumConfigProducer { 16 | @Inject 17 | Config config; 18 | 19 | @Produces 20 | @ApplicationScoped 21 | @io.quarkus.test.Mock 22 | DebeziumConfig appConfig() { 23 | DebeziumConfig appConfig = config.unwrap(SmallRyeConfig.class).getConfigMapping(DebeziumConfig.class); 24 | DebeziumConfig appConfigSpy = Mockito.spy(appConfig); 25 | return appConfigSpy; 26 | } 27 | 28 | } -------------------------------------------------------------------------------- /debezium-server-iceberg-sink/src/test/java/io/debezium/server/iceberg/mapper/CustomMapper.java: -------------------------------------------------------------------------------- 1 | package io.debezium.server.iceberg.mapper; 2 | 3 | import io.debezium.server.iceberg.GlobalConfig; 4 | import jakarta.enterprise.context.Dependent; 5 | import jakarta.inject.Inject; 6 | import jakarta.inject.Named; 7 | import org.apache.iceberg.catalog.Namespace; 8 | import org.apache.iceberg.catalog.TableIdentifier; 9 | 10 | @Named("custom-mapper") 11 | @Dependent 12 | public class CustomMapper implements IcebergTableMapper { 13 | @Inject 14 | GlobalConfig config; 15 | 16 | @Override 17 | public TableIdentifier mapDestination(String destination) { 18 | try { 19 | String[] parts = destination.split("\\."); 20 | String tableName = parts[parts.length - 1]; 21 | return TableIdentifier.of(Namespace.of(config.iceberg().namespace()), "custom_mapper_" + tableName); 22 | } catch (Exception e) { 23 | System.out.println("Failed to map:" + destination); 24 | throw new RuntimeException(e); 25 | } 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: Debezium Server Iceberg Consumer 2 | site_url: http://memiiso.github.io/debezium-server-iceberg 3 | repo_url: https://github.com/memiiso/debezium-server-iceberg 4 | theme: 5 | name: material 6 | features: 7 | # - navigation.instant 8 | - navigation.indexes 9 | - navigation.tabs 10 | # - navigation.expand 11 | - toc.integrate 12 | - content.code.copy 13 | - content.tabs.link 14 | nav: 15 | - Home: index.md 16 | - iceberg Consumer: iceberg.md 17 | - icebergevents Consumer: icebergevents.md 18 | - Python Runner: python-runner.md 19 | - Migration Guideline: migration.md 20 | - FAQ: faq.md 21 | - 
Contributing: contributing.md 22 | 23 | markdown_extensions: 24 | - pymdownx.highlight: 25 | anchor_linenums: true 26 | line_spans: __span 27 | pygments_lang_class: true 28 | - pymdownx.inlinehilite 29 | - pymdownx.snippets 30 | - pymdownx.superfences 31 | - admonition 32 | - pymdownx.details 33 | - abbr 34 | - pymdownx.snippets: 35 | base_path: [ !relative $config_dir ] 36 | check_paths: true -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM maven:3.9.9-eclipse-temurin-21 as builder 2 | ARG RELEASE_VERSION 3 | RUN apt-get -qq update && apt-get -qq install unzip 4 | COPY . /app 5 | WORKDIR /app 6 | RUN mvn clean package -Passembly -Dmaven.test.skip --quiet -Drevision=${RELEASE_VERSION} 7 | RUN unzip /app/debezium-server-iceberg-dist/target/debezium-server-iceberg-dist*.zip -d appdist 8 | RUN mkdir /app/appdist/debezium-server-iceberg/data && \ 9 | chown -R 185 /app/appdist/debezium-server-iceberg && \ 10 | chmod -R g+w,o+w /app/appdist/debezium-server-iceberg 11 | 12 | # Stage 2: Final image 13 | FROM registry.access.redhat.com/ubi8/openjdk-21 14 | 15 | ENV SERVER_HOME=/debezium 16 | 17 | USER root 18 | RUN microdnf clean all 19 | 20 | USER jboss 21 | 22 | COPY --from=builder /app/appdist/debezium-server-iceberg $SERVER_HOME 23 | 24 | # Set the working directory to the Debezium Server home directory 25 | WORKDIR $SERVER_HOME 26 | 27 | # 28 | # Expose the ports and set up volumes for the data, transaction log, and configuration 29 | # 30 | EXPOSE 8080 31 | VOLUME ["/debezium/config","/debezium/data"] 32 | 33 | CMD ["/debezium/run.sh"] -------------------------------------------------------------------------------- /debezium-server-iceberg-sink/src/main/java/io/debezium/server/iceberg/tableoperator/Operation.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | package io.debezium.server.iceberg.tableoperator; 20 | 21 | public enum Operation { 22 | INSERT, 23 | UPDATE, 24 | DELETE, 25 | READ 26 | } 27 | -------------------------------------------------------------------------------- /debezium-server-iceberg-dist/src/main/resources/distro/jmx/enable_jmx.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # To enable JMX functionality, export the JMX_HOST and JMX_PORT environment variables. 3 | # Modify the jmxremote.access and jmxremote.password files accordingly. 
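# Example (illustrative values): export JMX_HOST=0.0.0.0 JMX_PORT=1099 before starting run.sh, then attach a JMX client such as jconsole to that host and port.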
4 | if [ -n "${JMX_HOST}" -a -n "${JMX_PORT}" ]; then 5 | export JAVA_OPTS="-Dcom.sun.management.jmxremote.ssl=false \ 6 | -Dcom.sun.management.jmxremote.port=${JMX_PORT} \ 7 | -Dcom.sun.management.jmxremote.rmi.port=${JMX_PORT} \ 8 | -Dcom.sun.management.jmxremote.local.only=false \ 9 | -Djava.rmi.server.hostname=${JMX_HOST} \ 10 | -Dcom.sun.management.jmxremote.verbose=true" 11 | 12 | if [ -f "jmx/jmxremote.access" -a -f "jmx/jmxremote.password" ]; then 13 | chmod 600 jmx/jmxremote.password 14 | export JAVA_OPTS="${JAVA_OPTS} -Dcom.sun.management.jmxremote.authenticate=true \ 15 | -Dcom.sun.management.jmxremote.access.file=jmx/jmxremote.access \ 16 | -Dcom.sun.management.jmxremote.password.file=jmx/jmxremote.password" 17 | else 18 | export JAVA_OPTS="${JAVA_OPTS} -Dcom.sun.management.jmxremote.authenticate=false" 19 | fi 20 | fi 21 | -------------------------------------------------------------------------------- /.run/package.run.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /.run/dependency_tree.run.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /debezium-server-iceberg-sink/src/test/java/io/debezium/server/iceberg/converter/JsonEventConverterSchemaDataTest.java: -------------------------------------------------------------------------------- 1 | package io.debezium.server.iceberg.converter; 2 | 3 | import org.junit.jupiter.api.Test; 4 | import org.junit.jupiter.api.condition.DisabledIfEnvironmentVariable; 5 | 6 | import java.util.Set; 7 | 8 | import static org.junit.jupiter.api.Assertions.assertEquals; 9 | 10 | @DisabledIfEnvironmentVariable(named = "DEBEZIUM_FORMAT_VALUE", matches = "connect") 11 | class JsonEventConverterSchemaDataTest { 12 | 13 | @Test 14 | void testIcebergSchemaConverterDataBehaviourAndCloning() { 15 | 16 | IcebergSchemaInfo test = new IcebergSchemaInfo(5); 17 | test.identifierFieldIds().add(3); 18 | assertEquals(6, test.nextFieldId().incrementAndGet()); 19 | assertEquals(Set.of(3), test.identifierFieldIds()); 20 | 21 | // test cloning and then changing nextFieldId is persisting 22 | IcebergSchemaInfo copy = test.copyPreservingMetadata(); 23 | assertEquals(6, test.nextFieldId().get()); 24 | copy.nextFieldId().incrementAndGet(); 25 | assertEquals(7, test.nextFieldId().get()); 26 | 27 | // test cloning and then changing identifier fields is persisting 28 | assertEquals(Set.of(3), copy.identifierFieldIds()); 29 | copy.identifierFieldIds().add(7); 30 | assertEquals(Set.of(3, 7), test.identifierFieldIds()); 31 | 32 | } 33 | 34 | } -------------------------------------------------------------------------------- /debezium-server-iceberg-sink/src/main/java/io/debezium/server/iceberg/BatchConfig.java: -------------------------------------------------------------------------------- 1 | package io.debezium.server.iceberg; 2 | 3 | import io.debezium.config.CommonConnectorConfig; 4 | import io.quarkus.runtime.annotations.ConfigRoot; 5 | import io.smallrye.config.ConfigMapping; 6 | import io.smallrye.config.WithDefault; 7 | import io.smallrye.config.WithName; 8 | 9 | @ConfigRoot 10 | @ConfigMapping 11 | public interface BatchConfig { 12 | @WithName("debezium.source.max.queue.size") 13 | @WithDefault(CommonConnectorConfig.DEFAULT_MAX_QUEUE_SIZE + "") 14 | int 
sourceMaxQueueSize(); 15 | 16 | @WithName("debezium.source.max.batch.size") 17 | @WithDefault(CommonConnectorConfig.DEFAULT_MAX_BATCH_SIZE + "") 18 | int sourceMaxBatchSize(); 19 | 20 | @WithName("debezium.sink.batch.batch-size-wait.max-wait-ms") 21 | @WithDefault("300000") 22 | int batchSizeWaitMaxWaitMs(); 23 | 24 | @WithName("debezium.sink.batch.batch-size-wait.wait-interval-ms") 25 | @WithDefault("10000") 26 | int batchSizeWaitWaitIntervalMs(); 27 | 28 | @WithName("debezium.sink.batch.batch-size-wait") 29 | @WithDefault("NoBatchSizeWait") 30 | String batchSizeWaitName(); 31 | 32 | @WithName("debezium.sink.batch.concurrent-uploads") 33 | @WithDefault("1") 34 | int concurrentUploads(); 35 | 36 | @WithName("debezium.sink.batch.concurrent-uploads.timeout-minutes") 37 | @WithDefault("60") 38 | int concurrentUploadsTimeoutMinutes(); 39 | 40 | 41 | } -------------------------------------------------------------------------------- /debezium-server-iceberg-sink/src/test/java/io/debezium/server/iceberg/testresources/CatalogJdbc.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright memiiso Authors. 4 | * * 5 | * * Licensed under the Apache Software License version 2.0, available at http://www.apache.org/licenses/LICENSE-2.0 6 | * 7 | */ 8 | 9 | package io.debezium.server.iceberg.testresources; 10 | 11 | import io.quarkus.test.common.QuarkusTestResourceLifecycleManager; 12 | 13 | import java.util.Map; 14 | import java.util.concurrent.ConcurrentHashMap; 15 | 16 | import org.testcontainers.containers.MySQLContainer; 17 | 18 | public class CatalogJdbc implements QuarkusTestResourceLifecycleManager { 19 | public static final MySQLContainer container = new MySQLContainer<>("mysql:8"); 20 | 21 | @Override 22 | public Map start() { 23 | container.start(); 24 | System.out.println("Jdbc Catalog started: " + container.getJdbcUrl()); 25 | 26 | Map config = new ConcurrentHashMap<>(); 27 | 28 | config.put("debezium.sink.iceberg.type", "jdbc"); 29 | config.put("debezium.sink.iceberg.uri", container.getJdbcUrl()); 30 | config.put("debezium.sink.iceberg.jdbc.user", container.getUsername()); 31 | config.put("debezium.sink.iceberg.jdbc.password", container.getPassword()); 32 | config.put("debezium.sink.iceberg.jdbc.schema-version", "V1"); 33 | 34 | return config; 35 | } 36 | 37 | @Override 38 | public void stop() { 39 | container.stop(); 40 | } 41 | 42 | } 43 | -------------------------------------------------------------------------------- /debezium-server-iceberg-sink/src/main/java/io/debezium/server/iceberg/tableoperator/PartitionedAppendWriter.java: -------------------------------------------------------------------------------- 1 | package io.debezium.server.iceberg.tableoperator; 2 | 3 | import org.apache.iceberg.FileFormat; 4 | import org.apache.iceberg.PartitionKey; 5 | import org.apache.iceberg.PartitionSpec; 6 | import org.apache.iceberg.Schema; 7 | import org.apache.iceberg.data.InternalRecordWrapper; 8 | import org.apache.iceberg.data.Record; 9 | import org.apache.iceberg.io.FileAppenderFactory; 10 | import org.apache.iceberg.io.FileIO; 11 | import org.apache.iceberg.io.OutputFileFactory; 12 | import org.apache.iceberg.io.PartitionedWriter; 13 | 14 | public class PartitionedAppendWriter extends PartitionedWriter { 15 | private final PartitionKey partitionKey; 16 | final InternalRecordWrapper wrapper; 17 | 18 | public PartitionedAppendWriter(PartitionSpec spec, FileFormat format, 19 | FileAppenderFactory appenderFactory, 20 | 
OutputFileFactory fileFactory, FileIO io, long targetFileSize, 21 | Schema schema) { 22 | super(spec, format, appenderFactory, fileFactory, io, targetFileSize); 23 | this.partitionKey = new PartitionKey(spec, schema); 24 | this.wrapper = new InternalRecordWrapper(schema.asStruct()); 25 | } 26 | 27 | @Override 28 | protected PartitionKey partition(Record row) { 29 | partitionKey.partition(wrapper.wrap(row)); 30 | return partitionKey; 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /debezium-server-iceberg-sink/src/test/java/io/debezium/server/iceberg/tableoperator/UnpartitionedDeltaWriterTest.java: -------------------------------------------------------------------------------- 1 | package io.debezium.server.iceberg.tableoperator; 2 | 3 | import org.apache.iceberg.data.GenericRecord; 4 | import org.apache.iceberg.data.Record; 5 | import org.apache.iceberg.io.WriteResult; 6 | import org.junit.jupiter.api.Assertions; 7 | import org.junit.jupiter.api.Test; 8 | 9 | import java.io.IOException; 10 | 11 | class UnpartitionedDeltaWriterTest extends BaseWriterTest { 12 | 13 | @Test 14 | public void testUnpartitionedDeltaWriter() throws IOException { 15 | UnpartitionedDeltaWriter writer = new UnpartitionedDeltaWriter(table.spec(), format, appenderFactory, fileFactory, 16 | table.io(), 17 | Long.MAX_VALUE, table.schema(), identifierFieldIds, true); 18 | 19 | Record row = GenericRecord.create(SCHEMA); 20 | row.setField("id", "123"); 21 | row.setField("data", "hello world!"); 22 | row.setField("id2", "123"); 23 | row.setField("__op", "u"); 24 | 25 | writer.write(new RecordWrapper(row, Operation.UPDATE)); 26 | WriteResult result = writer.complete(); 27 | 28 | // in upsert mode, each write is a delete + append, so we'll have 1 data file and 1 delete file 29 | Assertions.assertEquals(result.dataFiles().length, 1); 30 | Assertions.assertEquals(result.dataFiles()[0].format(), format); 31 | Assertions.assertEquals(result.deleteFiles().length, 1); 32 | Assertions.assertEquals(result.deleteFiles()[0].format(), format); 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /debezium-server-iceberg-sink/src/main/java/io/debezium/server/iceberg/tableoperator/UnpartitionedDeltaWriter.java: -------------------------------------------------------------------------------- 1 | package io.debezium.server.iceberg.tableoperator; 2 | 3 | import java.io.IOException; 4 | import java.util.Set; 5 | 6 | import org.apache.iceberg.FileFormat; 7 | import org.apache.iceberg.PartitionSpec; 8 | import org.apache.iceberg.Schema; 9 | import org.apache.iceberg.data.Record; 10 | import org.apache.iceberg.io.FileAppenderFactory; 11 | import org.apache.iceberg.io.FileIO; 12 | import org.apache.iceberg.io.OutputFileFactory; 13 | 14 | class UnpartitionedDeltaWriter extends BaseDeltaTaskWriter { 15 | private final RowDataDeltaWriter writer; 16 | 17 | UnpartitionedDeltaWriter(PartitionSpec spec, 18 | FileFormat format, 19 | FileAppenderFactory appenderFactory, 20 | OutputFileFactory fileFactory, 21 | FileIO io, 22 | long targetFileSize, 23 | Schema schema, 24 | Set identifierFieldIds, 25 | boolean keepDeletes) { 26 | super(spec, format, appenderFactory, fileFactory, io, targetFileSize, schema, identifierFieldIds, keepDeletes); 27 | this.writer = new RowDataDeltaWriter(null); 28 | } 29 | 30 | @Override 31 | RowDataDeltaWriter route(Record row) { 32 | return writer; 33 | } 34 | 35 | @Override 36 | public void close() throws IOException { 37 | 
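/* the single delta writer received all rows, so closing it flushes any buffered data and delete files */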
writer.close(); 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /debezium-server-iceberg-sink/src/main/java/io/debezium/server/iceberg/mapper/DefaultIcebergTableMapper.java: -------------------------------------------------------------------------------- 1 | package io.debezium.server.iceberg.mapper; 2 | 3 | import io.debezium.server.iceberg.GlobalConfig; 4 | import jakarta.enterprise.context.Dependent; 5 | import jakarta.inject.Inject; 6 | import jakarta.inject.Named; 7 | import org.apache.iceberg.catalog.Namespace; 8 | import org.apache.iceberg.catalog.TableIdentifier; 9 | 10 | @Named("default-mapper") 11 | @Dependent 12 | public class DefaultIcebergTableMapper implements IcebergTableMapper { 13 | @Inject 14 | GlobalConfig config; 15 | 16 | @Override 17 | public TableIdentifier mapDestination(String destination) { 18 | final String tableName = destination 19 | .replaceAll(config.iceberg().destinationRegexp().orElse(""), config.iceberg().destinationRegexpReplace().orElse("")) 20 | .replace(".", "_"); 21 | 22 | if (config.iceberg().destinationUppercaseTableNames()) { 23 | return TableIdentifier.of(Namespace.of(config.iceberg().namespace()), (config.iceberg().tablePrefix().orElse("") + tableName).toUpperCase()); 24 | } else if (config.iceberg().destinationLowercaseTableNames()) { 25 | return TableIdentifier.of(Namespace.of(config.iceberg().namespace()), (config.iceberg().tablePrefix().orElse("") + tableName).toLowerCase()); 26 | } else { 27 | return TableIdentifier.of(Namespace.of(config.iceberg().namespace()), config.iceberg().tablePrefix().orElse("") + tableName); 28 | } 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /.run/clean,install.run.xml: -------------------------------------------------------------------------------- 1 | 8 | 9 | 10 | 12 | 13 | 35 | 36 | 37 | 38 | -------------------------------------------------------------------------------- /docs/icebergevents.md: -------------------------------------------------------------------------------- 1 | # DEPRECATED 2 | 3 | Using the `iceberg` consumer with the following settings is recommended to achieve the same results: 4 | 5 | ```properties 6 | # Store nested data in variant fields 7 | debezium.sink.iceberg.nested-as-variant=true 8 | # Ensure event flattening is disabled (flattening is the default behavior) 9 | debezium.transforms=, 10 | ``` 11 | 12 | # `icebergevents` Consumer 13 | 14 | This consumer appends all Change Data Capture (CDC) events as JSON strings to a single Iceberg table. The table is 15 | partitioned by `event_destination` and `event_sink_timestamptz` for efficient data organization and query performance. 
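Because the table is partitioned on `event_destination` and on hours of `event_sink_timestamptz`, readers filtering on those columns can prune partitions instead of scanning the whole event log. A minimal read sketch using Iceberg's generics API; the `table` handle and the destination value are illustrative assumptions, not part of this consumer:

```java
import org.apache.iceberg.Table;
import org.apache.iceberg.data.IcebergGenerics;
import org.apache.iceberg.data.Record;
import org.apache.iceberg.expressions.Expressions;
import org.apache.iceberg.io.CloseableIterable;

public class DebeziumEventsReader {
  // `table` is assumed to be loaded from the configured catalog beforehand,
  // e.g. catalog.loadTable(TableIdentifier.of("debezium", "debezium_events")).
  static void printEventsFor(Table table, String destination) throws java.io.IOException {
    try (CloseableIterable<Record> events = IcebergGenerics.read(table)
        .where(Expressions.equal("event_destination", destination))
        .build()) {
      // scan planning can skip partitions whose event_destination does not match
      events.forEach(record -> System.out.println(record.getField("event_value")));
    }
  }
}
```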
16 | 17 | ````properties 18 | debezium.sink.type=icebergevents 19 | debezium.sink.iceberg.catalog-name=default 20 | ```` 21 | 22 | Iceberg table definition: 23 | 24 | ```java 25 | static final String TABLE_NAME = "debezium_events"; 26 | static final Schema TABLE_SCHEMA = new Schema( 27 | required(1, "event_destination", Types.StringType.get()), 28 | optional(2, "event_key", Types.StringType.get()), 29 | optional(3, "event_value", Types.StringType.get()), 30 | optional(4, "event_sink_epoch_ms", Types.LongType.get()), 31 | optional(5, "event_sink_timestamptz", Types.TimestampType.withZone()) 32 | ); 33 | static final PartitionSpec TABLE_PARTITION = PartitionSpec.builderFor(TABLE_SCHEMA) 34 | .identity("event_destination") 35 | .hour("event_sink_timestamptz") 36 | .build(); 37 | static final SortOrder TABLE_SORT_ORDER = SortOrder.builderFor(TABLE_SCHEMA) 38 | .asc("event_sink_epoch_ms", NullOrder.NULLS_LAST) 39 | .build(); 40 | ``` 41 | -------------------------------------------------------------------------------- /docs/python-runner.md: -------------------------------------------------------------------------------- 1 | # Python Runner for Debezium Server 2 | 3 | It's possible to use Python to run and operate Debezium Server. 4 | 5 | For convenience, this project additionally provides Python scripts to automate the startup, shutdown, and configuration 6 | of Debezium Server. 7 | Using Python, you can perform various Debezium Server operations and apply programmatic, dynamic Debezium configuration. 8 | Example: 9 | 10 | ```commandline 11 | pip install git+https://github.com/memiiso/debezium-server-iceberg.git@master#subdirectory=python 12 | debezium 13 | # running with custom arguments 14 | debezium --debezium_dir=/my/debezium_server/dir/ --java_home=/my/java/homedir/ 15 | ``` 16 | 17 | ```python 18 | from debezium import Debezium 19 | 20 | d = Debezium(debezium_dir="/dbz/server/dir", java_home='/java/home/dir') 21 | java_args = [] 22 | java_args.append("-Dquarkus.log.file.enable=true") 23 | java_args.append("-Dquarkus.log.file.path=/logs/dbz_logfile.log") 24 | d.run(*java_args) 25 | ``` 26 | 27 | ```python 28 | import os 29 | from debezium import DebeziumRunAsyn 30 | 31 | java_args = [] 32 | # using python we can dynamically influence debezium 33 | # by changing its config within python 34 | my_custom_condition_check = True  # placeholder for your own runtime check 35 | if my_custom_condition_check: 36 | # Option 1: set config using java arg 37 | java_args.append("-Dsnapshot.mode=always") 38 | # Option 2: set config using ENV variable 39 | os.environ["SNAPSHOT_MODE"] = "always" 40 | 41 | java_args.append("-Dquarkus.log.file.enable=true") 42 | java_args.append("-Dquarkus.log.file.path=/logs/dbz_logfile.log") 43 | d = DebeziumRunAsyn(debezium_dir="/dbz/server/dir", java_home='/java/home/dir', java_args=java_args) 44 | d.run() 45 | d.join() 46 | ``` -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: Build Java Project 2 | 3 | on: 4 | push: 5 | branches: [ master, '*.*' ] 6 | paths-ignore: 7 | - '.github/**' 8 | - '.idea/**' 9 | - '.run/**' 10 | pull_request: 11 | branches: [ master, '*.*' ] 12 | paths-ignore: 13 | - '.github/**' 14 | - '.idea/**' 15 | - '.run/**' 16 | 17 | env: 18 | SPARK_LOCAL_IP: 127.0.0.1 19 | 20 | jobs: 21 | build-java-project-json-format: 22 | name: Build-Test (Json Format) 23 | runs-on: ubuntu-latest 24 | env: 25 | DEBEZIUM_FORMAT_VALUE: json 26 | DEBEZIUM_FORMAT_KEY: json 27 | steps: 28 | - name:
Checkout Repository 29 | uses: actions/checkout@v4 30 | - name: Set up Java 31 | uses: actions/setup-java@v5 32 | with: 33 | distribution: 'temurin' 34 | java-version: 21 35 | cache: 'maven' 36 | - name: Build with Maven 37 | run: mvn -B --no-transfer-progress package --file pom.xml -Dsurefire.skipAfterFailureCount=1 38 | 39 | 40 | build-java-project-connect-format: 41 | name: Build-Test (Connect Format) 42 | runs-on: ubuntu-latest 43 | needs: build-java-project-json-format 44 | env: 45 | DEBEZIUM_FORMAT_VALUE: connect 46 | DEBEZIUM_FORMAT_KEY: connect 47 | steps: 48 | - name: Checkout Repository 49 | uses: actions/checkout@v4 50 | 51 | - name: Set up Java 52 | uses: actions/setup-java@v5 53 | with: 54 | distribution: 'temurin' 55 | java-version: 21 56 | cache: 'maven' 57 | - name: Build with Maven (Connect Format) 58 | run: mvn -B --no-transfer-progress package --file pom.xml -Dsurefire.skipAfterFailureCount=1 59 | -------------------------------------------------------------------------------- /debezium-server-iceberg-sink/src/test/java/io/debezium/server/iceberg/testresources/TestUtil.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright memiiso Authors. 4 | * * 5 | * * Licensed under the Apache Software License version 2.0, available at http://www.apache.org/licenses/LICENSE-2.0 6 | * 7 | */ 8 | 9 | package io.debezium.server.iceberg.testresources; 10 | 11 | import io.debezium.embedded.EmbeddedEngineChangeEvent; 12 | import io.debezium.engine.DebeziumEngine; 13 | 14 | import java.security.SecureRandom; 15 | 16 | import org.apache.kafka.connect.source.SourceRecord; 17 | 18 | public class TestUtil { 19 | static final String AB = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; 20 | static final SecureRandom rnd = new SecureRandom(); 21 | 22 | 23 | public static int randomInt(int low, int high) { 24 | return rnd.nextInt(high - low) + low; 25 | } 26 | 27 | public static String randomString(int len) { 28 | StringBuilder sb = new StringBuilder(len); 29 | for (int i = 0; i < len; i++) 30 | sb.append(AB.charAt(rnd.nextInt(AB.length()))); 31 | return sb.toString(); 32 | } 33 | 34 | public static DebeziumEngine.RecordCommitter getCommitter() { 35 | return new DebeziumEngine.RecordCommitter() { 36 | public synchronized void markProcessed(SourceRecord record) { 37 | } 38 | 39 | @Override 40 | public void markProcessed(Object record) { 41 | } 42 | 43 | public synchronized void markBatchFinished() { 44 | } 45 | 46 | @Override 47 | public void markProcessed(Object record, DebeziumEngine.Offsets sourceOffsets) { 48 | } 49 | 50 | @Override 51 | public DebeziumEngine.Offsets buildOffsets() { 52 | return null; 53 | } 54 | }; 55 | } 56 | 57 | } 58 | -------------------------------------------------------------------------------- /debezium-server-iceberg-sink/src/test/resources/json/unwrap-with-schema.json: -------------------------------------------------------------------------------- 1 | { 2 | "schema": { 3 | "type": "struct", 4 | "fields": [ 5 | { 6 | "type": "int32", 7 | "optional": false, 8 | "field": "id" 9 | }, 10 | { 11 | "type": "int32", 12 | "optional": false, 13 | "name": "io.debezium.time.Date", 14 | "version": 1, 15 | "field": "order_date" 16 | }, 17 | { 18 | "type": "int32", 19 | "optional": false, 20 | "field": "purchaser" 21 | }, 22 | { 23 | "type": "int32", 24 | "optional": false, 25 | "field": "quantity" 26 | }, 27 | { 28 | "type": "int32", 29 | "optional": false, 30 | "field": "product_id" 31 | }, 32 | { 33 
| "type": "string", 34 | "optional": true, 35 | "field": "__op" 36 | }, 37 | { 38 | "type": "string", 39 | "optional": true, 40 | "field": "__table" 41 | }, 42 | { 43 | "type": "int64", 44 | "optional": true, 45 | "field": "__lsn" 46 | }, 47 | { 48 | "type": "int64", 49 | "optional": true, 50 | "field": "__source_ts_ms" 51 | }, 52 | { 53 | "type": "string", 54 | "optional": true, 55 | "field": "__deleted" 56 | } 57 | ], 58 | "optional": false, 59 | "name": "testc.inventory.orders.Value" 60 | }, 61 | "payload": { 62 | "id": 10003, 63 | "order_date": 16850, 64 | "purchaser": 1002, 65 | "quantity": 2, 66 | "product_id": 106, 67 | "__op": "r", 68 | "__table": "orders", 69 | "__lsn": 33832960, 70 | "__source_ts_ms": 1596309876678, 71 | "__deleted": "false" 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /debezium-server-iceberg-sink/src/test/java/io/debezium/server/iceberg/testresources/CatalogRest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright memiiso Authors. 4 | * * 5 | * * Licensed under the Apache Software License version 2.0, available at http://www.apache.org/licenses/LICENSE-2.0 6 | * 7 | */ 8 | 9 | package io.debezium.server.iceberg.testresources; 10 | 11 | import io.quarkus.test.common.QuarkusTestResourceLifecycleManager; 12 | import org.testcontainers.containers.GenericContainer; 13 | import org.testcontainers.containers.wait.strategy.Wait; 14 | import org.testcontainers.utility.DockerImageName; 15 | 16 | import java.util.Map; 17 | import java.util.concurrent.ConcurrentHashMap; 18 | 19 | public class CatalogRest implements QuarkusTestResourceLifecycleManager { 20 | public static final int REST_CATALOG_PORT = 8181; 21 | public static final String REST_CATALOG_IMAGE = "apache/iceberg-rest-fixture"; 22 | 23 | public static final GenericContainer container = new GenericContainer<>(DockerImageName.parse(REST_CATALOG_IMAGE)) 24 | .withExposedPorts(REST_CATALOG_PORT) 25 | .waitingFor(Wait.forLogMessage(".*Started Server.*", 1)); 26 | 27 | public static String getHostUrl() { 28 | return String.format("http://%s:%s", container.getHost(), container.getMappedPort(REST_CATALOG_PORT)); 29 | } 30 | 31 | @Override 32 | public Map start() { 33 | container.start(); 34 | System.out.println("Rest Catalog started: " + getHostUrl()); 35 | 36 | Map config = new ConcurrentHashMap<>(); 37 | 38 | config.put("debezium.sink.iceberg.type", "rest"); 39 | config.put("debezium.sink.iceberg.uri", CatalogRest.getHostUrl()); 40 | 41 | return config; 42 | } 43 | 44 | @Override 45 | public void stop() { 46 | container.stop(); 47 | } 48 | 49 | } 50 | -------------------------------------------------------------------------------- /debezium-server-iceberg-sink/src/test/java/io/debezium/server/iceberg/IcebergChangeConsumerJdbcCatalogTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright memiiso Authors. 
4 | * * 5 | * * Licensed under the Apache Software License version 2.0, available at http://www.apache.org/licenses/LICENSE-2.0 6 | * 7 | */ 8 | 9 | package io.debezium.server.iceberg; 10 | 11 | import com.google.common.collect.Lists; 12 | import io.debezium.server.iceberg.testresources.CatalogJdbc; 13 | import io.debezium.server.iceberg.testresources.S3Minio; 14 | import io.debezium.server.iceberg.testresources.SourcePostgresqlDB; 15 | import io.quarkus.test.common.QuarkusTestResource; 16 | import io.quarkus.test.junit.QuarkusTest; 17 | import org.apache.iceberg.data.Record; 18 | import org.apache.iceberg.io.CloseableIterable; 19 | import org.awaitility.Awaitility; 20 | import org.junit.jupiter.api.Test; 21 | 22 | import java.time.Duration; 23 | 24 | /** 25 | * Integration test that verifies basic reading from PostgreSQL database and writing to iceberg destination. 26 | * 27 | * @author Ismail Simsek 28 | */ 29 | @QuarkusTest 30 | @QuarkusTestResource(value = S3Minio.class, restrictToAnnotatedClass = true) 31 | @QuarkusTestResource(value = SourcePostgresqlDB.class, restrictToAnnotatedClass = true) 32 | @QuarkusTestResource(value = CatalogJdbc.class, restrictToAnnotatedClass = true) 33 | public class IcebergChangeConsumerJdbcCatalogTest extends BaseTest { 34 | 35 | @Test 36 | public void testSimpleUpload() { 37 | Awaitility.await().atMost(Duration.ofSeconds(120)).until(() -> { 38 | try { 39 | CloseableIterable result = getTableDataV2("testc.inventory.customers"); 40 | return Lists.newArrayList(result).size() >= 3; 41 | } catch (Exception e) { 42 | return false; 43 | } 44 | }); 45 | } 46 | 47 | } 48 | -------------------------------------------------------------------------------- /debezium-server-iceberg-sink/src/test/resources/json/serde-with-array.json: -------------------------------------------------------------------------------- 1 | { 2 | "schema": { 3 | "type": "struct", 4 | "fields": [ 5 | { 6 | "type": "string", 7 | "optional": true, 8 | "field": "name" 9 | }, 10 | { 11 | "type": "array", 12 | "items": { 13 | "type": "int32", 14 | "optional": true 15 | }, 16 | "optional": true, 17 | "field": "pay_by_quarter" 18 | }, 19 | { 20 | "type": "array", 21 | "items": { 22 | "type": "string", 23 | "optional": true 24 | }, 25 | "optional": true, 26 | "field": "schedule" 27 | }, 28 | { 29 | "type": "string", 30 | "optional": true, 31 | "field": "__op" 32 | }, 33 | { 34 | "type": "string", 35 | "optional": true, 36 | "field": "__table" 37 | }, 38 | { 39 | "type": "int64", 40 | "optional": true, 41 | "field": "__source_ts_ms" 42 | }, 43 | { 44 | "type": "string", 45 | "optional": true, 46 | "field": "__db" 47 | }, 48 | { 49 | "type": "string", 50 | "optional": true, 51 | "field": "__deleted" 52 | } 53 | ], 54 | "optional": false, 55 | "name": "testc.inventory.array_data.Value" 56 | }, 57 | "payload": { 58 | "name": "Bill", 59 | "pay_by_quarter": [ 60 | 10000, 61 | 10001, 62 | 10002, 63 | 10003 64 | ], 65 | "schedule": [ 66 | "[Ljava.lang.String;@508917a0", 67 | "[Ljava.lang.String;@7412bd2" 68 | ], 69 | "__op": "c", 70 | "__table": "array_data", 71 | "__source_ts_ms": 1638128893618, 72 | "__db": "postgres", 73 | "__deleted": "false" 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /debezium-server-iceberg-sink/src/test/java/io/debezium/server/iceberg/GlobalConfigTest.java: -------------------------------------------------------------------------------- 1 | package io.debezium.server.iceberg; 2 | 3 | import io.quarkus.test.junit.QuarkusTest; 4 | 
import io.quarkus.test.junit.QuarkusTestProfile; 5 | import io.quarkus.test.junit.TestProfile; 6 | import org.jboss.logging.Logger; 7 | import org.junit.jupiter.api.Assertions; 8 | import org.junit.jupiter.api.Test; 9 | 10 | import java.util.HashMap; 11 | import java.util.Map; 12 | 13 | import static io.debezium.server.iceberg.TestConfigSource.ICEBERG_CATALOG_NAME; 14 | import static io.debezium.server.iceberg.TestConfigSource.ICEBERG_WAREHOUSE_S3A; 15 | 16 | @QuarkusTest 17 | @TestProfile(GlobalConfigTest.TestProfile.class) 18 | public class GlobalConfigTest extends BaseTest { 19 | 20 | @Test 21 | void configLoadsCorrectly() { 22 | Assertions.assertEquals(ICEBERG_CATALOG_NAME, config.iceberg().catalogName()); 23 | // tests run with upsert disabled 24 | Assertions.assertFalse(config.iceberg().upsert()); 25 | Assertions.assertEquals(ICEBERG_WAREHOUSE_S3A, config.iceberg().warehouseLocation()); 26 | 27 | Assertions.assertTrue(config.iceberg().icebergConfigs().containsKey("warehouse")); 28 | Assertions.assertTrue(config.iceberg().icebergConfigs().containsValue(ICEBERG_WAREHOUSE_S3A)); 29 | Assertions.assertTrue(config.iceberg().icebergConfigs().containsKey("table-namespace")); 30 | Assertions.assertTrue(config.iceberg().icebergConfigs().containsKey("catalog-name")); 31 | Assertions.assertTrue(config.iceberg().icebergConfigs().containsValue(ICEBERG_CATALOG_NAME)); 32 | Assertions.assertEquals(Logger.Level.ERROR, config.quarkusLogLevel()); 33 | } 34 | 35 | public static class TestProfile implements QuarkusTestProfile { 36 | @Override 37 | public Map<String, String> getConfigOverrides() { 38 | Map<String, String> config = new HashMap<>(); 39 | config.put("quarkus.log.level", "ERROR"); 40 | return config; 41 | } 42 | } 43 | 44 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![License](http://img.shields.io/:license-apache%202.0-brightgreen.svg)](http://www.apache.org/licenses/LICENSE-2.0.html) 2 | ![contributions welcome](https://img.shields.io/badge/contributions-welcome-brightgreen.svg?style=flat) 3 | ![Create Release](https://github.com/memiiso/debezium-server-iceberg/actions/workflows/release.yml/badge.svg) 4 | 5 | # Debezium Iceberg Consumer 6 | 7 | This project implements the Debezium Server Iceberg consumer 8 | (see [Debezium Server](https://debezium.io/documentation/reference/operations/debezium-server.html)). It enables real-time 9 | replication of Change Data Capture (CDC) events from any database to Iceberg tables, without requiring Spark, Kafka, or a 10 | streaming platform in between. 11 | 12 | See the [Documentation Page](https://memiiso.github.io/debezium-server-iceberg/) for more details.
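As a quick orientation, the sketch below shows a minimal `application.properties` for the sink. The catalog name, warehouse path, and namespace are illustrative placeholders distilled from the bundled examples under `examples/`, not defaults; adjust them for your environment:

```properties
# select the iceberg consumer
debezium.sink.type=iceberg
# illustrative catalog/warehouse values - replace with your own
debezium.sink.iceberg.catalog-name=mycatalog
debezium.sink.iceberg.warehouse=s3a://my-bucket/warehouse
debezium.sink.iceberg.table-namespace=icebergdata
debezium.sink.iceberg.upsert=true
# event schemas must be enabled for the sink to derive iceberg table schemas
debezium.format.value.schemas.enable=true
debezium.format.key.schemas.enable=true
```

Complete, runnable configurations are provided in the `examples/` directory.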
13 | 14 | ![Debezium Iceberg](https://raw.githubusercontent.com/memiiso/debezium-server-iceberg/master/docs/images/debezium-iceberg-architecture.drawio.png) 15 | 16 | ## Installation 17 | - Requirements: 18 | - JDK 21 19 | - Maven 20 | ### Building from source code 21 | 22 | ```bash 23 | git clone https://github.com/memiiso/debezium-server-iceberg.git 24 | cd debezium-server-iceberg 25 | mvn -Passembly -Dmaven.test.skip package 26 | # unzip and run the application 27 | unzip debezium-server-iceberg-dist/target/debezium-server-iceberg-dist*.zip -d appdist 28 | cd appdist/debezium-server-iceberg 29 | mv config/application.properties.example config/application.properties 30 | bash run.sh 31 | ``` 32 | 33 | ## Contributing 34 | 35 | The Memiiso community welcomes anyone that wants to help out in any way, whether that includes reporting problems, 36 | helping with documentation, or contributing code changes to fix bugs, add tests, or implement new features. 37 | See [contributing document](docs/contributing.md) for details. 38 | 39 | ### Contributors 40 | 41 | 42 | 43 | 44 | -------------------------------------------------------------------------------- /.github/workflows/stale.yml: -------------------------------------------------------------------------------- 1 | name: "Close Stale Issues and PRs" 2 | on: 3 | schedule: 4 | - cron: '0 0 * * *' 5 | 6 | permissions: 7 | # All other permissions are set to none 8 | issues: write 9 | pull-requests: write 10 | 11 | jobs: 12 | stale: 13 | if: github.repository_owner == 'memiiso' 14 | runs-on: ubuntu-22.04 15 | steps: 16 | - uses: actions/stale@v10.1.0 17 | with: 18 | # stale issues 19 | stale-issue-label: 'stale' 20 | exempt-issue-labels: 'not-stale' 21 | days-before-issue-stale: 180 22 | days-before-issue-close: 14 23 | stale-issue-message: > 24 | This issue has been automatically marked as stale because it has been open for 180 days 25 | with no activity. It will be closed in next 14 days if no further activity occurs. To 26 | permanently prevent this issue from being considered stale, add the label 'not-stale', 27 | but commenting on the issue is preferred when possible. 28 | close-issue-message: > 29 | This issue has been closed because it has not received any activity in the last 14 days 30 | since being marked as 'stale' 31 | # stale PRs 32 | stale-pr-label: 'stale' 33 | exempt-pr-labels: 'not-stale,security' 34 | stale-pr-message: 'This pull request has been marked as stale due to 30 days of inactivity. It will be closed in 1 week if no further activity occurs. If you think that’s incorrect or this pull request requires a review, please simply write any comment. If closed, you can revive the PR at any time. Thank you for your contributions.' 35 | close-pr-message: 'This pull request has been closed due to lack of activity. This is not a judgement on the merit of the PR in any way. It is just a way of keeping the PR queue manageable. If you think that is incorrect, or the pull request requires review, you can revive the PR at any time.' 
36 | days-before-pr-stale: 30 37 | days-before-pr-close: 7 38 | ascending: true 39 | operations-per-run: 200 40 | -------------------------------------------------------------------------------- /debezium-server-iceberg-dist/src/main/resources/distro/config/metrics.yml: -------------------------------------------------------------------------------- 1 | startDelaySeconds: 0 2 | ssl: false 3 | lowercaseOutputName: false 4 | lowercaseOutputLabelNames: false 5 | rules: 6 | - pattern: "kafka.producer<type=producer-metrics, client-id=([^>]+)><>([^:]+)" 7 | name: "kafka_producer_metrics_$2" 8 | type: GAUGE 9 | labels: 10 | client: "$1" 11 | - pattern: "kafka.producer<type=producer-node-metrics, client-id=([^>]+), node-id=([^>]+)><>([^:]+)" 12 | name: "kafka_producer_node_metrics_$3" 13 | type: GAUGE 14 | labels: 15 | client: "$1" 16 | node: "$2" 17 | - pattern: "kafka.producer<type=producer-topic-metrics, client-id=([^>]+), topic=([^>]+)><>([^:]+)" 18 | name: "kafka_producer_topic_metrics_$3" 19 | type: GAUGE 20 | labels: 21 | client: "$1" 22 | topic: "$2" 23 | - pattern: "kafka.connect<type=connect-worker-metrics>([^:]+):" 24 | name: "kafka_connect_worker_metrics_$1" 25 | type: GAUGE 26 | - pattern: "kafka.connect<type=connect-metrics, client-id=([^:]+)><>([^:]+)" 27 | name: "kafka_connect_metrics_$2" 28 | type: GAUGE 29 | labels: 30 | client: "$1" 31 | - pattern: "debezium.([^:]+)<type=connector-metrics, context=([^,]+), server=([^,]+), key=([^>]+)><>RowsScanned" 32 | name: "debezium_metrics_RowsScanned" 33 | type: GAUGE 34 | labels: 35 | plugin: "$1" 36 | name: "$3" 37 | context: "$2" 38 | table: "$4" 39 | - pattern: "debezium.([^:]+)<type=connector-metrics, server=([^,]+), task=([^,]+), context=([^,]+), database=([^>]+)><>([^:]+)" 40 | name: "debezium_metrics_$6" 41 | type: GAUGE 42 | labels: 43 | plugin: "$1" 44 | name: "$2" 45 | task: "$3" 46 | context: "$4" 47 | database: "$5" 48 | - pattern: "debezium.([^:]+)<type=connector-metrics, server=([^,]+), task=([^,]+), context=([^>]+)><>([^:]+)" 49 | name: "debezium_metrics_$5" 50 | type: GAUGE 51 | labels: 52 | plugin: "$1" 53 | name: "$2" 54 | task: "$3" 55 | context: "$4" 56 | - pattern: "debezium.([^:]+)<type=connector-metrics, context=([^,]+), server=([^>]+)><>([^:]+)" 57 | name: "debezium_metrics_$4" 58 | type: GAUGE 59 | labels: 60 | plugin: "$1" 61 | name: "$3" 62 | context: "$2" 63 | -------------------------------------------------------------------------------- /debezium-server-iceberg-sink/src/test/java/io/debezium/server/iceberg/testresources/SourceMongoDB.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright memiiso Authors.
4 | * * 5 | * * Licensed under the Apache Software License version 2.0, available at http://www.apache.org/licenses/LICENSE-2.0 6 | * 7 | */ 8 | 9 | package io.debezium.server.iceberg.testresources; 10 | 11 | import io.quarkus.test.common.QuarkusTestResourceLifecycleManager; 12 | 13 | import java.time.Duration; 14 | import java.util.List; 15 | import java.util.Map; 16 | import java.util.concurrent.ConcurrentHashMap; 17 | 18 | import org.testcontainers.containers.GenericContainer; 19 | import org.testcontainers.containers.wait.strategy.Wait; 20 | import org.testcontainers.images.builder.ImageFromDockerfile; 21 | 22 | public class SourceMongoDB implements QuarkusTestResourceLifecycleManager { 23 | 24 | public static final int MONGODB_PORT = 27017; 25 | public static final GenericContainer container = new GenericContainer( 26 | new ImageFromDockerfile("debezium_mongodb", false) 27 | .withFileFromClasspath("Dockerfile", "mongodb/Dockerfile") 28 | .withFileFromClasspath("start-mongodb.sh", "mongodb/start-mongodb.sh")) 29 | 30 | .waitingFor(Wait.forLogMessage(".*Successfully initialized inventory database.*", 1)) 31 | .withStartupTimeout(Duration.ofSeconds(120L)); 32 | 33 | @Override 34 | public Map start() { 35 | container.setPortBindings(List.of(MONGODB_PORT+":"+MONGODB_PORT)); 36 | container.withExposedPorts(MONGODB_PORT).start(); 37 | 38 | Map params = new ConcurrentHashMap<>(); 39 | params.put("%mongodb.debezium.source.mongodb.connection.string", 40 | "mongodb://" + container.getHost() + ":" + container.getMappedPort(MONGODB_PORT) + "/?replicaSet=rs0" 41 | ); 42 | params.put("%mongodb.debezium.source.mongodb.authsource", "admin"); 43 | params.put("%mongodb.debezium.source.mongodb.user", "debezium"); 44 | params.put("%mongodb.debezium.source.mongodb.password", "dbz"); 45 | //params.put("%mongodb.debezium.source.mongodb.ssl.enabled", "false"); 46 | return params; 47 | } 48 | 49 | @Override 50 | public void stop() { 51 | if (container != null) { 52 | container.stop(); 53 | } 54 | } 55 | 56 | } 57 | -------------------------------------------------------------------------------- /debezium-server-iceberg-sink/src/test/resources/json/serde-with-schema_geom.json: -------------------------------------------------------------------------------- 1 | { 2 | "schema": { 3 | "type": "struct", 4 | "fields": [ 5 | { 6 | "type": "int32", 7 | "optional": false, 8 | "default": 0, 9 | "field": "id" 10 | }, 11 | { 12 | "type": "struct", 13 | "fields": [ 14 | { 15 | "type": "string", 16 | "optional": false, 17 | "field": "wkb" 18 | }, 19 | { 20 | "type": "int32", 21 | "optional": true, 22 | "field": "srid" 23 | } 24 | ], 25 | "optional": true, 26 | "name": "io.debezium.data.geometry.Geometry", 27 | "version": 1, 28 | "doc": "Geometry", 29 | "field": "g" 30 | }, 31 | { 32 | "type": "struct", 33 | "fields": [ 34 | { 35 | "type": "string", 36 | "optional": false, 37 | "field": "wkb" 38 | }, 39 | { 40 | "type": "int32", 41 | "optional": true, 42 | "field": "srid" 43 | } 44 | ], 45 | "optional": true, 46 | "name": "io.debezium.data.geometry.Geometry", 47 | "version": 1, 48 | "doc": "Geometry", 49 | "field": "h" 50 | }, 51 | { 52 | "type": "string", 53 | "optional": true, 54 | "field": "__op" 55 | }, 56 | { 57 | "type": "string", 58 | "optional": true, 59 | "field": "__table" 60 | }, 61 | { 62 | "type": "int64", 63 | "optional": true, 64 | "field": "__source_ts_ms" 65 | }, 66 | { 67 | "type": "string", 68 | "optional": true, 69 | "field": "__db" 70 | }, 71 | { 72 | "type": "string", 73 | "optional": true, 74 | 
"field": "__deleted" 75 | } 76 | ], 77 | "optional": false, 78 | "name": "testc.inventory.geom.Value" 79 | }, 80 | "payload": { 81 | "id": 1, 82 | "g": { 83 | "wkb": "AQEAAAAAAAAAAADwPwAAAAAAAPA/", 84 | "srid": 123 85 | }, 86 | "h": null, 87 | "__op": "r", 88 | "__table": "geom", 89 | "__source_ts_ms": 1634844424986, 90 | "__db": "postgres", 91 | "__deleted": "false" 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /debezium-server-iceberg-sink/src/main/java/io/debezium/server/iceberg/converter/AbstractVariantObject.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | package io.debezium.server.iceberg.converter; 20 | 21 | import org.apache.iceberg.variants.ShreddedObject; 22 | import org.apache.iceberg.variants.VariantMetadata; 23 | import org.apache.iceberg.variants.VariantObject; 24 | import org.apache.iceberg.variants.VariantValue; 25 | import org.apache.iceberg.variants.Variants; 26 | 27 | import java.nio.ByteBuffer; 28 | 29 | public abstract class AbstractVariantObject implements VariantObject { 30 | 31 | protected final ShreddedObject shreddedObject; 32 | protected final VariantMetadata metadata; 33 | 34 | protected AbstractVariantObject(VariantMetadata metadata) { 35 | this.metadata = metadata; 36 | this.shreddedObject = Variants.object(this.metadata); 37 | } 38 | 39 | public VariantMetadata metadata() { 40 | return this.metadata; 41 | } 42 | 43 | @Override 44 | public VariantValue get(String name) { 45 | return shreddedObject.get(name); 46 | } 47 | 48 | @Override 49 | public Iterable fieldNames() { 50 | return shreddedObject.fieldNames(); 51 | } 52 | 53 | @Override 54 | public int numFields() { 55 | return shreddedObject.numFields(); 56 | } 57 | 58 | @Override 59 | public int sizeInBytes() { 60 | return shreddedObject.sizeInBytes(); 61 | } 62 | 63 | @Override 64 | public int writeTo(ByteBuffer buffer, int offset) { 65 | return shreddedObject.writeTo(buffer, offset); 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /debezium-server-iceberg-sink/src/test/java/io/debezium/server/iceberg/testresources/CatalogNessie.java: -------------------------------------------------------------------------------- 1 | package io.debezium.server.iceberg.testresources; 2 | 3 | import io.quarkus.test.common.QuarkusTestResourceLifecycleManager; 4 | import org.testcontainers.containers.GenericContainer; 5 | import org.testcontainers.containers.wait.strategy.HttpWaitStrategy; 6 | import org.testcontainers.utility.DockerImageName; 7 | 8 | import java.time.Duration; 9 | import java.util.Map; 10 | import 
java.util.concurrent.ConcurrentHashMap; 11 | import java.util.concurrent.TimeUnit; 12 | 13 | public class CatalogNessie implements QuarkusTestResourceLifecycleManager { 14 | private static final String NESSIE_IMAGE = "projectnessie/nessie:latest"; 15 | private static final int NESSIE_PORT = 19120; 16 | private GenericContainer nessieContainer = new GenericContainer<>(DockerImageName.parse(NESSIE_IMAGE)) 17 | .withNetworkAliases("nessie") 18 | .withEnv("QUARKUS_PROFILE", "prod") 19 | .withEnv("QUARKUS_HTTP_PORT", String.valueOf(NESSIE_PORT)) 20 | .withEnv("QUARKUS_LOG_LEVEL", "INFO") 21 | .withExposedPorts(NESSIE_PORT) 22 | .waitingFor(new HttpWaitStrategy() 23 | .forPort(NESSIE_PORT) 24 | .forPath("/q/health") 25 | .withStartupTimeout(Duration.ofSeconds(120))); 26 | 27 | @Override 28 | public Map start() { 29 | long startTime = System.nanoTime(); // Get time before start 30 | nessieContainer.start(); 31 | long endTime = System.nanoTime(); // Get time after start 32 | double durationSeconds = TimeUnit.NANOSECONDS.toMillis(endTime - startTime) / 1000.0; // Convert nanoseconds to seconds 33 | System.out.println("Nessie started: " + getNessieUri() + " duration: " + durationSeconds); 34 | 35 | Map config = new ConcurrentHashMap<>(); 36 | 37 | config.put("debezium.sink.iceberg.type", "nessie"); 38 | config.put("debezium.sink.iceberg.uri", getNessieUri() + "/api/v2"); 39 | config.put("debezium.sink.iceberg.ref", "main"); 40 | return config; 41 | } 42 | 43 | @Override 44 | public void stop() { 45 | if (nessieContainer != null) { 46 | nessieContainer.stop(); 47 | } 48 | } 49 | 50 | public String getNessieUri() { 51 | if (nessieContainer != null && nessieContainer.isRunning()) { 52 | return "http://" + nessieContainer.getHost() + ":" + nessieContainer.getMappedPort(NESSIE_PORT); 53 | } 54 | return null; 55 | } 56 | } -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .gemini 2 | .flattened-pom.xml 3 | 4 | deploy.sh 5 | .ipynb_checkpoints 6 | 7 | # Debezium gitignore 8 | 9 | activemq-data/ 10 | .idea/ 11 | *.iml 12 | *.ipr 13 | *.iws 14 | .metadata/ 15 | .recommenders/ 16 | .classpath 17 | .project 18 | .cache 19 | .settings/ 20 | .factorypath 21 | .checkstyle 22 | .gradle/ 23 | .vscode/ 24 | build/ 25 | deploy/ 26 | target/ 27 | mods/ 28 | *.swp 29 | epom 30 | log 31 | npm-debug.log 32 | .DS_Store 33 | phantomjsdriver.log 34 | 35 | generated-sources/ 36 | 37 | /state/ 38 | bin/ 39 | 40 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider 41 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 42 | 43 | # User-specific stuff 44 | .idea/**/workspace.xml 45 | .idea/**/tasks.xml 46 | .idea/**/usage.statistics.xml 47 | .idea/**/dictionaries 48 | .idea/**/shelf 49 | 50 | # Generated files 51 | .idea/**/contentModel.xml 52 | 53 | # Sensitive or high-churn files 54 | .idea/**/dataSources/ 55 | .idea/**/dataSources.ids 56 | .idea/**/dataSources.local.xml 57 | .idea/**/sqlDataSources.xml 58 | .idea/**/dynamic.xml 59 | .idea/**/uiDesigner.xml 60 | .idea/**/dbnavigator.xml 61 | 62 | # Gradle 63 | .idea/**/gradle.xml 64 | .idea/**/libraries 65 | 66 | # Gradle and Maven with auto-import 67 | # When using Gradle or Maven with auto-import, you should exclude module files, 68 | # since they will be recreated, and may cause churn. Uncomment if using 69 | # auto-import. 
70 | # .idea/artifacts 71 | # .idea/compiler.xml 72 | # .idea/jarRepositories.xml 73 | # .idea/modules.xml 74 | # .idea/*.iml 75 | # .idea/modules 76 | # *.iml 77 | # *.ipr 78 | 79 | # CMake 80 | cmake-build-*/ 81 | 82 | # Mongo Explorer plugin 83 | .idea/**/mongoSettings.xml 84 | 85 | # File-based project format 86 | *.iws 87 | 88 | # IntelliJ 89 | out/ 90 | 91 | # mpeltonen/sbt-idea plugin 92 | .idea_modules/ 93 | 94 | # JIRA plugin 95 | atlassian-ide-plugin.xml 96 | 97 | # Cursive Clojure plugin 98 | .idea/replstate.xml 99 | 100 | # Crashlytics plugin (for Android Studio and IntelliJ) 101 | com_crashlytics_export_strings.xml 102 | crashlytics.properties 103 | crashlytics-build.properties 104 | fabric.properties 105 | 106 | # Editor-based Rest Client 107 | .idea/httpRequests 108 | 109 | # Android studio 3.1+ serialized cache file 110 | .idea/caches/build_file_checksums.ser 111 | -------------------------------------------------------------------------------- /debezium-server-iceberg-sink/src/main/java/io/debezium/server/iceberg/tableoperator/PartitionedDeltaWriter.java: -------------------------------------------------------------------------------- 1 | package io.debezium.server.iceberg.tableoperator; 2 | 3 | import java.io.IOException; 4 | import java.io.UncheckedIOException; 5 | import java.util.Map; 6 | import java.util.Set; 7 | 8 | import org.apache.iceberg.FileFormat; 9 | import org.apache.iceberg.PartitionKey; 10 | import org.apache.iceberg.PartitionSpec; 11 | import org.apache.iceberg.Schema; 12 | import org.apache.iceberg.data.Record; 13 | import org.apache.iceberg.io.FileAppenderFactory; 14 | import org.apache.iceberg.io.FileIO; 15 | import org.apache.iceberg.io.OutputFileFactory; 16 | import com.google.common.collect.Maps; 17 | import org.apache.iceberg.util.Tasks; 18 | 19 | class PartitionedDeltaWriter extends BaseDeltaTaskWriter { 20 | 21 | private final PartitionKey partitionKey; 22 | 23 | private final Map writers = Maps.newHashMap(); 24 | 25 | PartitionedDeltaWriter(PartitionSpec spec, 26 | FileFormat format, 27 | FileAppenderFactory appenderFactory, 28 | OutputFileFactory fileFactory, 29 | FileIO io, 30 | long targetFileSize, 31 | Schema schema, 32 | Set identifierFieldIds, 33 | boolean keepDeletes) { 34 | super(spec, format, appenderFactory, fileFactory, io, targetFileSize, schema, identifierFieldIds, keepDeletes); 35 | this.partitionKey = new PartitionKey(spec, schema); 36 | } 37 | 38 | @Override 39 | RowDataDeltaWriter route(Record row) { 40 | partitionKey.partition(wrapper().wrap(row)); 41 | 42 | RowDataDeltaWriter writer = writers.get(partitionKey); 43 | if (writer == null) { 44 | // NOTICE: we need to copy a new partition key here, in case of messing up the keys in writers. 
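// PartitionKey is mutable and is re-populated by partition(...) for every routed row,
// so it must be copied before being stored as a map key; otherwise the next call would
// silently rewrite the keys already held in the writers map.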
45 | PartitionKey copiedKey = partitionKey.copy(); 46 | writer = new RowDataDeltaWriter(copiedKey); 47 | writers.put(copiedKey, writer); 48 | } 49 | 50 | return writer; 51 | } 52 | 53 | @Override 54 | public void close() { 55 | try { 56 | Tasks.foreach(writers.values()) 57 | .throwFailureWhenFinished() 58 | .noRetry() 59 | .run(RowDataDeltaWriter::close, IOException.class); 60 | 61 | writers.clear(); 62 | } catch (IOException e) { 63 | throw new UncheckedIOException("Failed to close equality delta writer", e); 64 | } 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /debezium-server-iceberg-sink/src/test/java/io/debezium/server/iceberg/IcebergChangeConsumerRestCatalogTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright memiiso Authors. 4 | * * 5 | * * Licensed under the Apache Software License version 2.0, available at http://www.apache.org/licenses/LICENSE-2.0 6 | * 7 | */ 8 | 9 | package io.debezium.server.iceberg; 10 | 11 | import com.google.common.collect.Lists; 12 | import io.debezium.server.iceberg.testresources.CatalogRest; 13 | import io.debezium.server.iceberg.testresources.S3Minio; 14 | import io.debezium.server.iceberg.testresources.SourcePostgresqlDB; 15 | import io.quarkus.test.common.QuarkusTestResource; 16 | import io.quarkus.test.junit.QuarkusTest; 17 | import org.apache.iceberg.catalog.Namespace; 18 | import org.apache.iceberg.catalog.TableIdentifier; 19 | import org.apache.iceberg.data.Record; 20 | import org.apache.iceberg.io.CloseableIterable; 21 | import org.awaitility.Awaitility; 22 | import org.junit.jupiter.api.Assertions; 23 | import org.junit.jupiter.api.Test; 24 | import org.junit.jupiter.api.condition.DisabledIfEnvironmentVariable; 25 | 26 | import java.time.Duration; 27 | import java.util.List; 28 | 29 | /** 30 | * Integration test that verifies basic reading from PostgreSQL database and writing to iceberg destination. 
31 | * 32 | * @author Ismail Simsek 33 | */ 34 | @QuarkusTest 35 | @QuarkusTestResource(value = CatalogRest.class, restrictToAnnotatedClass = true) 36 | @QuarkusTestResource(value = S3Minio.class, restrictToAnnotatedClass = true) 37 | @QuarkusTestResource(value = SourcePostgresqlDB.class, restrictToAnnotatedClass = true) 38 | @DisabledIfEnvironmentVariable(named = "GITHUB_ACTIONS", matches = "true") 39 | public class IcebergChangeConsumerRestCatalogTest extends BaseTest { 40 | 41 | @Test 42 | public void testSimpleUpload() { 43 | Awaitility.await().atMost(Duration.ofSeconds(120)).until(() -> { 44 | try { 45 | CloseableIterable result = getTableDataV2("testc.inventory.customers"); 46 | return Lists.newArrayList(result).size() >= 3; 47 | } catch (Exception e) { 48 | return false; 49 | } 50 | }); 51 | 52 | List tables = consumer.icebergCatalog.listTables(Namespace.of(consumer.config.iceberg().namespace())); 53 | Assertions.assertTrue(tables.contains(TableIdentifier.of(Namespace.of(consumer.config.iceberg().namespace()), "debezium_offset_storage_table"))); 54 | Assertions.assertTrue(tables.contains(TableIdentifier.of(Namespace.of(consumer.config.iceberg().namespace()), "debeziumcdc_testc_inventory_customers"))); 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /debezium-server-iceberg-sink/src/main/java/io/debezium/server/iceberg/storage/BaseIcebergStorageConfig.java: -------------------------------------------------------------------------------- 1 | package io.debezium.server.iceberg.storage; 2 | 3 | import com.google.common.collect.Maps; 4 | import io.debezium.config.Configuration; 5 | import io.debezium.server.iceberg.IcebergUtil; 6 | import org.apache.iceberg.CatalogUtil; 7 | import org.apache.iceberg.catalog.Catalog; 8 | import org.apache.iceberg.catalog.Namespace; 9 | import org.apache.iceberg.catalog.TableIdentifier; 10 | import org.eclipse.microprofile.config.ConfigProvider; 11 | 12 | import java.util.Map; 13 | import java.util.Properties; 14 | 15 | 16 | public abstract class BaseIcebergStorageConfig { 17 | private static final String PROP_SINK_PREFIX = "debezium.sink."; 18 | public Properties config = new Properties(); 19 | 20 | public BaseIcebergStorageConfig(Configuration config, String configuration_field_prefix) { 21 | Configuration confIcebergSubset = config.subset(configuration_field_prefix + "iceberg.", true); 22 | confIcebergSubset.forEach(this.config::put); 23 | 24 | // debezium is doing config filtering before passing it down to this class! 
25 | // so we are taking additional config using ConfigProvider with this we take full iceberg config 26 | Map icebergConf = IcebergUtil.getConfigSubset(ConfigProvider.getConfig(), PROP_SINK_PREFIX + "iceberg."); 27 | icebergConf.forEach(this.config::putIfAbsent); 28 | } 29 | 30 | public String catalogName() { 31 | return this.config.getProperty("catalog-name", "default"); 32 | } 33 | 34 | public String tableNamespace() { 35 | return this.config.getProperty("table-namespace", "default"); 36 | } 37 | 38 | abstract public String tableName(); 39 | 40 | public org.apache.hadoop.conf.Configuration hadoopConfig() { 41 | final org.apache.hadoop.conf.Configuration hadoopConfig = new org.apache.hadoop.conf.Configuration(); 42 | config.forEach((key, value) -> hadoopConfig.set((String) key, (String) value)); 43 | return hadoopConfig; 44 | } 45 | 46 | public Map icebergProperties() { 47 | return Maps.fromProperties(config); 48 | } 49 | 50 | public Catalog icebergCatalog() { 51 | return CatalogUtil.buildIcebergCatalog(this.catalogName(), 52 | this.icebergProperties(), this.hadoopConfig()); 53 | } 54 | 55 | public String tableFullName() { 56 | return String.format("%s.%s", this.tableNamespace(), this.tableName()); 57 | } 58 | 59 | public TableIdentifier tableIdentifier() { 60 | return TableIdentifier.of(Namespace.of(this.tableNamespace()), this.tableName()); 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /debezium-server-iceberg-sink/src/test/java/io/debezium/server/iceberg/tableoperator/BaseWriterTest.java: -------------------------------------------------------------------------------- 1 | package io.debezium.server.iceberg.tableoperator; 2 | 3 | import io.debezium.server.iceberg.IcebergUtil; 4 | import org.apache.iceberg.*; 5 | import org.apache.iceberg.data.GenericAppenderFactory; 6 | import org.apache.iceberg.encryption.PlaintextEncryptionManager; 7 | import org.apache.iceberg.inmemory.InMemoryFileIO; 8 | import org.apache.iceberg.io.OutputFileFactory; 9 | import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; 10 | import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; 11 | import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet; 12 | import org.apache.iceberg.types.Types; 13 | import org.junit.jupiter.api.BeforeEach; 14 | 15 | import java.util.Set; 16 | 17 | import static org.mockito.Mockito.mock; 18 | import static org.mockito.Mockito.when; 19 | 20 | public class BaseWriterTest { 21 | 22 | protected InMemoryFileIO fileIO; 23 | protected Table table; 24 | FileFormat format; 25 | GenericAppenderFactory appenderFactory; 26 | OutputFileFactory fileFactory; 27 | Set identifierFieldIds; 28 | 29 | protected static final Schema SCHEMA = 30 | new Schema( 31 | ImmutableList.of( 32 | Types.NestedField.required(1, "id", Types.StringType.get()), 33 | Types.NestedField.required(2, "data", Types.StringType.get()), 34 | Types.NestedField.required(3, "id2", Types.StringType.get()), 35 | Types.NestedField.required(4, "__op", Types.StringType.get()) 36 | ), 37 | ImmutableSet.of(1, 3)); 38 | 39 | protected static final PartitionSpec SPEC = 40 | PartitionSpec.builderFor(SCHEMA).identity("data").build(); 41 | 42 | @BeforeEach 43 | public void before() { 44 | fileIO = new InMemoryFileIO(); 45 | 46 | table = mock(Table.class); 47 | when(table.schema()).thenReturn(SCHEMA); 48 | when(table.spec()).thenReturn(PartitionSpec.unpartitioned()); 49 | when(table.io()).thenReturn(fileIO); 50 | 
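// The remaining stubs keep the writer fully self-contained for tests: a file-scheme
// location provider, plaintext (no-op) encryption, and empty table properties.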
when(table.locationProvider()) 51 | .thenReturn(LocationProviders.locationsFor("file", ImmutableMap.of())); 52 | when(table.encryption()).thenReturn(PlaintextEncryptionManager.instance()); 53 | when(table.properties()).thenReturn(ImmutableMap.of()); 54 | 55 | format = IcebergUtil.getTableFileFormat(table); 56 | appenderFactory = IcebergUtil.getTableAppender(table); 57 | fileFactory = IcebergUtil.getTableOutputFileFactory(table, format); 58 | identifierFieldIds = table.schema().identifierFieldIds(); 59 | } 60 | 61 | } 62 | -------------------------------------------------------------------------------- /debezium-server-iceberg-sink/src/main/java/io/debezium/server/iceberg/tableoperator/RecordWrapper.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | package io.debezium.server.iceberg.tableoperator; 20 | 21 | import org.apache.iceberg.data.Record; 22 | import org.apache.iceberg.types.Types.StructType; 23 | 24 | import java.util.Map; 25 | 26 | public class RecordWrapper implements Record { 27 | 28 | private final Record delegate; 29 | private final Operation op; 30 | 31 | public RecordWrapper(Record delegate, Operation op) { 32 | this.delegate = delegate; 33 | this.op = op; 34 | } 35 | 36 | public Operation op() { 37 | return op; 38 | } 39 | 40 | @Override 41 | public StructType struct() { 42 | return delegate.struct(); 43 | } 44 | 45 | @Override 46 | public Object getField(String name) { 47 | return delegate.getField(name); 48 | } 49 | 50 | @Override 51 | public void setField(String name, Object value) { 52 | delegate.setField(name, value); 53 | } 54 | 55 | @Override 56 | public Object get(int pos) { 57 | return delegate.get(pos); 58 | } 59 | 60 | @Override 61 | public Record copy() { 62 | return new RecordWrapper(delegate.copy(), op); 63 | } 64 | 65 | @Override 66 | public Record copy(Map overwriteValues) { 67 | return new RecordWrapper(delegate.copy(overwriteValues), op); 68 | } 69 | 70 | @Override 71 | public int size() { 72 | return delegate.size(); 73 | } 74 | 75 | @Override 76 | public T get(int pos, Class javaClass) { 77 | return delegate.get(pos, javaClass); 78 | } 79 | 80 | @Override 81 | public void set(int pos, T value) { 82 | delegate.set(pos, value); 83 | } 84 | 85 | @Override 86 | public String toString() { 87 | return delegate.toString(); 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /debezium-server-iceberg-sink/src/test/java/io/debezium/server/iceberg/mapper/CustomMapperTest.java: -------------------------------------------------------------------------------- 1 | package io.debezium.server.iceberg.mapper; 2 | 3 | import 
io.debezium.server.iceberg.BaseSparkTest; 4 | import io.debezium.server.iceberg.testresources.CatalogJdbc; 5 | import io.debezium.server.iceberg.testresources.S3Minio; 6 | import io.debezium.server.iceberg.testresources.SourcePostgresqlDB; 7 | import io.quarkus.test.common.QuarkusTestResource; 8 | import io.quarkus.test.junit.QuarkusTest; 9 | import io.quarkus.test.junit.QuarkusTestProfile; 10 | import io.quarkus.test.junit.TestProfile; 11 | import org.awaitility.Awaitility; 12 | import org.junit.jupiter.api.Assertions; 13 | import org.junit.jupiter.api.Test; 14 | 15 | import java.time.Duration; 16 | import java.util.HashMap; 17 | import java.util.Map; 18 | 19 | import static io.debezium.server.iceberg.TestConfigSource.ICEBERG_CATALOG_TABLE_NAMESPACE; 20 | import static org.junit.jupiter.api.Assertions.assertEquals; 21 | 22 | @QuarkusTest 23 | @QuarkusTestResource(value = S3Minio.class, restrictToAnnotatedClass = true) 24 | @QuarkusTestResource(value = SourcePostgresqlDB.class, restrictToAnnotatedClass = true) 25 | @QuarkusTestResource(value = CatalogJdbc.class, restrictToAnnotatedClass = true) 26 | @TestProfile(CustomMapperTest.TestProfile.class) 27 | public class CustomMapperTest extends BaseSparkTest { 28 | 29 | @Test 30 | public void testCustomMapper() throws Exception { 31 | assertEquals(sinkType, "iceberg"); 32 | String sql = """ 33 | DROP TABLE IF EXISTS inventory.sample; 34 | CREATE TABLE IF NOT EXISTS inventory.sample (id INTEGER, val INTEGER); 35 | """; 36 | SourcePostgresqlDB.runSQL(sql); 37 | SourcePostgresqlDB.runSQL("INSERT INTO inventory.sample (id, val) VALUES (1, 123)"); 38 | Awaitility.await().atMost(Duration.ofSeconds(320)).until(() -> { 39 | try { 40 | var df = spark.newSession().table(ICEBERG_CATALOG_TABLE_NAMESPACE + ".custom_mapper_sample"); 41 | Assertions.assertEquals(1, df.count()); 42 | 43 | return true; 44 | } catch (Exception e) { 45 | e.printStackTrace(); 46 | return false; 47 | } 48 | }); 49 | } 50 | 51 | public static class TestProfile implements QuarkusTestProfile { 52 | @Override 53 | public Map getConfigOverrides() { 54 | Map config = new HashMap<>(); 55 | config.put("debezium.sink.iceberg.table-mapper", "custom-mapper"); 56 | return config; 57 | } 58 | } 59 | } -------------------------------------------------------------------------------- /debezium-server-iceberg-sink/src/test/java/io/debezium/server/iceberg/IcebergChangeConsumerConnectTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright memiiso Authors. 
4 | * * 5 | * * Licensed under the Apache Software License version 2.0, available at http://www.apache.org/licenses/LICENSE-2.0 6 | * 7 | */ 8 | 9 | package io.debezium.server.iceberg; 10 | 11 | import io.debezium.server.iceberg.testresources.CatalogNessie; 12 | import io.debezium.server.iceberg.testresources.S3Minio; 13 | import io.debezium.server.iceberg.testresources.SourcePostgresqlDB; 14 | import io.quarkus.test.common.QuarkusTestResource; 15 | import io.quarkus.test.junit.QuarkusTest; 16 | import io.quarkus.test.junit.QuarkusTestProfile; 17 | import io.quarkus.test.junit.TestProfile; 18 | import org.apache.spark.sql.Dataset; 19 | import org.apache.spark.sql.Row; 20 | import org.awaitility.Awaitility; 21 | import org.junit.jupiter.api.Test; 22 | import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable; 23 | 24 | import java.time.Duration; 25 | import java.util.HashMap; 26 | import java.util.Map; 27 | 28 | import static org.junit.jupiter.api.Assertions.assertEquals; 29 | 30 | /** 31 | * Integration test that verifies basic reading from PostgreSQL database and writing to iceberg destination. 32 | * 33 | * @author Ismail Simsek 34 | */ 35 | @QuarkusTest 36 | @QuarkusTestResource(value = S3Minio.class, restrictToAnnotatedClass = true) 37 | @QuarkusTestResource(value = SourcePostgresqlDB.class, restrictToAnnotatedClass = true) 38 | @QuarkusTestResource(value = CatalogNessie.class, restrictToAnnotatedClass = true) 39 | @TestProfile(IcebergChangeConsumerConnectTest.TestProfile.class) 40 | @EnabledIfEnvironmentVariable(named = "DEBEZIUM_FORMAT_VALUE", matches = "connect") 41 | public class IcebergChangeConsumerConnectTest extends BaseSparkTest { 42 | 43 | @Test 44 | public void testSimpleUpload() { 45 | assertEquals("connect", config.debezium().keyValueChangeEventFormat()); 46 | 47 | Awaitility.await().atMost(Duration.ofSeconds(120)).until(() -> { 48 | try { 49 | Dataset ds = getTableData("testc.inventory.customers"); 50 | ds.show(false); 51 | return ds.count() >= 3; 52 | } catch (Exception e) { 53 | e.printStackTrace(); 54 | return false; 55 | } 56 | }); 57 | } 58 | 59 | public static class TestProfile implements QuarkusTestProfile { 60 | @Override 61 | public Map getConfigOverrides() { 62 | Map config = new HashMap<>(); 63 | config.put("debezium.format.value", "connect"); 64 | config.put("debezium.format.key", "connect"); 65 | return config; 66 | } 67 | } 68 | 69 | } 70 | -------------------------------------------------------------------------------- /debezium-server-iceberg-sink/src/main/java/io/debezium/server/iceberg/batchsizewait/MaxBatchSizeWait.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright memiiso Authors. 
4 | * * 5 | * * Licensed under the Apache Software License version 2.0, available at http://www.apache.org/licenses/LICENSE-2.0 6 | * 7 | */ 8 | 9 | package io.debezium.server.iceberg.batchsizewait; 10 | 11 | import io.debezium.DebeziumException; 12 | import io.debezium.server.DebeziumMetrics; 13 | import io.debezium.server.iceberg.BatchConfig; 14 | import jakarta.enterprise.context.Dependent; 15 | import jakarta.inject.Inject; 16 | import jakarta.inject.Named; 17 | import org.slf4j.Logger; 18 | import org.slf4j.LoggerFactory; 19 | 20 | /** 21 | * Optimizes batch size to stay around 85%-90% of `max.batch.size` using a dynamically calculated sleep (in milliseconds) 22 | * 23 | * @author Ismail Simsek 24 | */ 25 | @Dependent 26 | @Named("MaxBatchSizeWait") 27 | public class MaxBatchSizeWait implements BatchSizeWait { 28 | protected static final Logger LOGGER = LoggerFactory.getLogger(MaxBatchSizeWait.class); 29 | 30 | @Inject 31 | BatchConfig config; 32 | @Inject 33 | DebeziumMetrics dbzMetrics; 34 | 35 | @Override 36 | public void initizalize() throws DebeziumException { 37 | assert config.batchSizeWaitWaitIntervalMs() < config.batchSizeWaitMaxWaitMs() : "`wait-interval-ms` must be smaller than `max-wait-ms`"; 38 | } 39 | 40 | @Override 41 | public void waitMs(Integer numRecordsProcessed, Integer processingTimeMs) throws InterruptedException { 42 | 43 | // don't wait if snapshot process is running 44 | if (dbzMetrics.snapshotRunning()) { 45 | return; 46 | } 47 | 48 | LOGGER.debug("Processed {}, QueueCurrentSize:{}, QueueTotalCapacity:{}, SecondsBehindSource:{}, SnapshotCompleted:{}", 49 | numRecordsProcessed, 50 | dbzMetrics.streamingQueueCurrentSize(), 51 | config.sourceMaxQueueSize(), 52 | (int) (dbzMetrics.streamingMilliSecondsBehindSource() / 1000), 53 | dbzMetrics.snapshotCompleted() 54 | ); 55 | 56 | int totalWaitMs = 0; 57 | while (totalWaitMs < config.batchSizeWaitMaxWaitMs() && dbzMetrics.streamingQueueCurrentSize() < config.sourceMaxBatchSize()) { 58 | totalWaitMs += config.batchSizeWaitWaitIntervalMs(); 59 | LOGGER.debug("Sleeping {} Milliseconds, QueueCurrentSize:{} < maxBatchSize:{}", 60 | config.batchSizeWaitWaitIntervalMs(), dbzMetrics.streamingQueueCurrentSize(), config.sourceMaxBatchSize()); 61 | 62 | Thread.sleep(config.batchSizeWaitWaitIntervalMs()); 63 | } 64 | 65 | LOGGER.debug("Total wait {} Milliseconds, QueueCurrentSize:{} < maxBatchSize:{}", 66 | totalWaitMs, dbzMetrics.streamingQueueCurrentSize(), config.sourceMaxBatchSize()); 67 | 68 | } 69 | 70 | } 71 | -------------------------------------------------------------------------------- /docs/faq.md: -------------------------------------------------------------------------------- 1 | # Frequently Asked Questions (FAQ) 2 | 3 | ???+ question "How does the connector handle deletes, and what are the performance implications?" 4 | 5 | This connector writes data to Iceberg tables using the V2 specification. To optimize write performance, delete events are recorded in delete files, avoiding costly data file rewrites. While this approach significantly improves write performance, it can impact read performance, especially in `upsert` mode. In `append` mode, this performance trade-off is not applicable. 6 | 7 | To optimize read performance, you must run periodic table maintenance jobs to compact data and rewrite the delete files. This is especially critical for `upsert` mode. 8 | 9 | ???+ question "Does the connector support schema evolution?" 10 | 11 | Full schema evolution, such as converting incompatible data types, is not currently supported.
However, **schema expansion**—including adding new fields or promoting field data types—is supported. To enable this behavior, set the `debezium.sink.iceberg.allow-field-addition` configuration property to `true`. 12 | 13 | For a more robust way to handle schema changes, you can configure the connector to store all nested data in a `variant` field. This approach can seamlessly absorb many schema changes. 14 | 15 | ```properties 16 | # Store nested data in variant fields 17 | debezium.sink.iceberg.nested-as-variant=true 18 | # Ensure event flattening is disabled (flattening is the default behavior) 19 | debezium.transforms=, 20 | ``` 21 | 22 | ???+ question "How can I replicate only specific tables from my source database?" 23 | 24 | By default, the Debezium connector replicates all tables in the database, which can result in unnecessary load. To avoid replicating tables you don't need, configure the `debezium.source.table.include.list` property to specify the exact tables to replicate. This will streamline your data pipeline and reduce overhead. For more details, refer to the [Debezium source connector documentation](https://debezium.io/documentation/reference/stable/connectors/mysql.html#mysql-property-table-include-list). 25 | 26 | ???+ question "How do I configure AWS S3 credentials?" 27 | 28 | You can set up AWS credentials in one of the following ways: 29 | 30 | - **In `application.properties`**: Use the `debezium.sink.iceberg.fs.s3a.access.key` and `debezium.sink.iceberg.fs.s3a.secret.key` properties. 31 | - **As environment variables**: Set `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY`. 32 | - **Using Hadoop's configuration**: Set up the `HADOOP_HOME` environment variable and add S3A configuration to `core-site.xml`. More information can be found [here](https://hadoop.apache.org/docs/stable/hadoop-aws/tools/hadoop-aws/index.html#Authenticating_with_S3). 33 | -------------------------------------------------------------------------------- /debezium-server-iceberg-sink/src/test/java/io/debezium/server/iceberg/IcebergChangeConsumerNessieCatalogTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright memiiso Authors. 4 | * * 5 | * * Licensed under the Apache Software License version 2.0, available at http://www.apache.org/licenses/LICENSE-2.0 6 | * 7 | */ 8 | 9 | package io.debezium.server.iceberg; 10 | 11 | import io.debezium.server.iceberg.testresources.CatalogNessie; 12 | import io.debezium.server.iceberg.testresources.S3Minio; 13 | import io.debezium.server.iceberg.testresources.SourcePostgresqlDB; 14 | import io.quarkus.test.common.QuarkusTestResource; 15 | import io.quarkus.test.junit.QuarkusTest; 16 | import org.apache.spark.sql.Dataset; 17 | import org.apache.spark.sql.Row; 18 | import org.awaitility.Awaitility; 19 | import org.junit.jupiter.api.Test; 20 | import org.junit.jupiter.api.condition.DisabledIfEnvironmentVariable; 21 | 22 | import java.sql.SQLException; 23 | import java.time.Duration; 24 | 25 | /** 26 | * Integration test that verifies basic reading from PostgreSQL database and writing to iceberg destination.
27 | * 28 | * @author Ismail Simsek 29 | */ 30 | @QuarkusTest 31 | @QuarkusTestResource(value = S3Minio.class, restrictToAnnotatedClass = true) 32 | @QuarkusTestResource(value = SourcePostgresqlDB.class, restrictToAnnotatedClass = true) 33 | @QuarkusTestResource(value = CatalogNessie.class, restrictToAnnotatedClass = true) 34 | @DisabledIfEnvironmentVariable(named = "GITHUB_ACTIONS", matches = "true") 35 | public class IcebergChangeConsumerNessieCatalogTest extends BaseSparkTest { 36 | 37 | @Test 38 | public void testSimpleUpload() throws InterruptedException, SQLException, ClassNotFoundException { 39 | Awaitility.await().atMost(Duration.ofSeconds(120)).until(() -> { 40 | try { 41 | Dataset df = getTableData("testc.inventory.customers"); 42 | df.show(false); 43 | return df.count() >= 3; 44 | } catch (Exception e) { 45 | return false; 46 | } 47 | }); 48 | 49 | int startId = 1005; 50 | int numberOfRecords = 5; // You can change this value 51 | for (int i = 0; i < numberOfRecords; i++) { 52 | int currentId = startId + i; 53 | String firstName = "FirstName" + currentId; 54 | String lastName = "LastName" + currentId; 55 | String email = "user" + currentId + "@example.com"; 56 | String insertStatement = String.format( 57 | "INSERT INTO inventory.customers (id, first_name, last_name, email) VALUES (%d, '%s', '%s', '%s');", 58 | currentId, 59 | firstName, 60 | lastName, 61 | email 62 | ); 63 | SourcePostgresqlDB.runSQL(insertStatement); 64 | Thread.sleep(3000); 65 | Dataset df = getTableData("testc.inventory.customers"); 66 | df.show(false); 67 | } 68 | } 69 | 70 | 71 | } 72 | -------------------------------------------------------------------------------- /examples/nessie/config/application.properties: -------------------------------------------------------------------------------- 1 | # Use iceberg sink 2 | debezium.sink.type=iceberg 3 | # Iceberg sink config 4 | debezium.sink.iceberg.table-prefix=debeziumcdc_ 5 | debezium.sink.iceberg.upsert=true 6 | debezium.sink.iceberg.upsert-keep-deletes=true 7 | debezium.sink.iceberg.write.format.default=parquet 8 | # S3 config using Nessie catalog And S3FileIO 9 | debezium.sink.iceberg.type=nessie 10 | #debezium.sink.iceberg.catalog-impl=org.apache.iceberg.nessie.NessieCatalog 11 | debezium.sink.iceberg.uri=http://nessie:19120/api/v2 12 | debezium.sink.iceberg.ref=main 13 | debezium.sink.iceberg.warehouse=s3://warehouse 14 | debezium.sink.iceberg.table-namespace=icebergdata 15 | debezium.sink.iceberg.catalog-name=nessie 16 | # Use S3FileIO 17 | debezium.sink.iceberg.io-impl=org.apache.iceberg.io.ResolvingFileIO 18 | debezium.sink.iceberg.s3.endpoint=http://minio:9000 19 | debezium.sink.iceberg.s3.path-style-access=true 20 | debezium.sink.iceberg.s3.access-key-id=admin 21 | debezium.sink.iceberg.s3.secret-access-key=password 22 | # postgres source 23 | debezium.source.connector.class=io.debezium.connector.postgresql.PostgresConnector 24 | debezium.source.offset.flush.interval.ms=0 25 | debezium.source.database.hostname=postgresqlsourcedb 26 | debezium.source.database.port=5432 27 | debezium.source.database.user=postgres 28 | debezium.source.database.password=postgres 29 | debezium.source.database.dbname=postgres 30 | debezium.source.database.server.name=tutorial 31 | debezium.source.database.server.id=1234 32 | debezium.source.schema.include.list=inventory 33 | debezium.source.topic.prefix=dbz 34 | # saving debezium state data to destination, iceberg tables 35 | # see 
https://debezium.io/documentation/reference/stable/development/engine.html#advanced-consuming 36 | debezium.source.offset.storage=io.debezium.server.iceberg.offset.IcebergOffsetBackingStore 37 | debezium.source.offset.storage.iceberg.table-name=debezium_offset_storage_table 38 | # see https://debezium.io/documentation/reference/stable/development/engine.html#database-history-properties 39 | debezium.source.schema.history.internal=io.debezium.server.iceberg.history.IcebergSchemaHistory 40 | debezium.source.schema.history.internal.iceberg.table-name=debezium_database_history_storage_table 41 | # enable event schemas - mandatory 42 | debezium.format.value.schemas.enable=true 43 | debezium.format.key.schemas.enable=true 44 | debezium.format.value=connect 45 | debezium.format.key=connect 46 | # do event flattening. unwrap message! 47 | debezium.transforms=unwrap 48 | debezium.transforms.unwrap.type=io.debezium.transforms.ExtractNewRecordState 49 | debezium.transforms.unwrap.add.fields=op,table,source.ts_ns,db 50 | debezium.transforms.unwrap.delete.tombstone.handling.mode=rewrite 51 | debezium.transforms.unwrap.drop.tombstones=true 52 | # ############ SET LOG LEVELS ############ 53 | quarkus.log.level=INFO 54 | quarkus.log.console.json=false 55 | -------------------------------------------------------------------------------- /debezium-server-iceberg-sink/src/main/java/io/debezium/server/iceberg/converter/IcebergSchemaInfo.java: -------------------------------------------------------------------------------- 1 | package io.debezium.server.iceberg.converter; 2 | 3 | import org.apache.iceberg.types.Types; 4 | 5 | import java.util.ArrayList; 6 | import java.util.HashSet; 7 | import java.util.List; 8 | import java.util.Set; 9 | import java.util.concurrent.atomic.AtomicInteger; 10 | 11 | /** 12 | * A record class (Java 14+) representing schema data for Iceberg records. 13 | * This class helps manage the fields, identifier fields, and the next available 14 | * field ID when building Iceberg schemas. 15 | * 16 | * @param fields A list of `Types.NestedField` objects representing the fields 17 | * in the schema. `Types.NestedField` contains information about 18 | * the field's ID, name, type, and nullability. 19 | * @param identifierFieldIds A set of integer IDs that identify the fields that are 20 | * part of the record's key or identifier. 21 | * @param nextFieldId An `AtomicInteger` that keeps track of the next available 22 | * field ID to ensure unique IDs are assigned to new fields. Using 23 | * an `AtomicInteger` makes this class thread-safe. 24 | */ 25 | 26 | public record IcebergSchemaInfo(List fields, Set identifierFieldIds, 27 | AtomicInteger nextFieldId) { 28 | 29 | /** 30 | * Constructor for `IcebergSchemaInfo` that initializes the `fields` list and 31 | * `identifierFieldIds` set to empty and sets the `nextFieldId` to the provided 32 | * value. 33 | * 34 | * @param nextFieldId The starting ID to use for new fields. 35 | */ 36 | public IcebergSchemaInfo(Integer nextFieldId) { 37 | this(new ArrayList<>(), new HashSet<>(), new AtomicInteger(nextFieldId)); 38 | } 39 | 40 | /** 41 | * Default constructor for `IcebergSchemaInfo` that initializes the `fields` 42 | * list and `identifierFieldIds` set to empty and sets the `nextFieldId` to 1. 43 | */ 44 | public IcebergSchemaInfo() { 45 | this(1); 46 | } 47 | 48 | /** 49 | * Creates a copy of this `IcebergSchemaInfo` object, but *keeps* the original's 50 | * `identifierFieldIds` and `nextFieldId`. 
This is useful when you want to 51 | * create a new schema builder based on an existing one but need to preserve 52 | * the identifier field information and the next field ID counter. The `fields` 53 | * list is initialized as a new empty list in the copy. 54 | * 55 | * @return A new `IcebergSchemaInfo` object with the same identifier fields and 56 | * next field ID, but an empty fields list. 57 | */ 58 | public IcebergSchemaInfo copyPreservingMetadata() { 59 | return new IcebergSchemaInfo(new ArrayList<>(), this.identifierFieldIds, this.nextFieldId); 60 | } 61 | } -------------------------------------------------------------------------------- /debezium-server-iceberg-sink/src/test/java/io/debezium/server/iceberg/converter/JsonEventConverterBuilderTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright memiiso Authors. 4 | * * 5 | * * Licensed under the Apache Software License version 2.0, available at http://www.apache.org/licenses/LICENSE-2.0 6 | * 7 | */ 8 | 9 | package io.debezium.server.iceberg.converter; 10 | 11 | import io.debezium.server.iceberg.BaseTest; 12 | import io.debezium.server.iceberg.testresources.CatalogNessie; 13 | import io.quarkus.test.common.QuarkusTestResource; 14 | import io.quarkus.test.junit.QuarkusTest; 15 | import org.apache.iceberg.Schema; 16 | import org.apache.iceberg.types.Types; 17 | import org.junit.jupiter.api.Assertions; 18 | import org.junit.jupiter.api.Test; 19 | import org.junit.jupiter.api.condition.DisabledIfEnvironmentVariable; 20 | 21 | import java.util.List; 22 | import java.util.Set; 23 | 24 | import static org.apache.iceberg.types.Types.NestedField.optional; 25 | import static org.apache.iceberg.types.Types.NestedField.required; 26 | 27 | 28 | /** 29 | * @author Ismail Simsek 30 | */ 31 | @QuarkusTest 32 | @QuarkusTestResource(value = CatalogNessie.class, restrictToAnnotatedClass = true) 33 | @DisabledIfEnvironmentVariable(named = "DEBEZIUM_FORMAT_VALUE", matches = "connect") 34 | class JsonEventConverterBuilderTest extends BaseTest { 35 | 36 | @Test 37 | public void testIcebergChangeEventBuilder() { 38 | Schema schema1 = new Schema( 39 | List.of( 40 | required(1, "id", Types.IntegerType.get()), 41 | optional(2, "data", Types.StringType.get()), 42 | optional(3, "preferences", Types.StructType.of( 43 | optional(4, "feature1", Types.BooleanType.get()), 44 | optional(5, "feature2", Types.BooleanType.get()) 45 | )) 46 | ) 47 | , Set.of(1) 48 | ); 49 | 50 | JsonEventConverter t = eventBuilder. 51 | addKeyField("id", 1) 52 | .addField("data", "testdatavalue") 53 | .addField("preferences", "feature1", true) 54 | .addField("preferences", "feature2", true) 55 | .build(); 56 | Assertions.assertTrue(schema1.sameSchema(t.icebergSchema())); 57 | 58 | Schema schema2 = new Schema( 59 | optional(1, "id", Types.IntegerType.get()), 60 | optional(2, "data", Types.StringType.get()), 61 | optional(3, "preferences", Types.StructType.of( 62 | optional(4, "feature1", Types.BooleanType.get()), 63 | optional(5, "feature2", Types.BooleanType.get()) 64 | )) 65 | ); 66 | 67 | JsonEventConverter t2 = eventBuilder. 
68 | addField("id", 1) 69 | .addField("data", "testdatavalue") 70 | .addField("preferences", "feature1", true) 71 | .addField("preferences", "feature2", true) 72 | .build(); 73 | Assertions.assertEquals(schema2.identifierFieldIds(), t2.icebergSchema().identifierFieldIds()); 74 | Assertions.assertTrue(schema2.sameSchema(t2.icebergSchema())); 75 | } 76 | 77 | 78 | } -------------------------------------------------------------------------------- /examples/lakekeeper/config/application.properties: -------------------------------------------------------------------------------- 1 | # Use iceberg sink 2 | debezium.sink.type=iceberg 3 | # Iceberg sink config 4 | debezium.sink.iceberg.table-prefix=debeziumcdc_ 5 | debezium.sink.iceberg.upsert=true 6 | debezium.sink.iceberg.upsert-keep-deletes=true 7 | debezium.sink.iceberg.write.format.default=parquet 8 | # S3 config using lakekeeper catalog And S3FileIO 9 | debezium.sink.iceberg.type=rest 10 | #debezium.sink.iceberg.catalog-impl=org.apache.iceberg.nessie.NessieCatalog 11 | debezium.sink.iceberg.uri=http://lakekeeper:8181/catalog 12 | debezium.sink.iceberg.ref=main 13 | debezium.sink.iceberg.warehouse=iceberg_warehouse 14 | debezium.sink.iceberg.table-namespace=icebergdata 15 | debezium.sink.iceberg.catalog-name=lakekeeper 16 | # Use S3FileIO 17 | debezium.sink.iceberg.io-impl=org.apache.iceberg.io.ResolvingFileIO 18 | debezium.sink.iceberg.s3.endpoint=http://minio:9000 19 | debezium.sink.iceberg.s3.path-style-access=true 20 | debezium.sink.iceberg.s3.access-key-id=admin 21 | debezium.sink.iceberg.s3.secret-access-key=password 22 | # postgres source 23 | debezium.source.connector.class=io.debezium.connector.postgresql.PostgresConnector 24 | debezium.source.offset.flush.interval.ms=0 25 | debezium.source.database.hostname=postgresqlsourcedb 26 | debezium.source.database.port=5432 27 | debezium.source.database.user=postgres 28 | debezium.source.database.password=postgres 29 | debezium.source.database.dbname=postgres 30 | debezium.source.database.server.name=tutorial 31 | debezium.source.database.server.id=1234 32 | debezium.source.schema.include.list=inventory 33 | debezium.source.topic.prefix=dbz 34 | # saving debezium state data to destination, iceberg tables 35 | # see https://debezium.io/documentation/reference/stable/development/engine.html#advanced-consuming 36 | debezium.source.offset.storage=io.debezium.server.iceberg.offset.IcebergOffsetBackingStore 37 | debezium.source.offset.storage.iceberg.table-name=debezium_offset_storage_table 38 | # see https://debezium.io/documentation/reference/stable/development/engine.html#database-history-properties 39 | debezium.source.schema.history.internal=io.debezium.server.iceberg.history.IcebergSchemaHistory 40 | debezium.source.schema.history.internal.iceberg.table-name=debezium_database_history_storage_table 41 | # enable event schemas - mandatory 42 | debezium.format.value.schemas.enable=true 43 | debezium.format.key.schemas.enable=true 44 | debezium.format.value=connect 45 | debezium.format.key=connect 46 | # do event flattening. unwrap message! 
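# ExtractNewRecordState collapses the Debezium change envelope into a flat row and
# copies the op, table, source.ts_ns and db metadata into __-prefixed columns used by the sink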
47 | debezium.transforms=unwrap 48 | debezium.transforms.unwrap.type=io.debezium.transforms.ExtractNewRecordState 49 | debezium.transforms.unwrap.add.fields=op,table,source.ts_ns,db 50 | debezium.transforms.unwrap.delete.tombstone.handling.mode=rewrite 51 | debezium.transforms.unwrap.drop.tombstones=true 52 | # ############ SET LOG LEVELS ############ 53 | quarkus.log.level=INFO 54 | quarkus.log.console.json=false 55 | quarkus.log.category."org.apache.hadoop".level=WARN 56 | #quarkus.log.category."org.apache.iceberg.SnapshotProducer".level=WARN 57 | quarkus.log.category."org.apache.iceberg.CatalogUtil".level=WARN 58 | -------------------------------------------------------------------------------- /debezium-server-iceberg-sink/src/main/java/io/debezium/server/iceberg/tableoperator/BaseDeltaTaskWriter.java: -------------------------------------------------------------------------------- 1 | package io.debezium.server.iceberg.tableoperator; 2 | 3 | import com.google.common.collect.Sets; 4 | import org.apache.iceberg.*; 5 | import org.apache.iceberg.data.InternalRecordWrapper; 6 | import org.apache.iceberg.data.Record; 7 | import org.apache.iceberg.io.BaseTaskWriter; 8 | import org.apache.iceberg.io.FileAppenderFactory; 9 | import org.apache.iceberg.io.FileIO; 10 | import org.apache.iceberg.io.OutputFileFactory; 11 | import org.apache.iceberg.types.TypeUtil; 12 | 13 | import java.io.IOException; 14 | import java.util.Set; 15 | 16 | abstract class BaseDeltaTaskWriter extends BaseTaskWriter<Record> { 17 | 18 | private final Schema schema; 19 | private final Schema deleteSchema; 20 | private final InternalRecordWrapper wrapper; 21 | private final InternalRecordWrapper keyWrapper; 22 | private final boolean keepDeletes; 23 | private final RecordProjection keyProjection; 24 | 25 | BaseDeltaTaskWriter(PartitionSpec spec, 26 | FileFormat format, 27 | FileAppenderFactory<Record> appenderFactory, 28 | OutputFileFactory fileFactory, 29 | FileIO io, 30 | long targetFileSize, 31 | Schema schema, 32 | Set<Integer> identifierFieldIds, 33 | boolean keepDeletes) { 34 | super(spec, format, appenderFactory, fileFactory, io, targetFileSize); 35 | this.schema = schema; 36 | this.deleteSchema = TypeUtil.select(schema, Sets.newHashSet(identifierFieldIds)); 37 | this.wrapper = new InternalRecordWrapper(schema.asStruct()); 38 | this.keyWrapper = new InternalRecordWrapper(deleteSchema.asStruct()); 39 | this.keyProjection = RecordProjection.create(schema, deleteSchema); 40 | this.keepDeletes = keepDeletes; 41 | } 42 | 43 | abstract RowDataDeltaWriter route(Record row); 44 | 45 | InternalRecordWrapper wrapper() { 46 | return wrapper; 47 | } 48 | 49 | @Override 50 | public void write(Record row) throws IOException { 51 | RowDataDeltaWriter writer = route(row); 52 | Operation rowOperation = ((RecordWrapper) row).op(); 53 | if (rowOperation == Operation.INSERT) { 54 | // new row 55 | writer.write(row); 56 | } else if (rowOperation == Operation.DELETE && !keepDeletes) { 57 | // deletes. doing hard delete.
when keepDeletes = FALSE we don't keep the deleted record 58 | writer.deleteKey(keyProjection.wrap(row)); 59 | } else { 60 | writer.deleteKey(keyProjection.wrap(row)); 61 | writer.write(row); 62 | } 63 | } 64 | 65 | public class RowDataDeltaWriter extends BaseEqualityDeltaWriter { 66 | RowDataDeltaWriter(PartitionKey partition) { 67 | super(partition, schema, deleteSchema); 68 | } 69 | 70 | @Override 71 | protected StructLike asStructLike(Record data) { 72 | return wrapper.wrap(data); 73 | } 74 | 75 | @Override 76 | protected StructLike asStructLikeKey(Record data) { 77 | return keyWrapper.wrap(data); 78 | } 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /.github/workflows/codeql-analysis.yml: -------------------------------------------------------------------------------- 1 | # For most projects, this workflow file will not need changing; you simply need 2 | # to commit it to your repository. 3 | # 4 | # You may wish to alter this file to override the set of languages analyzed, 5 | # or to provide custom queries or build logic. 6 | # 7 | # ******** NOTE ******** 8 | # We have attempted to detect the languages in your repository. Please check 9 | # the `language` matrix defined below to confirm you have the correct set of 10 | # supported CodeQL languages. 11 | # 12 | name: "CodeQL" 13 | 14 | on: 15 | push: 16 | branches: [ "master" ] 17 | pull_request: 18 | # The branches below must be a subset of the branches above 19 | branches: [ "master" ] 20 | schedule: 21 | - cron: '36 4 * * 3' 22 | 23 | jobs: 24 | analyze: 25 | name: Analyze 26 | runs-on: ubuntu-latest 27 | permissions: 28 | actions: read 29 | contents: read 30 | security-events: write 31 | 32 | strategy: 33 | fail-fast: false 34 | matrix: 35 | language: [ 'java', 'python' ] 36 | # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ] 37 | # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support 38 | 39 | steps: 40 | - name: Checkout repository 41 | uses: actions/checkout@v4 42 | - name: Set up JDK 43 | uses: actions/setup-java@v5 44 | with: 45 | distribution: 'temurin' 46 | java-version: 21 47 | cache: 'maven' 48 | 49 | # Initializes the CodeQL tools for scanning. 50 | - name: Initialize CodeQL 51 | uses: github/codeql-action/init@v4 52 | with: 53 | languages: ${{ matrix.language }} 54 | # If you wish to specify custom queries, you can do so here or in a config file. 55 | # By default, queries listed here will override any specified in a config file. 56 | # Prefix the list here with "+" to use these queries and those in the config file. 57 | 58 | # For details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs 59 | # queries: security-extended,security-and-quality 60 | 61 | 62 | # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). 63 | # If this step fails, then you should remove it and run the build manually (see below) 64 | - name: Autobuild 65 | uses: github/codeql-action/autobuild@v4 66 | 67 | # ℹ️ Command-line programs to run using the OS shell. 68 | # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun 69 | 70 | # If the Autobuild fails above, remove it and uncomment the following three lines.
71 | # Modify them (or add more) to build your code; if your project needs a custom build process, refer to the EXAMPLE below for guidance. 72 | 73 | # - run: | 74 | # echo "Run, Build Application using script" 75 | # ./location_of_script_within_repo/buildscript.sh 76 | 77 | - name: Perform CodeQL Analysis 78 | uses: github/codeql-action/analyze@v4 79 | -------------------------------------------------------------------------------- /debezium-server-iceberg-sink/src/test/java/io/debezium/server/iceberg/IcebergChangeConsumerDecimalTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright memiiso Authors. 4 | * * 5 | * * Licensed under the Apache Software License version 2.0, available at http://www.apache.org/licenses/LICENSE-2.0 6 | * 7 | */ 8 | 9 | package io.debezium.server.iceberg; 10 | 11 | import io.debezium.server.iceberg.testresources.CatalogNessie; 12 | import io.debezium.server.iceberg.testresources.S3Minio; 13 | import io.debezium.server.iceberg.testresources.SourcePostgresqlDB; 14 | import io.quarkus.test.common.QuarkusTestResource; 15 | import io.quarkus.test.junit.QuarkusTest; 16 | import io.quarkus.test.junit.QuarkusTestProfile; 17 | import io.quarkus.test.junit.TestProfile; 18 | import org.apache.spark.sql.Dataset; 19 | import org.apache.spark.sql.Row; 20 | import org.awaitility.Awaitility; 21 | import org.junit.jupiter.api.Assertions; 22 | import org.junit.jupiter.api.Test; 23 | 24 | import java.time.Duration; 25 | import java.util.HashMap; 26 | import java.util.Map; 27 | 28 | import static org.junit.jupiter.api.Assertions.assertEquals; 29 | 30 | /** 31 | * Integration test that verifies basic reading from PostgreSQL database and writing to iceberg destination. 32 | * 33 | * @author Ismail Simsek 34 | */ 35 | @QuarkusTest 36 | @QuarkusTestResource(value = S3Minio.class, restrictToAnnotatedClass = true) 37 | @QuarkusTestResource(value = SourcePostgresqlDB.class, restrictToAnnotatedClass = true) 38 | @QuarkusTestResource(value = CatalogNessie.class, restrictToAnnotatedClass = true) 39 | @TestProfile(IcebergChangeConsumerDecimalTest.TestProfile.class) 40 | public class IcebergChangeConsumerDecimalTest extends BaseSparkTest { 41 | 42 | @Test 43 | public void testConsumingNumerics() throws Exception { 44 | assertEquals(sinkType, "iceberg"); 45 | String sql = "\n" + 46 | " DROP TABLE IF EXISTS inventory.data_types;\n" + 47 | " CREATE TABLE IF NOT EXISTS inventory.data_types (\n" + 48 | " c_id INTEGER ,\n" + 49 | " c_decimal DECIMAL(18,6)\n" + 50 | " );"; 51 | SourcePostgresqlDB.runSQL(sql); 52 | sql = "INSERT INTO inventory.data_types (c_id, c_decimal) " + 53 | "VALUES (1, '1234566.34456'::decimal)"; 54 | SourcePostgresqlDB.runSQL(sql); 55 | Awaitility.await().atMost(Duration.ofSeconds(320)).until(() -> { 56 | try { 57 | Dataset<Row> df = getTableData("testc.inventory.data_types"); 58 | df.show(false); 59 | 60 | Assertions.assertEquals(1, df.count()); 61 | Assertions.assertEquals(1, df.filter("c_id = 1 AND c_decimal = CAST('1234566.344560' AS DECIMAL(18,6))").count(), "c_decimal not matching"); 62 | return true; 63 | } catch (Exception | AssertionError e) { 64 | e.printStackTrace(); 65 | return false; 66 | } 67 | }); 68 | } 69 | 70 | public static class TestProfile implements QuarkusTestProfile { 71 | @Override 72 | public Map<String, String> getConfigOverrides() { 73 | Map<String, String> config = new HashMap<>(); 74 | config.put("debezium.sink.iceberg.destination-regexp", "\\d"); 75 | config.put("debezium.source.decimal.handling.mode", "precise"); 76 | return config; 77 |
} 78 | } 79 | 80 | } 81 | -------------------------------------------------------------------------------- /docs/images/debezium-iceberg.drawio: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/memiiso/debezium-server-iceberg/HEAD/docs/images/debezium-iceberg.drawio -------------------------------------------------------------------------------- /debezium-server-iceberg-sink/src/test/java/io/debezium/server/iceberg/IcebergEventsChangeConsumerTest.java:
-------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright memiiso Authors. 4 | * * 5 | * * Licensed under the Apache Software License version 2.0, available at http://www.apache.org/licenses/LICENSE-2.0 6 | * 7 | */ 8 | 9 | package io.debezium.server.iceberg; 10 | 11 | import io.debezium.server.iceberg.testresources.CatalogNessie; 12 | import io.debezium.server.iceberg.testresources.S3Minio; 13 | import io.debezium.server.iceberg.testresources.SourceMysqlDB; 14 | import io.quarkus.test.common.QuarkusTestResource; 15 | import io.quarkus.test.junit.QuarkusTest; 16 | import io.quarkus.test.junit.QuarkusTestProfile; 17 | import io.quarkus.test.junit.TestProfile; 18 | import org.apache.spark.sql.Dataset; 19 | import org.apache.spark.sql.Row; 20 | import org.awaitility.Awaitility; 21 | import org.eclipse.microprofile.config.inject.ConfigProperty; 22 | import org.junit.jupiter.api.Assertions; 23 | import org.junit.jupiter.api.Test; 24 | import org.junit.jupiter.api.condition.DisabledIfEnvironmentVariable; 25 | 26 | import java.time.Duration; 27 | import java.util.HashMap; 28 | import java.util.Map; 29 | 30 | import static io.debezium.server.iceberg.TestConfigSource.ICEBERG_CATALOG_TABLE_NAMESPACE; 31 | 32 | /** 33 | * 34 | * @author Ismail Simsek 35 | */ 36 | @QuarkusTest 37 | @QuarkusTestResource(value = S3Minio.class, restrictToAnnotatedClass = true) 38 | @QuarkusTestResource(value = SourceMysqlDB.class, restrictToAnnotatedClass = true) 39 | @QuarkusTestResource(value = CatalogNessie.class, restrictToAnnotatedClass = true) 40 | @TestProfile(IcebergEventsChangeConsumerTest.TestProfile.class) 41 | @DisabledIfEnvironmentVariable(named = "DEBEZIUM_FORMAT_VALUE", matches = "connect") 42 | @Deprecated 43 | public class IcebergEventsChangeConsumerTest extends BaseSparkTest { 44 | @ConfigProperty(name = "debezium.sink.type") 45 | String sinkType; 46 | 47 | @Test 48 | public void testSimpleUpload() { 49 | Assertions.assertEquals(sinkType, "icebergevents"); 50 | Awaitility.await().atMost(Duration.ofSeconds(120)).until(() -> { 51 | try { 52 | Dataset ds = spark.newSession().sql("SELECT * FROM "+ICEBERG_CATALOG_TABLE_NAMESPACE+".debezium_events"); 53 | ds.show(false); 54 | return ds.count() >= 10 55 | && ds.select("event_destination").distinct().count() >= 2; 56 | } catch (Exception e) { 57 | return false; 58 | } 59 | }); 60 | 61 | // S3Minio.listFiles(); 62 | } 63 | 64 | public static class TestProfile implements QuarkusTestProfile { 65 | @Override 66 | public Map getConfigOverrides() { 67 | Map config = new HashMap<>(); 68 | config.put("debezium.sink.type", "icebergevents"); 69 | config.put("quarkus.profile", "mysql"); 70 | config.put("debezium.format.value", "json"); 71 | config.put("debezium.format.key", "json"); 72 | config.put("%mysql.debezium.source.connector.class", "io.debezium.connector.mysql.MySqlConnector"); 73 | config.put("%mysql.debezium.source.table.whitelist", "inventory.customers,inventory.test_delete_table"); 74 | return config; 75 | } 76 | 77 | @Override 78 | public String getConfigProfile() { 79 | return "mysql"; 80 | } 81 | } 82 | 83 | } 84 | -------------------------------------------------------------------------------- /debezium-server-iceberg-sink/src/test/java/io/debezium/server/iceberg/IcebergChangeConsumerMongodbTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright memiiso Authors. 
4 | * * 5 | * * Licensed under the Apache Software License version 2.0, available at http://www.apache.org/licenses/LICENSE-2.0 6 | * 7 | */ 8 | 9 | package io.debezium.server.iceberg; 10 | 11 | import io.debezium.server.iceberg.testresources.CatalogNessie; 12 | import io.debezium.server.iceberg.testresources.S3Minio; 13 | import io.debezium.server.iceberg.testresources.SourceMongoDB; 14 | import io.quarkus.test.common.QuarkusTestResource; 15 | import io.quarkus.test.junit.QuarkusTest; 16 | import io.quarkus.test.junit.QuarkusTestProfile; 17 | import io.quarkus.test.junit.TestProfile; 18 | import org.apache.spark.sql.Dataset; 19 | import org.apache.spark.sql.Row; 20 | import org.awaitility.Awaitility; 21 | import org.junit.jupiter.api.Test; 22 | 23 | import java.time.Duration; 24 | import java.util.HashMap; 25 | import java.util.Map; 26 | 27 | /** 28 | * 29 | * @author Ismail Simsek 30 | */ 31 | @QuarkusTest 32 | @QuarkusTestResource(value = S3Minio.class, restrictToAnnotatedClass = true) 33 | @QuarkusTestResource(value = SourceMongoDB.class, restrictToAnnotatedClass = true) 34 | @QuarkusTestResource(value = CatalogNessie.class, restrictToAnnotatedClass = true) 35 | @TestProfile(IcebergChangeConsumerMongodbTest.TestProfile.class) 36 | public class IcebergChangeConsumerMongodbTest extends BaseSparkTest { 37 | 38 | @Test 39 | public void testSimpleUpload() { 40 | 41 | Awaitility.await().atMost(Duration.ofSeconds(180)).until(() -> { 42 | try { 43 | Dataset df = getTableData("testc.inventory.products"); 44 | df.show(); 45 | return df.filter("_id is not null").count() >= 4; 46 | } catch (Exception e) { 47 | //e.printStackTrace(); 48 | return false; 49 | } 50 | }); 51 | } 52 | 53 | public static class TestProfile implements QuarkusTestProfile { 54 | @Override 55 | public Map getConfigOverrides() { 56 | Map config = new HashMap<>(); 57 | config.put("quarkus.profile", "mongodb"); 58 | config.put("%mongodb.debezium.source.connector.class", "io.debezium.connector.mongodb.MongoDbConnector"); 59 | config.put("%mongodb.debezium.transforms.unwrap.type", "io.debezium.connector.mongodb.transforms.ExtractNewDocumentState"); 60 | config.put("%mongodb.debezium.transforms.unwrap.add.fields", "op,source.ts_ns,db"); 61 | config.put("%mongodb.debezium.sink.iceberg.allow-field-addition", "false"); 62 | config.put("%mongodb.debezium.source.topic.prefix", "testc"); 63 | config.put("%mongodb.debezium.source.database.include.list", "inventory"); // ok 64 | config.put("%mongodb.debezium.source.collection.include.list", "inventory.products"); 65 | // IMPORTANT !!! FIX MongoDbConnector KEY FIELD NAME "id"=>"_id" !!! 66 | config.put("%mongodb.debezium.transforms", "unwrap,renamekeyfield"); 67 | config.put("%mongodb.debezium.transforms.renamekeyfield.type", 68 | "org.apache.kafka.connect.transforms.ReplaceField$Key"); 69 | config.put("%mongodb.debezium.transforms.renamekeyfield.renames", "id:_id"); 70 | 71 | return config; 72 | } 73 | 74 | @Override 75 | public String getConfigProfile() { 76 | return "mongodb"; 77 | } 78 | } 79 | 80 | } 81 | -------------------------------------------------------------------------------- /debezium-server-iceberg-sink/src/test/java/io/debezium/server/iceberg/testresources/SourceMysqlDB.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright memiiso Authors. 
4 | * * 5 | * * Licensed under the Apache Software License version 2.0, available at http://www.apache.org/licenses/LICENSE-2.0 6 | * 7 | */ 8 | 9 | package io.debezium.server.iceberg.testresources; 10 | 11 | import io.quarkus.test.common.QuarkusTestResourceLifecycleManager; 12 | 13 | import java.sql.Connection; 14 | import java.sql.DriverManager; 15 | import java.sql.SQLException; 16 | import java.sql.Statement; 17 | import java.time.Duration; 18 | import java.util.Map; 19 | import java.util.concurrent.ConcurrentHashMap; 20 | 21 | import org.slf4j.Logger; 22 | import org.slf4j.LoggerFactory; 23 | import org.testcontainers.containers.GenericContainer; 24 | import org.testcontainers.containers.wait.strategy.Wait; 25 | 26 | public class SourceMysqlDB implements QuarkusTestResourceLifecycleManager { 27 | 28 | public static final String MYSQL_ROOT_PASSWORD = "debezium"; 29 | public static final String MYSQL_USER = "mysqluser"; 30 | public static final String MYSQL_PASSWORD = "mysqlpw"; 31 | public static final String MYSQL_DEBEZIUM_USER = "debezium"; 32 | public static final String MYSQL_DEBEZIUM_PASSWORD = "dbz"; 33 | public static final String MYSQL_IMAGE = "debezium/example-mysql:3.0.0.Final"; 34 | public static final String MYSQL_HOST = "127.0.0.1"; 35 | public static final String MYSQL_DATABASE = "inventory"; 36 | public static final Integer MYSQL_PORT_DEFAULT = 3306; 37 | private static final Logger LOGGER = LoggerFactory.getLogger(SourceMysqlDB.class); 38 | 39 | static private final GenericContainer container = new GenericContainer<>(MYSQL_IMAGE) 40 | .waitingFor(Wait.forLogMessage(".*mysqld: ready for connections.*", 2)) 41 | .withEnv("MYSQL_USER", MYSQL_USER) 42 | .withEnv("MYSQL_PASSWORD", MYSQL_PASSWORD) 43 | .withEnv("MYSQL_ROOT_PASSWORD", MYSQL_ROOT_PASSWORD) 44 | .withExposedPorts(MYSQL_PORT_DEFAULT) 45 | .withStartupTimeout(Duration.ofSeconds(30)); 46 | 47 | public static void runSQL(String query) throws SQLException, ClassNotFoundException { 48 | try { 49 | String url = "jdbc:mysql://" + MYSQL_HOST + ":" + container.getMappedPort(MYSQL_PORT_DEFAULT) + "/" + MYSQL_DATABASE + "?useSSL=false"; 50 | Class.forName("com.mysql.cj.jdbc.Driver"); 51 | Connection con = DriverManager.getConnection(url, MYSQL_USER, MYSQL_PASSWORD); 52 | Statement st = con.createStatement(); 53 | st.execute(query); 54 | con.close(); 55 | } catch (Exception e) { 56 | LOGGER.error(query); 57 | throw e; 58 | } 59 | } 60 | 61 | @Override 62 | public Map start() { 63 | container.start(); 64 | 65 | Map params = new ConcurrentHashMap<>(); 66 | params.put("%mysql.debezium.source.database.hostname", MYSQL_HOST); 67 | params.put("%mysql.debezium.source.database.port", container.getMappedPort(MYSQL_PORT_DEFAULT).toString()); 68 | params.put("%mysql.debezium.source.database.user", MYSQL_DEBEZIUM_USER); 69 | params.put("%mysql.debezium.source.database.password", MYSQL_DEBEZIUM_PASSWORD); 70 | params.put("%mysql.debezium.source.database.dbname", MYSQL_DATABASE); 71 | return params; 72 | } 73 | 74 | @Override 75 | public void stop() { 76 | if (container != null) { 77 | container.stop(); 78 | } 79 | } 80 | 81 | } 82 | -------------------------------------------------------------------------------- /debezium-server-iceberg-sink/src/test/java/io/debezium/server/iceberg/testresources/SourcePostgresqlDB.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright memiiso Authors. 
4 | * * 5 | * * Licensed under the Apache Software License version 2.0, available at http://www.apache.org/licenses/LICENSE-2.0 6 | * 7 | */ 8 | 9 | package io.debezium.server.iceberg.testresources; 10 | 11 | import io.quarkus.test.common.QuarkusTestResourceLifecycleManager; 12 | 13 | import java.sql.Connection; 14 | import java.sql.DriverManager; 15 | import java.sql.SQLException; 16 | import java.sql.Statement; 17 | import java.time.Duration; 18 | import java.util.Map; 19 | import java.util.concurrent.ConcurrentHashMap; 20 | 21 | import org.slf4j.Logger; 22 | import org.slf4j.LoggerFactory; 23 | import org.testcontainers.containers.GenericContainer; 24 | import org.testcontainers.containers.wait.strategy.Wait; 25 | 26 | public class SourcePostgresqlDB implements QuarkusTestResourceLifecycleManager { 27 | 28 | public static final String POSTGRES_USER = "postgres"; 29 | public static final String POSTGRES_PASSWORD = "postgres"; 30 | public static final String POSTGRES_DBNAME = "postgres"; 31 | public static final String POSTGRES_IMAGE = "debezium/example-postgres:3.0.0.Final"; 32 | public static final String POSTGRES_HOST = "localhost"; 33 | public static final Integer POSTGRES_PORT_DEFAULT = 5432; 34 | private static final Logger LOGGER = LoggerFactory.getLogger(SourcePostgresqlDB.class); 35 | 36 | private static GenericContainer container = new GenericContainer<>(POSTGRES_IMAGE) 37 | .waitingFor(Wait.forLogMessage(".*database system is ready to accept connections.*", 2)) 38 | .withEnv("POSTGRES_USER", POSTGRES_USER) 39 | .withEnv("POSTGRES_PASSWORD", POSTGRES_PASSWORD) 40 | .withEnv("POSTGRES_DB", POSTGRES_DBNAME) 41 | .withEnv("POSTGRES_INITDB_ARGS", "-E UTF8") 42 | .withEnv("LANG", "en_US.utf8") 43 | .withExposedPorts(POSTGRES_PORT_DEFAULT) 44 | .withStartupTimeout(Duration.ofSeconds(30)); 45 | 46 | public static void runSQL(String query) throws SQLException, ClassNotFoundException { 47 | try { 48 | 49 | String url = "jdbc:postgresql://" + POSTGRES_HOST + ":" + container.getMappedPort(POSTGRES_PORT_DEFAULT) + "/" + POSTGRES_DBNAME; 50 | Class.forName("org.postgresql.Driver"); 51 | Connection con = DriverManager.getConnection(url, POSTGRES_USER, POSTGRES_PASSWORD); 52 | Statement st = con.createStatement(); 53 | st.execute(query); 54 | con.close(); 55 | } catch (Exception e) { 56 | e.printStackTrace(); 57 | throw e; 58 | } 59 | } 60 | 61 | @Override 62 | public Map start() { 63 | container.start(); 64 | try { 65 | SourcePostgresqlDB.runSQL("CREATE EXTENSION hstore;"); 66 | } catch (SQLException | ClassNotFoundException e) { 67 | throw new RuntimeException(e); 68 | } 69 | 70 | Map params = new ConcurrentHashMap<>(); 71 | params.put("debezium.source.database.hostname", POSTGRES_HOST); 72 | params.put("debezium.source.database.port", container.getMappedPort(POSTGRES_PORT_DEFAULT).toString()); 73 | params.put("debezium.source.database.user", POSTGRES_USER); 74 | params.put("debezium.source.database.password", POSTGRES_PASSWORD); 75 | params.put("debezium.source.database.dbname", POSTGRES_DBNAME); 76 | return params; 77 | } 78 | 79 | @Override 80 | public void stop() { 81 | if (container != null) { 82 | container.stop(); 83 | } 84 | } 85 | 86 | } 87 | -------------------------------------------------------------------------------- /debezium-server-iceberg-sink/src/main/java/io/debezium/server/iceberg/IcebergConfig.java: -------------------------------------------------------------------------------- 1 | package io.debezium.server.iceberg; 2 | 3 | import 
io.quarkus.runtime.annotations.ConfigRoot; 4 | import io.smallrye.config.ConfigMapping; 5 | import io.smallrye.config.WithDefault; 6 | import io.smallrye.config.WithName; 7 | import org.apache.iceberg.CatalogProperties; 8 | 9 | import java.util.List; 10 | import java.util.Map; 11 | import java.util.Optional; 12 | 13 | import static org.apache.iceberg.TableProperties.DEFAULT_FILE_FORMAT; 14 | import static org.apache.iceberg.TableProperties.DEFAULT_FILE_FORMAT_DEFAULT; 15 | 16 | @ConfigRoot 17 | @ConfigMapping 18 | public interface IcebergConfig { 19 | String PROP_PREFIX = "debezium.sink.iceberg"; 20 | 21 | @WithName(PROP_PREFIX) 22 | Map<String, String> icebergConfigs(); 23 | 24 | @WithName("debezium.sink.iceberg.upsert-op-field") 25 | @WithDefault("__op") 26 | String cdcOpField(); 27 | 28 | @WithName("debezium.sink.iceberg.upsert-dedup-column") 29 | @WithDefault("__source_ts_ns") 30 | String cdcSourceTsField(); 31 | 32 | @WithName("debezium.sink.iceberg.upsert") 33 | @WithDefault("false") 34 | boolean upsert(); 35 | 36 | @WithName("debezium.sink.iceberg.upsert-keep-deletes") 37 | @WithDefault("true") 38 | boolean keepDeletes(); 39 | 40 | @WithName("debezium.sink.iceberg." + CatalogProperties.WAREHOUSE_LOCATION) 41 | String warehouseLocation(); 42 | 43 | @WithName("debezium.sink.iceberg.table-mapper") 44 | @WithDefault("default-mapper") 45 | String tableMapper(); 46 | 47 | @WithName("debezium.sink.iceberg.destination-regexp") 48 | // @WithDefault("") 49 | Optional<String> destinationRegexp(); 50 | 51 | @WithName("debezium.sink.iceberg.destination-regexp-replace") 52 | // @WithDefault("") 53 | Optional<String> destinationRegexpReplace(); 54 | 55 | @WithName("debezium.sink.iceberg.destination-uppercase-table-names") 56 | @WithDefault("false") 57 | boolean destinationUppercaseTableNames(); 58 | 59 | @WithName("debezium.sink.iceberg.destination-lowercase-table-names") 60 | @WithDefault("false") 61 | boolean destinationLowercaseTableNames(); 62 | 63 | @WithName("debezium.sink.iceberg.table-prefix") 64 | // @WithDefault("") 65 | Optional<String> tablePrefix(); 66 | 67 | @WithName("debezium.sink.iceberg.table-namespace") 68 | @WithDefault("default") 69 | String namespace(); 70 | 71 | @WithName("debezium.sink.iceberg.catalog-name") 72 | @WithDefault("default") 73 | String catalogName(); 74 | 75 | @WithName("debezium.sink.iceberg.create-identifier-fields") 76 | @WithDefault("true") 77 | boolean createIdentifierFields(); 78 | 79 | @WithName("debezium.sink.iceberg." + DEFAULT_FILE_FORMAT) 80 | @WithDefault(DEFAULT_FILE_FORMAT_DEFAULT) 81 | String writeFormat(); 82 | 83 | @WithName("debezium.sink.iceberg.allow-field-addition") 84 | @WithDefault("true") 85 | boolean allowFieldAddition(); 86 | 87 | @WithName("debezium.sink.iceberg.excluded-columns") 88 | Optional<List<String>> excludedColumns(); 89 | 90 | @WithName("debezium.sink.iceberg.io-impl") 91 | @WithDefault("org.apache.iceberg.io.ResolvingFileIO") 92 | String ioImpl(); 93 | 94 | @WithName("debezium.sink.iceberg.preserve-required-property") 95 | @WithDefault("false") 96 | boolean preserveRequiredProperty(); 97 | 98 | @WithName("debezium.sink.iceberg.nested-as-variant") 99 | @WithDefault("false") 100 | boolean nestedAsVariant(); 101 | 102 | } -------------------------------------------------------------------------------- /debezium-server-iceberg-sink/src/test/java/io/debezium/server/iceberg/IcebergChangeConsumerExcludedColumnsTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright memiiso Authors.
4 | * * 5 | * * Licensed under the Apache Software License version 2.0, available at http://www.apache.org/licenses/LICENSE-2.0 6 | * 7 | */ 8 | 9 | package io.debezium.server.iceberg; 10 | 11 | import io.debezium.server.iceberg.testresources.CatalogNessie; 12 | import io.debezium.server.iceberg.testresources.S3Minio; 13 | import io.debezium.server.iceberg.testresources.SourcePostgresqlDB; 14 | import io.quarkus.test.common.QuarkusTestResource; 15 | import io.quarkus.test.junit.QuarkusTest; 16 | import io.quarkus.test.junit.QuarkusTestProfile; 17 | import io.quarkus.test.junit.TestProfile; 18 | import org.apache.spark.sql.Dataset; 19 | import org.apache.spark.sql.Row; 20 | import org.awaitility.Awaitility; 21 | import org.junit.jupiter.api.Assertions; 22 | import org.junit.jupiter.api.Test; 23 | 24 | import java.time.Duration; 25 | import java.util.Arrays; 26 | import java.util.HashMap; 27 | import java.util.List; 28 | import java.util.Map; 29 | 30 | /** 31 | * Integration test that verifies columns can be excluded from the written iceberg table 32 | * 33 | * @author Ismail Simsek 34 | */ 35 | @QuarkusTest 36 | @QuarkusTestResource(value = S3Minio.class, restrictToAnnotatedClass = true) 37 | @QuarkusTestResource(value = SourcePostgresqlDB.class, restrictToAnnotatedClass = true) 38 | @QuarkusTestResource(value = CatalogNessie.class, restrictToAnnotatedClass = true) 39 | @TestProfile(IcebergChangeConsumerExcludedColumnsTest.TestProfile.class) 40 | public class IcebergChangeConsumerExcludedColumnsTest extends BaseSparkTest { 41 | 42 | @Test 43 | public void testSupportExcludedColumns() throws Exception { 44 | String sql = 45 | "DROP TABLE IF EXISTS inventory.table_with_excluded_column;\n" + 46 | "CREATE TABLE IF NOT EXISTS inventory.table_with_excluded_column (\n" + 47 | " c_id INTEGER ,\n" + 48 | " c_text TEXT ,\n" + 49 | " c_exclude_me TEXT\n" + 50 | ");"; 51 | SourcePostgresqlDB.runSQL(sql); 52 | sql = "INSERT INTO inventory.table_with_excluded_column \n" + 53 | "(c_id, c_text, c_exclude_me) \n" + 54 | "VALUES \n" + 55 | "(1, 'one' , 'should_not_write_to_iceberg' ) \n" + 56 | ",(1, 'two' , 'should_not_write_to_iceberg' )"; 57 | 58 | SourcePostgresqlDB.runSQL(sql); 59 | Awaitility.await().atMost(Duration.ofSeconds(320)).until(() -> { 60 | try { 61 | Dataset df = getTableData("testc.inventory.table_with_excluded_column"); 62 | df.show(false); 63 | df.schema().printTreeString(); 64 | 65 | List columns = Arrays.asList(df.columns()); 66 | 67 | Assertions.assertTrue(columns.contains("c_id")); 68 | Assertions.assertTrue(columns.contains("c_text")); 69 | Assertions.assertFalse(columns.contains("c_exclude_me")); 70 | Assertions.assertFalse(columns.contains("__table")); 71 | Assertions.assertFalse(columns.contains("__db")); 72 | return true; 73 | } catch (Exception | AssertionError e) { 74 | return false; 75 | } 76 | }); 77 | } 78 | 79 | public static class TestProfile implements QuarkusTestProfile { 80 | @Override 81 | public Map getConfigOverrides() { 82 | Map config = new HashMap<>(); 83 | config.put("debezium.sink.iceberg.excluded-columns", "c_exclude_me,__table,__db"); 84 | return config; 85 | } 86 | } 87 | } 88 | -------------------------------------------------------------------------------- /examples/nessie/produce_data.py: -------------------------------------------------------------------------------- 1 | import psycopg2 2 | import random 3 | import time 4 | from faker import Faker 5 | 6 | # --- Database Connection Details --- 7 | # Based on your Debezium configuration, the 
host/port/user/password for the *source* DB 8 | DB_HOST = "localhost" 9 | DB_NAME = "postgres" 10 | DB_USER = "postgres" 11 | DB_PASSWORD = "postgres" 12 | DB_PORT = 5432 13 | 14 | # --- Configuration --- 15 | INSERT_INTERVAL_SECONDS = 10 16 | fake = Faker() 17 | 18 | 19 | def get_db_connection(): 20 | """Establishes and returns a PostgreSQL database connection.""" 21 | try: 22 | conn = psycopg2.connect( 23 | host=DB_HOST, 24 | database=DB_NAME, 25 | user=DB_USER, 26 | password=DB_PASSWORD, 27 | port=DB_PORT 28 | ) 29 | return conn 30 | except Exception as error: 31 | print(f"❌ Error connecting to the database: {error}") 32 | return None 33 | 34 | 35 | def insert_random_customer(conn): 36 | first_name = f"test-{fake.first_name()}" 37 | last_name = fake.last_name() 38 | email = f"{first_name.lower()}.{last_name.lower()}{random.randint(1, 100)}@{fake.domain_name()}" 39 | 40 | insert_query = """ 41 | INSERT INTO inventory.customers (first_name, last_name, email) 42 | VALUES (%s, %s, %s) 43 | RETURNING id; 44 | """ 45 | 46 | try: 47 | with conn.cursor() as cur: 48 | cur.execute(insert_query, (first_name, last_name, email)) 49 | new_id = cur.fetchone()[0] 50 | conn.commit() 51 | print(f"✅ Inserted new customer (ID: {new_id}): {first_name} {last_name}") 52 | return new_id 53 | except Exception as error: 54 | print(f"❌ Error inserting customer data: {error}") 55 | conn.rollback() 56 | raise error 57 | 58 | 59 | def insert_random_order(conn, purchaser_id): 60 | """Inserts a new random order into the inventory.orders table.""" 61 | # Ensure there's at least one customer ID to reference 62 | order_date = fake.date_time_this_year() 63 | quantity = random.randint(1, 10) 64 | product_id = random.randint(101, 109) # Assumes existing products 65 | 66 | insert_query = """ 67 | INSERT INTO inventory.orders (order_date, purchaser, quantity, product_id) 68 | VALUES (%s, %s, %s, %s); 69 | """ 70 | 71 | try: 72 | with conn.cursor() as cur: 73 | cur.execute(insert_query, (order_date, purchaser_id, quantity, product_id)) 74 | conn.commit() 75 | print(f"✅ Inserted new order for purchaser ID {purchaser_id} (Product: {product_id}, Quantity: {quantity})") 76 | except Exception as error: 77 | print(f"❌ Error inserting order data: {error}") 78 | conn.rollback() 79 | 80 | 81 | def main(): 82 | """Main function to run the continuous insertion loop.""" 83 | print("🚀 Starting continuous data insertion script...") 84 | 85 | conn = get_db_connection() 86 | try: 87 | while True: 88 | print("-" * 30) 89 | purchaser_id = insert_random_customer(conn) 90 | num_orders = random.randint(0, 3) 91 | for _ in range(num_orders): 92 | insert_random_order(conn, purchaser_id) 93 | 94 | print(f"⏳ Waiting for {INSERT_INTERVAL_SECONDS} seconds...") 95 | time.sleep(INSERT_INTERVAL_SECONDS) 96 | 97 | except KeyboardInterrupt: 98 | print("\n\n🛑 Script stopped by user (Ctrl+C).") 99 | finally: 100 | if conn: 101 | conn.close() 102 | print("Database connection closed.") 103 | 104 | 105 | if __name__ == "__main__": 106 | main() 107 | -------------------------------------------------------------------------------- /examples/lakekeeper/produce_data.py: -------------------------------------------------------------------------------- 1 | import psycopg2 2 | import random 3 | import time 4 | from faker import Faker 5 | 6 | # --- Database Connection Details --- 7 | # Based on your Debezium configuration, the host/port/user/password for the *source* DB 8 | DB_HOST = "localhost" 9 | DB_NAME = "postgres" 10 | DB_USER = "postgres" 11 | DB_PASSWORD = 
"postgres" 12 | DB_PORT = 5432 13 | 14 | # --- Configuration --- 15 | INSERT_INTERVAL_SECONDS = 10 16 | fake = Faker() 17 | 18 | 19 | def get_db_connection(): 20 | """Establishes and returns a PostgreSQL database connection.""" 21 | try: 22 | conn = psycopg2.connect( 23 | host=DB_HOST, 24 | database=DB_NAME, 25 | user=DB_USER, 26 | password=DB_PASSWORD, 27 | port=DB_PORT 28 | ) 29 | return conn 30 | except Exception as error: 31 | print(f"❌ Error connecting to the database: {error}") 32 | return None 33 | 34 | 35 | def insert_random_customer(conn): 36 | first_name = f"test-{fake.first_name()}" 37 | last_name = fake.last_name() 38 | email = f"{first_name.lower()}.{last_name.lower()}{random.randint(1, 100)}@{fake.domain_name()}" 39 | 40 | insert_query = """ 41 | INSERT INTO inventory.customers (first_name, last_name, email) 42 | VALUES (%s, %s, %s) 43 | RETURNING id; 44 | """ 45 | 46 | try: 47 | with conn.cursor() as cur: 48 | cur.execute(insert_query, (first_name, last_name, email)) 49 | new_id = cur.fetchone()[0] 50 | conn.commit() 51 | print(f"✅ Inserted new customer (ID: {new_id}): {first_name} {last_name}") 52 | return new_id 53 | except Exception as error: 54 | print(f"❌ Error inserting customer data: {error}") 55 | conn.rollback() 56 | raise error 57 | 58 | 59 | def insert_random_order(conn, purchaser_id): 60 | """Inserts a new random order into the inventory.orders table.""" 61 | # Ensure there's at least one customer ID to reference 62 | order_date = fake.date_time_this_year() 63 | quantity = random.randint(1, 10) 64 | product_id = random.randint(101, 109) # Assumes existing products 65 | 66 | insert_query = """ 67 | INSERT INTO inventory.orders (order_date, purchaser, quantity, product_id) 68 | VALUES (%s, %s, %s, %s); 69 | """ 70 | 71 | try: 72 | with conn.cursor() as cur: 73 | cur.execute(insert_query, (order_date, purchaser_id, quantity, product_id)) 74 | conn.commit() 75 | print(f"✅ Inserted new order for purchaser ID {purchaser_id} (Product: {product_id}, Quantity: {quantity})") 76 | except Exception as error: 77 | print(f"❌ Error inserting order data: {error}") 78 | conn.rollback() 79 | 80 | 81 | def main(): 82 | """Main function to run the continuous insertion loop.""" 83 | print("🚀 Starting continuous data insertion script...") 84 | 85 | conn = get_db_connection() 86 | try: 87 | while True: 88 | print("-" * 30) 89 | purchaser_id = insert_random_customer(conn) 90 | num_orders = random.randint(0, 3) 91 | for _ in range(num_orders): 92 | insert_random_order(conn, purchaser_id) 93 | 94 | print(f"⏳ Waiting for {INSERT_INTERVAL_SECONDS} seconds...") 95 | time.sleep(INSERT_INTERVAL_SECONDS) 96 | 97 | except KeyboardInterrupt: 98 | print("\n\n🛑 Script stopped by user (Ctrl+C).") 99 | finally: 100 | if conn: 101 | conn.close() 102 | print("Database connection closed.") 103 | 104 | 105 | if __name__ == "__main__": 106 | main() 107 | -------------------------------------------------------------------------------- /debezium-server-iceberg-sink/src/test/java/io/debezium/server/iceberg/batchsizewait/MaxBatchSizeWaitTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright memiiso Authors. 
4 | * * 5 | * * Licensed under the Apache Software License version 2.0, available at http://www.apache.org/licenses/LICENSE-2.0 6 | * 7 | */ 8 | 9 | package io.debezium.server.iceberg.batchsizewait; 10 | 11 | import io.debezium.server.iceberg.BaseSparkTest; 12 | import io.debezium.server.iceberg.testresources.CatalogNessie; 13 | import io.debezium.server.iceberg.testresources.S3Minio; 14 | import io.debezium.server.iceberg.testresources.SourcePostgresqlDB; 15 | import io.quarkus.test.common.QuarkusTestResource; 16 | import io.quarkus.test.junit.QuarkusTest; 17 | import io.quarkus.test.junit.QuarkusTestProfile; 18 | import io.quarkus.test.junit.TestProfile; 19 | import org.apache.spark.sql.Dataset; 20 | import org.apache.spark.sql.Row; 21 | import org.awaitility.Awaitility; 22 | import org.eclipse.microprofile.config.inject.ConfigProperty; 23 | import org.junit.jupiter.api.Disabled; 24 | import org.junit.jupiter.api.Test; 25 | import org.junit.jupiter.api.condition.DisabledIfEnvironmentVariable; 26 | 27 | import java.time.Duration; 28 | import java.util.HashMap; 29 | import java.util.Map; 30 | 31 | @QuarkusTest 32 | @TestProfile(MaxBatchSizeWaitTest.TestProfile.class) 33 | @QuarkusTestResource(value = SourcePostgresqlDB.class, restrictToAnnotatedClass = true) 34 | @QuarkusTestResource(value = S3Minio.class, restrictToAnnotatedClass = true) 35 | @QuarkusTestResource(value = CatalogNessie.class, restrictToAnnotatedClass = true) 36 | @DisabledIfEnvironmentVariable(named = "DEBEZIUM_FORMAT_VALUE", matches = "connect") 37 | @Disabled 38 | class MaxBatchSizeWaitTest extends BaseSparkTest { 39 | 40 | @ConfigProperty(name = "debezium.source.max.batch.size", defaultValue = "1000") 41 | Integer maxBatchSize; 42 | 43 | 44 | @Test 45 | public void testBatchsizeWait() throws Exception { 46 | int iteration = 100; 47 | PGCreateTestDataTable(); 48 | for (int i = 0; i <= iteration; i++) { 49 | this.PGLoadTestDataTable(maxBatchSize / 2, true); 50 | } 51 | Awaitility.await().atMost(Duration.ofSeconds(180)).until(() -> { 52 | try { 53 | Dataset<Row> df = spark.sql("SELECT substring(input_file_name(),0,260) as input_file, " + 54 | "count(*) as batch_size FROM debeziumevents.debeziumcdc_testc_inventory_test_data group " + 55 | "by 1"); 56 | df.show(false); 57 | // the committed batch size should equal maxBatchSize 58 | // since the wait timeout is set very high, batches should be triggered by hitting the maxBatchSize limit 59 | return df.filter("batch_size = " + maxBatchSize).count() >= 3; 60 | } catch (Exception e) { 61 | //e.printStackTrace(); 62 | return false; 63 | } 64 | }); 65 | } 66 | 67 | public static class TestProfile implements QuarkusTestProfile { 68 | @Override 69 | public Map<String, String> getConfigOverrides() { 70 | Map<String, String> config = new HashMap<>(); 71 | // wait 72 | config.put("debezium.sink.batch.batch-size-wait", "MaxBatchSizeWait"); 73 | config.put("debezium.source.connector.class", "io.debezium.connector.postgresql.PostgresConnector"); 74 | config.put("debezium.source.max.batch.size", "2000"); 75 | config.put("debezium.source.max.queue.size", "70000"); 76 | //config.put("debezium.source.poll.interval.ms", "1000"); 77 | config.put("debezium.sink.batch.batch-size-wait.max-wait-ms", "999000"); 78 | config.put("debezium.sink.batch.batch-size-wait.wait-interval-ms", "5000"); 79 | config.put("quarkus.log.category.\"io.debezium.server.iceberg.batchsizewait\".level", "DEBUG"); 80 | return config; 81 | } 82 | } 83 | } --------------------------------------------------------------------------------
/examples/nessie/docker-compose.yaml: -------------------------------------------------------------------------------- 1 | version: "3.7" 2 | 3 | services: 4 | debezium-iceberg: 5 | image: ghcr.io/memiiso/debezium-server-iceberg:latest 6 | container_name: debezium-server-iceberg 7 | networks: 8 | - iceberg_nessie_net 9 | depends_on: 10 | - nessie 11 | - minio 12 | - postgresqlsourcedb 13 | - mc 14 | volumes: 15 | - ./config/application.properties:/debezium/config/application.properties 16 | command: > 17 | bash -c " 18 | mvn dependency:copy -Dartifact=org.apache.iceberg:iceberg-nessie:1.10.0:jar -DoutputDirectory=/debezium/lib/ \ 19 | && mvn dependency:copy -Dartifact=org.projectnessie.nessie:nessie-client:0.104.5:jar -DoutputDirectory=/debezium/lib/ \ 20 | && mvn dependency:copy -Dartifact=org.projectnessie.nessie:nessie-model:0.104.5:jar -DoutputDirectory=/debezium/lib/ \ 21 | && /debezium/run.sh 22 | " 23 | environment: 24 | - AWS_ACCESS_KEY_ID=admin 25 | - AWS_SECRET_ACCESS_KEY=password 26 | - AWS_REGION=us-east-1 27 | 28 | nessie: 29 | image: projectnessie/nessie:latest 30 | container_name: nessie 31 | networks: 32 | - iceberg_nessie_net 33 | ports: 34 | - "19120:19120" 35 | depends_on: 36 | - minio 37 | environment: 38 | - QUARKUS_PROFILE=prod 39 | - QUARKUS_HTTP_PORT=19120 40 | - QUARKUS_LOG_LEVEL=DEBUG 41 | - QUARKUS_OTEL_SDK_DISABLED=true 42 | - NESSIE_CATALOG_DEFAULT_WAREHOUSE=warehouse 43 | - NESSIE_SERVER_AUTHENTICATION_ENABLED=false 44 | - NESSIE_CATALOG_SERVICE_S3_DEFAULT_OPTIONS_ENDPOINT=http://minio:9000 45 | - NESSIE_VERSION_STORE_TYPE=IN_MEMORY 46 | - NESSIE_CATALOG_WAREHOUSES_WAREHOUSE_LOCATION=s3a://warehouse/ 47 | - NESSIE_CATALOG_SERVICE_S3_DEFAULT_OPTIONS_ACCESS_KEY=urn:nessie-secret:quarkus:nessie.catalog.secrets.access-key 48 | - NESSIE_CATALOG_SERVICE_S3_DEFAULT_OPTIONS_PATH_STYLE_ACCESS=true 49 | - NESSIE_CATALOG_SERVICE_S3_DEFAULT_OPTIONS_AUTH_TYPE=STATIC 50 | - NESSIE_CATALOG_SECRETS_ACCESS_KEY_NAME=admin 51 | - NESSIE_CATALOG_SECRETS_ACCESS_KEY_SECRET=password 52 | - NESSIE_CATALOG_SERVICE_S3_DEFAULT_OPTIONS_REGION=us-east-1 53 | - NESSIE_CLIENT_API_VERSION=2 54 | 55 | postgresqlsourcedb: 56 | image: debezium/example-postgres:2.5 57 | container_name: postgresql-source-db 58 | environment: 59 | - POSTGRES_USER=postgres 60 | - POSTGRES_PASSWORD=postgres 61 | - POSTGRES_DB=postgres 62 | - POSTGRES_INITDB_ARGS="-E UTF8" 63 | - LANG=en_US.utf8 64 | networks: 65 | - iceberg_nessie_net 66 | ports: 67 | - "5432:5432" 68 | 69 | # storage 70 | minio: 71 | image: minio/minio 72 | container_name: minio 73 | environment: 74 | - MINIO_ROOT_USER=admin 75 | - MINIO_ROOT_PASSWORD=password 76 | - MINIO_DOMAIN=minio 77 | - MINIO_BUCKET=warehouse 78 | networks: 79 | - iceberg_nessie_net 80 | ports: 81 | - "9001:9001" 82 | - "9000:9000" 83 | command: [ "server", "/data", "--console-address", ":9001" ] 84 | mc: 85 | image: minio/mc 86 | container_name: mc 87 | networks: 88 | - iceberg_nessie_net 89 | depends_on: 90 | - minio 91 | environment: 92 | - AWS_ACCESS_KEY_ID=demo 93 | - AWS_SECRET_ACCESS_KEY=password 94 | - AWS_REGION=us-east-1 95 | entrypoint: > 96 | /bin/sh -c " 97 | until (/usr/bin/mc alias set minio http://minio:9000 admin password) do echo '...waiting...' 
&& sleep 1; done; 98 | /usr/bin/mc mb --ignore-existing minio/warehouse; 99 | /usr/bin/mc anonymous set public minio/warehouse; 100 | exit 0; 101 | " 102 | networks: 103 | iceberg_nessie_net: 104 | name: iceberg_nessie_net 105 | -------------------------------------------------------------------------------- /debezium-server-iceberg-sink/src/test/java/io/debezium/server/iceberg/testresources/S3Minio.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright memiiso Authors. 4 | * * 5 | * * Licensed under the Apache Software License version 2.0, available at http://www.apache.org/licenses/LICENSE-2.0 6 | * 7 | */ 8 | 9 | package io.debezium.server.iceberg.testresources; 10 | 11 | import io.debezium.server.iceberg.TestConfigSource; 12 | import io.minio.ListObjectsArgs; 13 | import io.minio.MakeBucketArgs; 14 | import io.minio.MinioClient; 15 | import io.minio.Result; 16 | import io.minio.messages.Bucket; 17 | import io.minio.messages.Item; 18 | import io.quarkus.test.common.QuarkusTestResourceLifecycleManager; 19 | import org.slf4j.Logger; 20 | import org.slf4j.LoggerFactory; 21 | import org.testcontainers.containers.MinIOContainer; 22 | import org.testcontainers.utility.DockerImageName; 23 | 24 | import java.util.List; 25 | import java.util.Map; 26 | import java.util.concurrent.ConcurrentHashMap; 27 | 28 | public class S3Minio implements QuarkusTestResourceLifecycleManager { 29 | 30 | protected static final Logger LOGGER = LoggerFactory.getLogger(S3Minio.class); 31 | static final String DEFAULT_IMAGE = "minio/minio:RELEASE.2025-04-08T15-41-24Z"; 32 | public static MinioClient client; 33 | 34 | static public final MinIOContainer container = new MinIOContainer(DockerImageName.parse(DEFAULT_IMAGE)) 35 | .withUserName(TestConfigSource.S3_MINIO_ACCESS_KEY) 36 | .withPassword(TestConfigSource.S3_MINIO_SECRET_KEY); 37 | 38 | public static void listFiles() { 39 | LOGGER.info("-----------------------------------------------------------------"); 40 | try { 41 | List bucketList = client.listBuckets(); 42 | for (Bucket bucket : bucketList) { 43 | System.out.printf("Bucket:%s ROOT\n", bucket.name()); 44 | Iterable> results = client.listObjects(ListObjectsArgs.builder().bucket(bucket.name()).recursive(true).build()); 45 | for (Result result : results) { 46 | Item item = result.get(); 47 | System.out.printf("Bucket:%s Item:%s Size:%s\n", bucket.name(), item.objectName(), item.size()); 48 | } 49 | } 50 | } catch (Exception e) { 51 | LOGGER.info("Failed listing bucket"); 52 | } 53 | LOGGER.info("-----------------------------------------------------------------"); 54 | 55 | } 56 | 57 | @Override 58 | public void stop() { 59 | container.stop(); 60 | } 61 | 62 | public static String getS3WebURL() { 63 | return String.format("http://%s:%s", container.getHost(), container.getMappedPort(9001)); 64 | } 65 | 66 | @Override 67 | public Map start() { 68 | container.start(); 69 | client = MinioClient 70 | .builder() 71 | .endpoint(container.getS3URL()) 72 | .credentials(container.getUserName(), container.getPassword()) 73 | .build(); 74 | 75 | try { 76 | client.ignoreCertCheck(); 77 | client.makeBucket(MakeBucketArgs.builder() 78 | .region(TestConfigSource.S3_REGION) 79 | .bucket(TestConfigSource.S3_BUCKET_NAME) 80 | .build()); 81 | } catch (Exception e) { 82 | e.printStackTrace(); 83 | } 84 | LOGGER.info("Minio Started\nMinio UI: {}\nMinio S3 URL: {}", getS3WebURL(), container.getS3URL()); 85 | Map config = new ConcurrentHashMap<>(); 86 | // FOR JDBC CATALOG 
87 | config.put("debezium.sink.iceberg.s3.endpoint", container.getS3URL()); 88 | config.put("debezium.sink.iceberg.s3.path-style-access", "true"); 89 | config.put("debezium.sink.iceberg.s3.access-key-id", TestConfigSource.S3_MINIO_ACCESS_KEY); 90 | config.put("debezium.sink.iceberg.s3.secret-access-key", TestConfigSource.S3_MINIO_SECRET_KEY); 91 | config.put("debezium.sink.iceberg.client.region", TestConfigSource.S3_REGION); 92 | config.put("debezium.sink.iceberg.io-impl", TestConfigSource.ICEBERG_FILEIO); 93 | 94 | return config; 95 | } 96 | 97 | 98 | } 99 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Create Release 2 | 3 | on: 4 | push: 5 | tags: [ '*.*.*.*' ] 6 | branches: [ master ] 7 | 8 | env: 9 | SPARK_LOCAL_IP: 127.0.0.1 10 | 11 | jobs: 12 | build-and-release: 13 | name: Build and Release 14 | if: github.repository_owner == 'memiiso' 15 | runs-on: ubuntu-latest 16 | permissions: 17 | contents: write 18 | packages: write 19 | actions: write 20 | steps: 21 | - name: Checkout Repository 22 | uses: actions/checkout@v4 23 | 24 | - name: Set up Java 25 | uses: actions/setup-java@v5 26 | with: 27 | distribution: 'temurin' 28 | java-version: 21 29 | cache: 'maven' 30 | 31 | - name: Determine Release Name 32 | id: set_release_version 33 | run: | 34 | if [[ "${{ github.ref_name }}" == "master" ]]; then 35 | echo "RELEASE_VERSION=latest" >> $GITHUB_ENV 36 | else 37 | echo "RELEASE_VERSION=${{ github.ref_name }}" >> $GITHUB_ENV 38 | fi 39 | shell: bash # Explicitly set shell to bash 40 | 41 | - name: Build Project 42 | run: mvn -B --no-transfer-progress clean package -Passembly --file pom.xml -Drevision=${{ env.RELEASE_VERSION }} 43 | 44 | - name: Delete Existing Release (if any) 45 | run: gh release delete ${{ env.RELEASE_VERSION }} --cleanup-tag --yes 46 | continue-on-error: true 47 | env: 48 | GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} 49 | 50 | - name: Create GitHub Release 51 | id: create_release 52 | uses: softprops/action-gh-release@v2 53 | env: 54 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 55 | with: 56 | name: Release ${{ env.RELEASE_VERSION }} 57 | tag_name: ${{ env.RELEASE_VERSION }} 58 | body: Release ${{ env.RELEASE_VERSION }} 59 | draft: false 60 | prerelease: true 61 | - name: Delete Maven Package from Github 62 | uses: paulushcgcj/delete-github-package@1.0.0 63 | with: 64 | type: maven 65 | name: io.debezium.debezium-server-iceberg 66 | version: ${{ env.RELEASE_VERSION }} 67 | continue-on-error: true 68 | - name: Delete Maven Package Dist from Github 69 | uses: paulushcgcj/delete-github-package@1.0.0 70 | with: 71 | type: maven 72 | name: io.debezium.debezium-server-iceberg-dist 73 | version: ${{ env.RELEASE_VERSION }} 74 | continue-on-error: true 75 | - name: Delete Maven Package Sink from Github 76 | uses: paulushcgcj/delete-github-package@1.0.0 77 | with: 78 | type: maven 79 | name: io.debezium.debezium-server-iceberg-sink 80 | version: ${{ env.RELEASE_VERSION }} 81 | continue-on-error: true 82 | - name: Publish ${{ env.RELEASE_VERSION }} to GitHub Packages 83 | run: mvn --batch-mode clean package -Passembly deploy --file pom.xml -Drevision=${{ env.RELEASE_VERSION }} -Dmaven.test.skip=true 84 | env: 85 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 86 | 87 | - name: Login to GHCR 88 | uses: docker/login-action@v3 89 | with: 90 | registry: ghcr.io 91 | username: ${{ github.repository_owner }} 92 | password: ${{ 
secrets.GITHUB_TOKEN }} 93 | 94 | - name: Build and Push Docker Image 95 | uses: docker/build-push-action@v6 96 | with: 97 | context: ./ 98 | file: ./Dockerfile 99 | push: true 100 | build-args: | 101 | RELEASE_VERSION=${{ env.RELEASE_VERSION }} 102 | tags: ghcr.io/${{ github.repository_owner }}/debezium-server-iceberg:${{ env.RELEASE_VERSION }} 103 | 104 | - name: Delete Untagged Docker Images 105 | uses: dylanratcliffe/delete-untagged-containers@main 106 | with: 107 | package_name: debezium-server-iceberg 108 | token: ${{ secrets.GITHUB_TOKEN }} 109 | -------------------------------------------------------------------------------- /debezium-server-iceberg-sink/src/test/java/io/debezium/server/iceberg/history/IcebergSchemaHistoryTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright memiiso Authors. 4 | * * 5 | * * Licensed under the Apache Software License version 2.0, available at http://www.apache.org/licenses/LICENSE-2.0 6 | * 7 | */ 8 | 9 | package io.debezium.server.iceberg.history; 10 | 11 | import io.debezium.server.iceberg.BaseSparkTest; 12 | import io.debezium.server.iceberg.testresources.CatalogNessie; 13 | import io.debezium.server.iceberg.testresources.S3Minio; 14 | import io.debezium.server.iceberg.testresources.SourceMysqlDB; 15 | import io.quarkus.test.common.QuarkusTestResource; 16 | import io.quarkus.test.junit.QuarkusTest; 17 | import io.quarkus.test.junit.QuarkusTestProfile; 18 | import io.quarkus.test.junit.TestProfile; 19 | import org.apache.spark.sql.Dataset; 20 | import org.apache.spark.sql.Row; 21 | import org.awaitility.Awaitility; 22 | import org.junit.jupiter.api.Test; 23 | import org.junit.jupiter.api.condition.DisabledIfEnvironmentVariable; 24 | 25 | import java.sql.SQLException; 26 | import java.time.Duration; 27 | import java.util.HashMap; 28 | import java.util.Map; 29 | 30 | import static io.debezium.server.iceberg.TestConfigSource.ICEBERG_CATALOG_TABLE_NAMESPACE; 31 | 32 | /** 33 | * Integration test that verifies basic reading from MySQL database and writing to iceberg destination.
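* The Debezium schema history (source DDL changes) is persisted in an iceberg table, so the connector can rebuild table schemas after a restart.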
34 |  *
35 |  * @author Ismail Simsek
36 |  */
37 | @QuarkusTest
38 | @QuarkusTestResource(value = S3Minio.class, restrictToAnnotatedClass = true)
39 | @QuarkusTestResource(value = SourceMysqlDB.class, restrictToAnnotatedClass = true)
40 | @QuarkusTestResource(value = CatalogNessie.class, restrictToAnnotatedClass = true)
41 | @TestProfile(IcebergSchemaHistoryTest.TestProfile.class)
42 | @DisabledIfEnvironmentVariable(named = "DEBEZIUM_FORMAT_VALUE", matches = "connect")
43 | public class IcebergSchemaHistoryTest extends BaseSparkTest {
44 |   @Test
45 |   public void testSimpleUpload() throws SQLException, ClassNotFoundException {
46 |     String sqlCreate = "CREATE TABLE IF NOT EXISTS inventory.test_schema_history_ddl (" +
47 |         " c_id INTEGER ," +
48 |         " c_data TEXT," +
49 |         "  PRIMARY KEY (c_id)" +
50 |         " );";
51 |     SourceMysqlDB.runSQL(sqlCreate);
52 |     String sqlInsert =
53 |         "INSERT INTO inventory.test_schema_history_ddl (c_id, c_data ) " +
54 |             "VALUES  (1,'data-1'),(2,'data-2'),(3,'data-3');";
55 |     SourceMysqlDB.runSQL(sqlInsert);
56 | 
57 |     Awaitility.await().atMost(Duration.ofSeconds(120)).until(() -> {
58 |       try {
59 |         Dataset<Row> ds = getTableData("testc.inventory.test_schema_history_ddl");
60 |         return ds.count() >= 2;
61 |       } catch (Exception e) {
62 |         return false;
63 |       }
64 |     });
65 | 
66 |     // verify the CREATE TABLE DDL was captured in the schema history table
67 |     Awaitility.await().atMost(Duration.ofSeconds(120)).until(() -> {
68 |       try {
69 |         Dataset<Row> ds = getTableData(ICEBERG_CATALOG_TABLE_NAMESPACE, "debezium_database_history_storage_table");
70 |         ds.show(10, false);
71 |         return ds.count() > 1
72 |             && ds.where("history_data ILIKE '%CREATE%TABLE%test_schema_history_ddl%'").count() == 1
73 |             ;
74 |       } catch (Exception e) {
75 |         return false;
76 |       }
77 |     });
78 |   }
79 | 
80 | 
81 |   public static class TestProfile implements QuarkusTestProfile {
82 |     @Override
83 |     public Map<String, String> getConfigOverrides() {
84 |       Map<String, String> config = new HashMap<>();
85 |       config.put("quarkus.profile", "mysql");
86 |       config.put("%mysql.debezium.source.connector.class", "io.debezium.connector.mysql.MySqlConnector");
87 |       // config.put("%mysql.debezium.source.table.whitelist", "inventory.*");
88 |       config.put("debezium.source.schema.history.internal", "io.debezium.server.iceberg.history.IcebergSchemaHistory");
89 |       config.put("debezium.source.schema.history.internal.iceberg.table-name", "debezium_database_history_storage_table");
90 |       config.put("debezium.source.table.whitelist", "inventory.test_schema_history_ddl");
91 |       return config;
92 |     }
93 | 
94 |     @Override
95 |     public String getConfigProfile() {
96 |       return "mysql";
97 |     }
98 |   }
99 | }
--------------------------------------------------------------------------------
/debezium-server-iceberg-sink/src/test/java/io/debezium/server/iceberg/IcebergChangeConsumerMysqlTest.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  *
 3 |  * * Copyright memiiso Authors.
 4 |  * *
 5 |  * * Licensed under the Apache Software License version 2.0, available at http://www.apache.org/licenses/LICENSE-2.0
 6 |  *
 7 |  */
 8 | 
 9 | package io.debezium.server.iceberg;
10 | 
11 | import com.google.common.collect.Lists;
12 | import io.debezium.jdbc.TemporalPrecisionMode;
13 | import io.debezium.server.iceberg.testresources.CatalogNessie;
14 | import io.debezium.server.iceberg.testresources.S3Minio;
15 | import io.debezium.server.iceberg.testresources.SourceMysqlDB;
16 | import io.quarkus.test.common.QuarkusTestResource;
17 | import io.quarkus.test.junit.QuarkusTest;
18 | import io.quarkus.test.junit.QuarkusTestProfile;
19 | import io.quarkus.test.junit.TestProfile;
20 | import org.apache.iceberg.data.Record;
21 | import org.apache.iceberg.io.CloseableIterable;
22 | import org.awaitility.Awaitility;
23 | import org.junit.jupiter.api.Test;
24 | 
25 | import java.time.Duration;
26 | import java.util.HashMap;
27 | import java.util.Map;
28 | 
29 | import static org.junit.jupiter.api.Assertions.assertEquals;
30 | 
31 | /**
32 |  * @author Ismail Simsek
33 |  */
34 | @QuarkusTest
35 | @QuarkusTestResource(value = S3Minio.class, restrictToAnnotatedClass = true)
36 | @QuarkusTestResource(value = SourceMysqlDB.class, restrictToAnnotatedClass = true)
37 | @QuarkusTestResource(value = CatalogNessie.class, restrictToAnnotatedClass = true)
38 | @TestProfile(IcebergChangeConsumerMysqlTest.TestProfile.class)
39 | public class IcebergChangeConsumerMysqlTest extends BaseTest {
40 | 
41 |   @Test
42 |   public void testSimpleUpload() throws Exception {
43 |     assertEquals(true, config.debezium().isEventFlatteningEnabled());
44 |     assertEquals(TemporalPrecisionMode.CONNECT, config.debezium().temporalPrecisionMode());
45 |     String sqlCreate = "CREATE TABLE IF NOT EXISTS inventory.test_delete_table (" +
46 |         " c_id INTEGER ," +
47 |         " c_id2 INTEGER ," +
48 |         " c_data TEXT," +
49 |         "  PRIMARY KEY (c_id, c_id2)" +
50 |         " );";
51 |     String sqlInsert =
52 |         "INSERT INTO inventory.test_delete_table (c_id, c_id2, c_data ) " +
53 |             "VALUES  (1,1,'data'),(1,2,'data'),(1,3,'data'),(1,4,'data') ;";
54 |     String sqlDelete = "DELETE FROM inventory.test_delete_table where c_id = 1 ;";
55 | 
56 |     SourceMysqlDB.runSQL(sqlCreate);
57 |     SourceMysqlDB.runSQL(sqlInsert);
58 |     Awaitility.await().atMost(Duration.ofSeconds(60)).until(() -> {
59 |       try {
60 | 
61 |         CloseableIterable<Record> result = getTableDataV2("testc.inventory.test_delete_table");
62 |         return Lists.newArrayList(result).size() == 4;
63 |       } catch (Exception e) {
64 |         return false;
65 |       }
66 |     });
67 | 
68 |     SourceMysqlDB.runSQL(sqlDelete);
69 |     SourceMysqlDB.runSQL(sqlInsert);
70 |     SourceMysqlDB.runSQL(sqlDelete);
71 |     SourceMysqlDB.runSQL(sqlInsert);
72 | 
73 |     Awaitility.await().atMost(Duration.ofSeconds(120)).until(() -> {
74 |       try {
75 |         CloseableIterable<Record> result = getTableDataV2("testc.inventory.test_delete_table");
76 |         //result.forEach(System.out::println);
77 |         //System.out.println("======================");
78 |         return Lists.newArrayList(result).size() >= 20;
79 |       } catch (Exception e) {
80 |         return false;
81 |       }
82 |     });
83 | 
84 |   }
85 | 
86 |   public static class TestProfile implements QuarkusTestProfile {
87 |     @Override
88 |     public Map<String, String> getConfigOverrides() {
89 |       Map<String, String> config = new HashMap<>();
90 |       config.put("quarkus.profile", "mysql");
91 |       config.put("%mysql.debezium.source.connector.class", "io.debezium.connector.mysql.MySqlConnector");
92 |       config.put("%mysql.debezium.source.table.whitelist", "inventory.customers,inventory.test_delete_table");
93 | 
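      // Keys prefixed with "%mysql." are Quarkus profile-scoped: they take effect only while the "mysql" profile set above is active.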
//config.put("%mysql.debezium.source.include.schema.changes", "false"); 94 | return config; 95 | } 96 | 97 | @Override 98 | public String getConfigProfile() { 99 | return "mysql"; 100 | } 101 | } 102 | 103 | } 104 | -------------------------------------------------------------------------------- /debezium-server-iceberg-sink/src/test/java/io/debezium/server/iceberg/IcebergChangeConsumerTestUnwraapped.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright memiiso Authors. 4 | * * 5 | * * Licensed under the Apache Software License version 2.0, available at http://www.apache.org/licenses/LICENSE-2.0 6 | * 7 | */ 8 | 9 | package io.debezium.server.iceberg; 10 | 11 | import com.google.common.collect.Lists; 12 | import io.debezium.server.iceberg.testresources.CatalogNessie; 13 | import io.debezium.server.iceberg.testresources.S3Minio; 14 | import io.debezium.server.iceberg.testresources.SourcePostgresqlDB; 15 | import io.quarkus.test.common.QuarkusTestResource; 16 | import io.quarkus.test.junit.QuarkusTest; 17 | import io.quarkus.test.junit.QuarkusTestProfile; 18 | import io.quarkus.test.junit.TestProfile; 19 | import org.apache.iceberg.catalog.TableIdentifier; 20 | import org.apache.iceberg.data.Record; 21 | import org.apache.iceberg.io.CloseableIterable; 22 | import org.apache.spark.sql.Dataset; 23 | import org.apache.spark.sql.Row; 24 | import org.awaitility.Awaitility; 25 | import org.junit.jupiter.api.Test; 26 | 27 | import java.time.Duration; 28 | import java.util.HashMap; 29 | import java.util.Map; 30 | 31 | import static io.debezium.server.iceberg.TestConfigSource.ICEBERG_CATALOG_TABLE_NAMESPACE; 32 | import static org.junit.jupiter.api.Assertions.assertEquals; 33 | import static org.junit.jupiter.api.Assertions.assertTrue; 34 | 35 | /** 36 | * Integration test that verifies basic reading from PostgreSQL database and writing to iceberg destination. 
37 |  *
38 |  * @author Ismail Simsek
39 |  */
40 | @QuarkusTest
41 | @QuarkusTestResource(value = S3Minio.class, restrictToAnnotatedClass = true)
42 | @QuarkusTestResource(value = SourcePostgresqlDB.class, restrictToAnnotatedClass = true)
43 | @QuarkusTestResource(value = CatalogNessie.class, restrictToAnnotatedClass = true)
44 | @TestProfile(IcebergChangeConsumerTestUnwraapped.TestProfile.class)
45 | public class IcebergChangeConsumerTestUnwraapped extends BaseSparkTest {
46 | 
47 | 
48 |   @Test
49 |   public void testDebeziumConfig() {
50 |     assertTrue(config.debezium().transformsConfigs().containsKey("unwrap.type"));
51 |     assertEquals(",", debeziumConfig.transforms());
52 |     assertEquals(false, config.debezium().isEventFlatteningEnabled());
53 | 
54 |     debeziumConfig.transformsConfigs().forEach((k, v) -> {
55 |       LOGGER.error("{} ==> {}", k, v);
56 |     });
57 |   }
58 | 
59 |   @Test
60 |   public void testSimpleUpload() {
61 | 
62 |     // make sure it's not unwrapped
63 |     assertEquals(false, config.debezium().isEventFlatteningEnabled());
64 | 
65 |     Awaitility.await().atMost(Duration.ofSeconds(120)).until(() -> {
66 |       try {
67 |         Dataset<Row> ds = getTableData("testc.inventory.customers");
68 |         ds.show(false);
69 |         return ds.count() >= 3;
70 |       } catch (Exception e) {
71 |         return false;
72 |       }
73 |     });
74 | 
75 |     // test nested data (struct) consumed
76 |     Awaitility.await().atMost(Duration.ofSeconds(120)).until(() -> {
77 |       try {
78 |         Dataset<Row> ds = getTableData("testc.inventory.geom");
79 |         ds.show(false);
80 |         return ds.count() >= 3;
81 |       } catch (Exception e) {
82 |         return false;
83 |       }
84 |     });
85 | 
86 |     Awaitility.await().atMost(Duration.ofSeconds(120)).until(() -> {
87 |       try {
88 |         CloseableIterable<Record> d = getTableDataV2(TableIdentifier.of(ICEBERG_CATALOG_TABLE_NAMESPACE, "debezium_offset_storage_table"));
89 |         System.out.println(Lists.newArrayList(d));
90 |         return Lists.newArrayList(d).size() == 1;
91 |       } catch (Exception e) {
92 |         return false;
93 |       }
94 |     });
95 |   }
96 | 
97 |   public static class TestProfile implements QuarkusTestProfile {
98 |     @Override
99 |     public Map<String, String> getConfigOverrides() {
100 |       Map<String, String> config = new HashMap<>();
101 |       config.put("debezium.sink.iceberg.write.format.default", "orc");
102 |       config.put("debezium.sink.iceberg.destination-regexp", "\\d");
103 |       config.put("debezium.source.hstore.handling.mode", "map");
104 |       config.put("debezium.transforms", ",");
105 |       config.put("debezium.sink.iceberg.create-identifier-fields", "false");
106 |       return config;
107 |     }
108 |   }
109 | 
110 | }
111 | 
--------------------------------------------------------------------------------
/debezium-server-iceberg-dist/src/main/resources/assemblies/server-distribution.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <assembly xmlns="http://maven.apache.org/ASSEMBLY/2.1.0"
 3 |           xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
 4 |           xsi:schemaLocation="http://maven.apache.org/ASSEMBLY/2.1.0 http://maven.apache.org/xsd/assembly-2.1.0.xsd">
 5 |   <id>distribution</id>
 6 |   <formats>
 7 |     <format>zip</format>
 8 |   </formats>
 9 |   <includeBaseDirectory>false</includeBaseDirectory>
10 |   <dependencySets>
11 |     <dependencySet>
12 |       <outputDirectory>${project.parent.artifactId}/lib</outputDirectory>
13 |       <useProjectArtifact>false</useProjectArtifact>
14 |       <scope>runtime</scope>
15 |       <unpack>false</unpack>
16 |       <useTransitiveFiltering>true</useTransitiveFiltering>
17 |       <excludes>
18 |         <exclude>org.apache.kafka:kafka-tools:*</exclude>
19 |         <exclude>javax.ws.rs:javax.ws.rs-api:*</exclude>
20 |         <exclude>org.apache.kafka:connect-file:*</exclude>
21 |         <exclude>org.glassfish.jersey.*:*:*</exclude>
22 |         <exclude>org.eclipse.jetty:*:*</exclude>
23 |         <exclude>org.codehaus.plexus:*:*</exclude>
24 |         <exclude>log4j:log4j:*</exclude>
25 |         <exclude>ch.qos.reload4j:reload4j</exclude>
26 |         <exclude>io.debezium:debezium-scripting</exclude>
27 |         <exclude>io.debezium:debezium-scripting-languages</exclude>
28 |         <exclude>io.prometheus.jmx:jmx_prometheus_javaagent:*</exclude>
29 |       </excludes>
30 |     </dependencySet>
31 |     <dependencySet>
32 |       <outputDirectory>${project.parent.artifactId}/lib_metrics</outputDirectory>
33 |       <useProjectArtifact>false</useProjectArtifact>
34 |       <scope>runtime</scope>
35 |       <unpack>false</unpack>
36 |       <useTransitiveFiltering>true</useTransitiveFiltering>
37 |       <includes>
38 |         <include>io.prometheus.jmx:jmx_prometheus_javaagent:*</include>
39 |       </includes>
40 |     </dependencySet>
41 |     <dependencySet>
42 |       <outputDirectory>${project.parent.artifactId}/lib_opt</outputDirectory>
43 |       <useProjectArtifact>false</useProjectArtifact>
44 |       <scope>runtime</scope>
45 |       <unpack>false</unpack>
46 |       <useTransitiveFiltering>true</useTransitiveFiltering>
47 |       <includes>
48 |         <include>io.debezium:debezium-scripting:*</include>
49 |         <include>io.debezium:debezium-scripting-languages:*</include>
50 |       </includes>
51 |     </dependencySet>
52 |   </dependencySets>
53 |   <fileSets>
54 |     <fileSet>
55 |       <directory>${project.basedir}/..</directory>
56 |       <outputDirectory>${project.parent.artifactId}</outputDirectory>
57 |       <includes>
58 |         <include>README*</include>
59 |         <include>CHANGELOG*</include>
60 |         <include>CONTRIBUTE*</include>
61 |         <include>COPYRIGHT*</include>
62 |         <include>LICENSE*</include>
63 |       </includes>
64 |       <useDefaultExcludes>true</useDefaultExcludes>
65 |     </fileSet>
66 |     <fileSet>
67 |       <directory>${project.build.directory}</directory>
68 |       <outputDirectory>${project.parent.artifactId}</outputDirectory>
69 |       <includes>
70 |         <include>*-runner.jar</include>
71 |         <include>LICENSE-3RD-PARTY.txt</include>
72 |       </includes>
73 |     </fileSet>
74 |     <fileSet>
75 |       <directory>src/main/resources/distro</directory>
76 |       <outputDirectory>${project.parent.artifactId}</outputDirectory>
77 |       <includes>
78 |         <include>**/*</include>
79 |       </includes>
80 |     </fileSet>
81 |   </fileSets>
82 | </assembly>
83 | 
--------------------------------------------------------------------------------
/debezium-server-iceberg-dist/src/main/resources/distro/debezium.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | import jnius_config
 3 | import logging
 4 | import os
 5 | import sys
 6 | ##### logger
 7 | import threading
 8 | from pathlib import Path
 9 | 
10 | log = logging.getLogger(name="debezium")
11 | log.setLevel(logging.INFO)
12 | handler = logging.StreamHandler(sys.stdout)
13 | handler.setLevel(logging.INFO)
14 | formatter = logging.Formatter('%(asctime)s %(levelname)s [%(module)s] (%(funcName)s) %(message)s')
15 | handler.setFormatter(formatter)
16 | log.addHandler(handler)
17 | 
18 | 
19 | #####
20 | 
21 | class Debezium():
22 | 
23 |     def __init__(self, debezium_dir: str = None, conf_dir: str = None, java_home: str = None):
24 |         if debezium_dir is None:
25 |             self.debezium_server_dir: Path = Path(__file__).resolve().parent
26 |         else:
27 |             if not Path(debezium_dir).is_dir():
28 |                 raise Exception("Debezium Server directory '%s' not found" % debezium_dir)
29 |             self.debezium_server_dir: Path = Path(debezium_dir)
30 |         log.info("Setting Debezium dir to:%s" % self.debezium_server_dir.as_posix())
31 | 
32 |         if conf_dir is None:
33 |             self.conf_dir = self.debezium_server_dir.joinpath("config")
34 |         else:
35 |             if not Path(conf_dir).is_dir():
36 |                 raise Exception("Debezium conf directory '%s' not found" % conf_dir)
37 |             self.conf_dir: Path = Path(conf_dir)
38 |         log.info("Setting conf dir to:%s" % self.conf_dir.as_posix())
39 | 
40 |         ##### jnius
41 |         if java_home:
42 |             self.java_home(java_home=java_home)
43 | 
44 |         DEBEZIUM_CLASSPATH: list = [
45 |             self.debezium_server_dir.joinpath('*').as_posix(),
46 |             self.debezium_server_dir.joinpath("lib/*").as_posix(),
47 |             self.conf_dir.as_posix()]
48 |         self.add_classpath(*DEBEZIUM_CLASSPATH)
49 | 
50 |     def add_classpath(self, *classpath):
51 |         if jnius_config.vm_running:
52 |             raise ValueError(
53 |                 "VM is already running, can't set classpath/options; VM started at %s" % jnius_config.vm_started_at)
54 | 
55 |         jnius_config.add_classpath(*classpath)
56 |         log.info("VM Classpath: %s" % jnius_config.get_classpath())
57 | 
58 |     def java_home(self, java_home: str):
59 |         if jnius_config.vm_running:
60 |             raise ValueError("VM is already running, can't set java home; VM started at" + jnius_config.vm_started_at)
61 | 
62 |         os.putenv("JAVA_HOME", java_home)
63 |         os.environ["JAVA_HOME"] = java_home
64 |         log.info("JAVA_HOME set to %s" % java_home)
65 | 
66 |     # pylint: disable=no-name-in-module
67 |     def run(self, *args: str):
68 | 
69 |         try:
70 |             jnius_config.add_options(*args)
71 |             log.info("Configured jvm options:%s" % jnius_config.get_options())
72 | 
73 |             from jnius import autoclass
74 |             DebeziumServer = autoclass('io.debezium.server.Main')
75 |             _dbz = DebeziumServer()
76 |             return _dbz.main()
77 |         finally:
78 |             from jnius import detach
79 |             detach()
80 | 
81 | 
82 | class DebeziumRunAsyn(threading.Thread):
83 |     def __init__(self, debezium_dir: str, java_args: list, java_home: str = None):
84 |         threading.Thread.__init__(self)
85 |         self.debezium_dir = debezium_dir
86 |         self.java_args = java_args
87 |         self.java_home = java_home
88 |         self._dbz: Debezium = None
89 | 
90 |     def run(self):
91 |         self._dbz = Debezium(debezium_dir=self.debezium_dir, java_home=self.java_home)
92 |         return self._dbz.run(*self.java_args)
93 | 
94 | 
95 | def main():
96 |     parser = argparse.ArgumentParser()
97 |     parser.add_argument('--debezium_dir', type=str, default=None,
98 |                         help='Directory of debezium server application')
99 |     parser.add_argument('--conf_dir', type=str, default=None,
100 |                         help='Directory of application.properties')
101 |     parser.add_argument('--java_home', type=str, default=None,
102 |                         help='JAVA_HOME directory')
103 |     _args, args = parser.parse_known_args()
104 |     ds = Debezium(debezium_dir=_args.debezium_dir, conf_dir=_args.conf_dir, java_home=_args.java_home)
105 |     ds.run(*args)
106 | 
107 | 
108 | if __name__ == '__main__':
109 |     main()
110 | 
--------------------------------------------------------------------------------
/debezium-server-iceberg-sink/src/main/java/io/debezium/server/iceberg/tableoperator/IcebergTableWriterFactory.java:
--------------------------------------------------------------------------------
 1 | package io.debezium.server.iceberg.tableoperator;
 2 | 
 3 | import io.debezium.server.iceberg.GlobalConfig;
 4 | import io.debezium.server.iceberg.IcebergUtil;
 5 | import jakarta.enterprise.context.Dependent;
 6 | import jakarta.inject.Inject;
 7 | import org.apache.iceberg.FileFormat;
 8 | import org.apache.iceberg.Table;
 9 | import org.apache.iceberg.data.GenericAppenderFactory;
10 | import org.apache.iceberg.data.Record;
11 | import org.apache.iceberg.io.BaseTaskWriter;
12 | import org.apache.iceberg.io.OutputFileFactory;
13 | import org.apache.iceberg.io.UnpartitionedWriter;
14 | import org.apache.iceberg.util.PropertyUtil;
15 | import org.slf4j.Logger;
16 | import org.slf4j.LoggerFactory;
17 | 
18 | import java.util.Set;
19 | 
20 | import static org.apache.iceberg.TableProperties.WRITE_TARGET_FILE_SIZE_BYTES;
21 | import static org.apache.iceberg.TableProperties.WRITE_TARGET_FILE_SIZE_BYTES_DEFAULT;
22 | 
23 | /**
24 |  * Factory that creates the TaskWriter for a given Iceberg table, selecting an append or delta (upsert) writer based on the configured upsert mode and the table's identifier fields.
25 |  */
26 | @Dependent
27 | public class IcebergTableWriterFactory {
28 |   private static final Logger LOGGER = LoggerFactory.getLogger(IcebergTableWriterFactory.class);
29 |   @Inject
30 |   GlobalConfig config;
31 | 
32 |   public BaseTaskWriter<Record> create(Table icebergTable) {
33 | 
34 |     // file format of the table: parquet, orc ...
35 |     FileFormat format = IcebergUtil.getTableFileFormat(icebergTable);
36 |     // appender factory
37 |     GenericAppenderFactory appenderFactory = IcebergUtil.getTableAppender(icebergTable);
38 |     OutputFileFactory fileFactory = IcebergUtil.getTableOutputFileFactory(icebergTable, format);
39 |     // target data file size
40 |     long targetFileSize =
41 |         PropertyUtil.propertyAsLong(
42 |             icebergTable.properties(), WRITE_TARGET_FILE_SIZE_BYTES, WRITE_TARGET_FILE_SIZE_BYTES_DEFAULT);
43 | 
44 |     if (!config.iceberg().upsert()) {
45 |       // running in append mode
46 |       return appendWriter(icebergTable, format, appenderFactory, fileFactory, targetFileSize);
47 |     } else if (icebergTable.schema().identifierFieldIds().isEmpty()) {
48 |       // upsert mode requested, but the table has no identifierFieldIds (primary key)
49 |       LOGGER.info("Table has no primary key defined; upsert is not possible, falling back to append!");
50 |       return appendWriter(icebergTable, format, appenderFactory, fileFactory, targetFileSize);
51 |     } else {
52 |       // upsert mode and the table has identifierFieldIds (primary key): use delta writers
53 |       return deltaWriter(icebergTable, format, appenderFactory, fileFactory, targetFileSize);
54 |     }
55 |   }
56 | 
57 |   private BaseTaskWriter<Record> appendWriter(Table icebergTable, FileFormat format, GenericAppenderFactory appenderFactory, OutputFileFactory fileFactory, long targetFileSize) {
58 | 
59 |     if (icebergTable.spec().isUnpartitioned()) {
60 |       // table is unpartitioned: use the unpartitioned append writer
61 |       return new UnpartitionedWriter<>(
62 |           icebergTable.spec(), format, appenderFactory, fileFactory, icebergTable.io(), targetFileSize);
63 | 
64 |     } else {
65 |       // table is partitioned: use the partitioned append writer
66 |       return new PartitionedAppendWriter(
67 |           icebergTable.spec(), format, appenderFactory, fileFactory, icebergTable.io(), targetFileSize, icebergTable.schema());
68 |     }
69 |   }
70 | 
71 |   private BaseTaskWriter<Record> deltaWriter(Table icebergTable, FileFormat format, GenericAppenderFactory appenderFactory, OutputFileFactory fileFactory, long targetFileSize) {
72 | 
73 |     Set<Integer> identifierFieldIds = icebergTable.schema().identifierFieldIds();
74 |     if (icebergTable.spec().isUnpartitioned()) {
75 |       // running with upsert mode + unpartitioned table
76 |       return new UnpartitionedDeltaWriter(icebergTable.spec(), format, appenderFactory, fileFactory,
77 |           icebergTable.io(),
78 |           targetFileSize, icebergTable.schema(), identifierFieldIds, config.iceberg().keepDeletes());
79 |     } else {
80 |       // running with upsert mode + partitioned table
81 |       return new PartitionedDeltaWriter(icebergTable.spec(), format, appenderFactory, fileFactory,
82 |           icebergTable.io(),
83 |           targetFileSize, icebergTable.schema(), identifierFieldIds, config.iceberg().keepDeletes());
84 |     }
85 |   }
86 | }
87 | 
--------------------------------------------------------------------------------
/docs/migration.md:
--------------------------------------------------------------------------------
 1 | # Migration Guide
 2 | 
 3 | This document provides guidance on upgrading the Debezium Iceberg consumer and handling potential migration challenges.
 4 | 
 5 | ## General Upgrade Process
 6 | 
 7 | Please be aware that each release may include backward-incompatible changes. Thorough testing in a staging environment is strongly recommended before deploying any new version to production.
 8 | 
 9 | If you encounter any issues not covered here, please feel free to report them as GitHub Issues.
10 | 
11 | ## Handling Incompatible Data Type Changes
12 | 
13 | An incompatible data type change can occur in two main scenarios:
14 | 1. **Upgrading the Connector:** A newer version of Debezium might improve how it handles certain data types. For example, it might change its representation of timestamps from a `long` (epoch milliseconds) to a logical `timestamp` type.
15 | 2. **Source Database Schema Change:** The schema in your source database might change in a way that results in an incompatible type change in the Debezium event.
16 | 
17 | In either case, the Debezium Iceberg consumer will fail to write the new data and log an error similar to this:
18 | 
19 | ```
20 | java.lang.IllegalArgumentException: Cannot change column type: order_created_ts_ms: long -> timestamp
21 | ```
22 | 
23 | To handle such a change, you need to perform a manual migration step on your Iceberg table. The strategy is to rename the old column, allowing the consumer to create a new column with the correct type for incoming data.
24 | 
25 | ### Migration Steps
26 | 
27 | Let's use the example of a column `order_created_ts_ms` changing from `long` to `timestamp`, as happens when migrating the consumer from 0.8.x to 0.9.x.
28 | 
29 | 1. **Stop the Debezium Server** to prevent further write attempts.
30 | 
31 | 2. **Adjust the Table Schema**
32 | 
33 |    You have two primary options to resolve the schema mismatch. Choose the one that best fits your table size and operational requirements.
34 | 
35 |    **Option 1: Rewrite the Table (for small tables)**
36 | 
37 |    If your table is small, you can rewrite its entire contents while converting the problematic column to the new data type. This approach avoids having separate columns for old and new data but can be very expensive for large tables.
38 | 
39 |    ⚠️ **Warning:** This operation rewrites the entire table and can be very slow and costly. It is generally not recommended for large production tables.
40 | 
41 |    Using Spark SQL, you can replace the table with the result of a query. The new table schema will be inferred from the `SELECT` statement.
42 | 
43 |    ```sql
44 |    -- Make sure to include ALL columns from the original table to avoid data loss.
45 |    INSERT OVERWRITE my_catalog.my_db.my_table
46 |    SELECT
47 |      id,
48 |      -- other_column_1,
49 |      -- other_column_2,
50 |      timestamp_millis(order_created_ts_ms) AS order_created_ts_ms
51 |    FROM my_catalog.my_db.my_table;
52 |    ```
53 | 
54 |    **Option 2: Rename the Column (Recommended for large tables)**
55 | 
56 |    This is the **recommended approach for most production scenarios**. Renaming a column is a fast, metadata-only operation that does not require rewriting any data files. It is nearly instantaneous, making it ideal for large tables.
57 | 
58 |    You can use any tool that supports Iceberg table management, such as Spark, Flink, or the Iceberg REST catalog API.
59 | 
60 |    Using Spark SQL:
61 |    ```sql
62 |    ALTER TABLE my_catalog.my_db.my_table RENAME COLUMN order_created_ts_ms TO order_created_ts_ms_legacy;
63 |    ```
64 | 
65 | 3. **Upgrade and Restart the Debezium Server**.
66 | 
67 | ### What Happens Next?
68 | 
69 | When the consumer processes the new events, it will find that the `order_created_ts_ms` column no longer exists. It will then add it to the table schema as a new column with the correct `timestamp` type.
70 | 
71 | After this process, your table will have both columns:
72 | - `order_created_ts_ms_legacy` (`long`): Contains the old data. New rows will have `null` in this column.
73 | - `order_created_ts_ms` (`timestamp`): Contains the new data. Old rows will have `null` in this column.
74 | 
75 | This approach preserves all your data while allowing the schema to evolve to accommodate the new data type. You can later decide to backfill the data and consolidate it into a single column if needed, as sketched below.
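
Such a backfill could look like the following minimal Spark (Java) sketch. This is an illustration, not part of the connector: it assumes a `SparkSession` with the Iceberg runtime and SQL extensions enabled, and it reuses the hypothetical catalog, table, and column names from the example above.

```java
import org.apache.spark.sql.SparkSession;

public class BackfillLegacyTimestampColumn {
  public static void main(String[] args) {
    // Assumes the session is configured with the Iceberg catalog "my_catalog"
    // and the Iceberg Spark SQL extensions (required for UPDATE).
    SparkSession spark = SparkSession.builder().appName("backfill-legacy-ts").getOrCreate();

    // Convert the legacy epoch-millis values into the new timestamp column,
    // touching only the old rows where the new column is still null.
    spark.sql("UPDATE my_catalog.my_db.my_table "
        + "SET order_created_ts_ms = timestamp_millis(order_created_ts_ms_legacy) "
        + "WHERE order_created_ts_ms IS NULL AND order_created_ts_ms_legacy IS NOT NULL");

    // Once the backfill is verified, the legacy column can be dropped
    // (a metadata-only operation in Iceberg).
    spark.sql("ALTER TABLE my_catalog.my_db.my_table DROP COLUMN order_created_ts_ms_legacy");

    spark.stop();
  }
}
```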
76 | 
77 | Alternatively, you can simply use `COALESCE` at read time to query the consolidated data:
78 | ```sql
79 | SELECT COALESCE(timestamp_millis(order_created_ts_ms_legacy), order_created_ts_ms) AS order_created_ts_ms FROM my_catalog.my_db.my_table
80 | ```
81 | 
--------------------------------------------------------------------------------
/debezium-server-iceberg-sink/src/test/java/io/debezium/server/iceberg/IcebergChangeConsumerTemporalIsoStringTest.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  *
 3 |  * * Copyright memiiso Authors.
 4 |  * *
 5 |  * * Licensed under the Apache Software License version 2.0, available at http://www.apache.org/licenses/LICENSE-2.0
 6 |  *
 7 |  */
 8 | 
 9 | package io.debezium.server.iceberg;
10 | 
11 | import io.debezium.server.iceberg.testresources.CatalogNessie;
12 | import io.debezium.server.iceberg.testresources.S3Minio;
13 | import io.debezium.server.iceberg.testresources.SourcePostgresqlDB;
14 | import io.quarkus.test.common.QuarkusTestResource;
15 | import io.quarkus.test.junit.QuarkusTest;
16 | import io.quarkus.test.junit.QuarkusTestProfile;
17 | import io.quarkus.test.junit.TestProfile;
18 | import org.apache.spark.sql.Dataset;
19 | import org.apache.spark.sql.Row;
20 | import org.apache.spark.sql.types.DataTypes;
21 | import org.awaitility.Awaitility;
22 | import org.junit.jupiter.api.Assertions;
23 | import org.junit.jupiter.api.Test;
24 | 
25 | import java.time.Duration;
26 | import java.util.HashMap;
27 | import java.util.Map;
28 | 
29 | /**
30 |  * Integration test that verifies basic reading from PostgreSQL database and writing to iceberg destination.
31 |  *
32 |  * @author Ismail Simsek
33 |  */
34 | @QuarkusTest
35 | @QuarkusTestResource(value = S3Minio.class, restrictToAnnotatedClass = true)
36 | @QuarkusTestResource(value = SourcePostgresqlDB.class, restrictToAnnotatedClass = true)
37 | @QuarkusTestResource(value = CatalogNessie.class, restrictToAnnotatedClass = true)
38 | @TestProfile(IcebergChangeConsumerTemporalIsoStringTest.TestProfile.class)
39 | public class IcebergChangeConsumerTemporalIsoStringTest extends BaseSparkTest {
40 | 
41 |   @Test
42 |   public void testConsumingVariousDataTypes() throws Exception {
43 |     String sql =
44 |         "DROP TABLE IF EXISTS inventory.data_types;\n" +
45 |             "CREATE TABLE IF NOT EXISTS inventory.data_types (\n" +
46 |             "    c_id INTEGER ,\n" +
47 |             "    c_date DATE,\n" +
48 |             "    c_time TIME,\n" +
49 |             "    c_timestamp TIMESTAMP,\n" +
50 |             "    c_timestamptz TIMESTAMPTZ\n" +
51 |             ");";
52 |     SourcePostgresqlDB.runSQL(sql);
53 |     sql = "INSERT INTO inventory.data_types \n" +
54 |         "(c_id, c_date, c_time, c_timestamp, c_timestamptz) \n" +
55 |         "VALUES \n" +
56 |         "(1, null, null, null, null) \n" +
57 |         ",(2, CURRENT_DATE , CURRENT_TIME, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP ) \n" +
58 |         ",(3, '2024-01-02'::DATE , '18:04:00'::TIME, '2023-10-11 10:30:00'::timestamp, '2023-11-12 10:30:00+02'::timestamptz ) ";
59 | 
60 |     SourcePostgresqlDB.runSQL(sql);
61 |     Awaitility.await().atMost(Duration.ofSeconds(320)).until(() -> {
62 |       try {
63 |         Dataset<Row> df = getTableData("testc.inventory.data_types");
64 |         df.show(false);
65 |         df.schema().printTreeString();
66 | 
67 |         Assertions.assertEquals(3, df.count(), "Incorrect row count");
68 |         // Validate date field and values
69 |         Assertions.assertEquals(DataTypes.DateType, getSchemaField(df, "c_date").dataType());
70 |         Assertions.assertEquals(1, df.filter("c_id = 3 AND c_date = to_date('2024-01-02', 'yyyy-MM-dd')").count());
71 |         // Validate time field and values
72 |         System.out.println(getSchemaField(df, "c_timestamp").dataType());
73 |         Assertions.assertEquals(DataTypes.TimestampNTZType, getSchemaField(df, "c_timestamp").dataType());
74 |         Assertions.assertEquals(1, df.filter("c_id = 3 AND c_timestamp = to_timestamp('2023-10-11 10:30:00')").count());
75 |         Assertions.assertEquals(DataTypes.TimestampType, getSchemaField(df, "c_timestamptz").dataType());
76 |         Assertions.assertEquals(1, df.filter("c_id = 3 AND c_timestamptz = to_timestamp('2023-11-12 10:30:00+02')").count());
77 |         // time type is kept as string, because spark does not support time type
78 |         Assertions.assertEquals(DataTypes.StringType, getSchemaField(df, "c_time").dataType());
79 |         Assertions.assertEquals(1, df.filter("c_id = 3 AND c_time = '18:04:00Z'").count());
80 |         return true;
81 |       } catch (Exception | AssertionError e) {
82 |         e.printStackTrace();
83 |         return false;
84 |       }
85 |     });
86 |   }
87 | 
88 |   public static class TestProfile implements QuarkusTestProfile {
89 |     @Override
90 |     public Map<String, String> getConfigOverrides() {
91 |       Map<String, String> config = new HashMap<>();
92 |       config.put("debezium.source.hstore.handling.mode", "map");
93 |       // config.put("debezium.source.table.whitelist", "inventory.data_types");
94 |       config.put("debezium.source.time.precision.mode", "isostring");
95 |       return config;
96 |     }
97 |   }
98 | }
99 | 
--------------------------------------------------------------------------------
/python/debezium/__init__.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | import os
 3 | import sys
 4 | import threading
 5 | from pathlib import Path
 6 | 
 7 | 
 8 | class LoggerClass:
 9 |     def __init__(self):
10 |         self._log = None
11 | 
12 |     @property
13 |     def log(self):
14 |         if not self._log:
15 |             self._log = logging.getLogger(name="debezium")
16 |             self._log.setLevel(logging.INFO)
17 |             if not self._log.hasHandlers():
18 |                 handler = logging.StreamHandler(sys.stdout)
19 |                 handler.setLevel(logging.INFO)
20 |                 formatter = logging.Formatter('%(asctime)s %(levelname)s [%(module)s] (%(funcName)s) %(message)s')
21 |                 handler.setFormatter(formatter)
22 |                 self._log.addHandler(handler)
23 |         return self._log
24 | 
25 | 
26 | class Debezium(LoggerClass):
27 | 
28 |     def __init__(self, debezium_dir: str = None, conf_dir: str = None, java_home: str = None):
29 |         super().__init__()
30 |         if debezium_dir is None:
31 |             self.debezium_server_dir: Path = Path(__file__).resolve().parent
32 |         else:
33 |             if not Path(debezium_dir).is_dir():
34 |                 raise Exception("Debezium Server directory '%s' not found" % debezium_dir)
35 |             self.debezium_server_dir: Path = Path(debezium_dir)
36 |         self.log.info("Setting Debezium dir to:%s" % self.debezium_server_dir.as_posix())
37 | 
38 |         if conf_dir is None:
39 |             self.conf_dir = self.debezium_server_dir.joinpath("config")
40 |         else:
41 |             if not Path(conf_dir).is_dir():
42 |                 raise Exception("Debezium conf directory '%s' not found" % conf_dir)
43 |             self.conf_dir: Path = Path(conf_dir)
44 |         self.log.info("Setting conf dir to:%s" % self.conf_dir.as_posix())
45 | 
46 |         if java_home:
47 |             os.putenv("JAVA_HOME", java_home)
48 |             os.environ["JAVA_HOME"] = java_home
49 |             self.log.info("JAVA_HOME env variable set to %s" % java_home)
50 | 
51 |     def _jnius_config(self, *java_args):
52 |         import jnius_config
53 | 
54 |         if jnius_config.vm_running:
55 |             raise ValueError(
56 |                 "VM is already running, can't set classpath/options; VM started at %s" % jnius_config.vm_started_at)
57 | 
58 |         # NOTE this needs to be set before add_classpath
59 |         jnius_config.add_options(*java_args)
60 | 
61 |         debezium_classpath: list = [
62 |             self.debezium_server_dir.joinpath('*').as_posix(),
63 |             self.debezium_server_dir.joinpath("lib/*").as_posix(),
64 |             self.conf_dir.as_posix()]
65 | 
66 |         jnius_config.add_classpath(*debezium_classpath)
67 |         self.log.info("VM Classpath: %s" % jnius_config.get_classpath())
68 |         return jnius_config
69 | 
70 |     def _sanitize(self, jvm_option: str):
71 |         """Sanitizes jvm argument like `my.property.secret=xyz` if it contains secret.
72 |         >>> dbz = Debezium()
73 |         >>> dbz._sanitize("source.pwd=pswd")
74 |         'source.pwd=*****'
75 |         >>> dbz._sanitize("source.password=pswd")
76 |         'source.password=*****'
77 |         >>> dbz._sanitize("source.secret=pswd")
78 |         'source.secret=*****'
79 |         """
80 |         if any(x in jvm_option.lower() for x in ['pwd', 'password', 'secret', 'apikey', 'apitoken']):
81 |             head, sep, tail = jvm_option.partition('=')
82 |             return head + '=*****'
83 |         else:
84 |             return jvm_option
85 | 
86 |     # pylint: disable=no-name-in-module
87 |     def run(self, *java_args: str):
88 |         jnius_config = self._jnius_config(*java_args)
89 |         try:
90 |             __jvm_options: list = [self._sanitize(p) for p in jnius_config.get_options()]
91 |             self.log.info("Configured jvm options:%s" % __jvm_options)
92 | 
93 |             from jnius import autoclass
94 |             DebeziumServer = autoclass('io.debezium.server.Main')
95 |             _dbz = DebeziumServer()
96 |             return _dbz.main()
97 |         finally:
98 |             from jnius import detach
99 |             detach()
100 | 
101 | 
102 | class DebeziumRunAsyn(threading.Thread):
103 |     def __init__(self, debezium_dir: str, java_args: list, java_home: str = None):
104 |         threading.Thread.__init__(self)
105 |         self.debezium_dir = debezium_dir
106 |         self.java_args = java_args
107 |         self.java_home = java_home
108 |         self._dbz: Debezium = None
109 | 
110 |     def run(self):
111 |         self._dbz = Debezium(debezium_dir=self.debezium_dir, java_home=self.java_home)
112 |         return self._dbz.run(*self.java_args)
113 | 
--------------------------------------------------------------------------------
/debezium-server-iceberg-sink/src/main/java/io/debezium/server/iceberg/converter/EventConverter.java:
--------------------------------------------------------------------------------
  1 | package io.debezium.server.iceberg.converter;
  2 | import io.debezium.server.iceberg.tableoperator.Operation;
  3 | import io.debezium.server.iceberg.tableoperator.RecordWrapper;
  4 | import jakarta.annotation.Nullable;
  5 | import jakarta.validation.constraints.NotNull;
  6 | import org.apache.iceberg.Schema;
  7 | import org.apache.iceberg.SortOrder;
  8 | 
  9 | /**
 10 |  * Interface for converting CDC events from various formats (e.g., Json, Debezium Connect format)
 11 |  * into Iceberg records and extracting relevant metadata.
 12 |  */
 13 | public interface EventConverter {
 14 | 
 15 |   /**
 16 |    * Extracts the key part of the CDC event.
 17 |    * The actual type depends on the source event format (e.g., Struct, String).
 18 |    *
 19 |    * @param <T> The expected type of the key.
 20 |    * @return The event key, or potentially null if the event has no key.
 21 |    */
 22 |   @Nullable
 23 |   <T> T key();
 24 | 
 25 |   /**
 26 |    * Checks if the event contains key data. Useful for distinguishing
 27 |    * events with explicit null keys from events without keys.
 28 |    *
 29 |    * @return true if key data is present, false otherwise.
 30 |    */
 31 |   boolean hasKeyData();
 32 | 
 33 | 
 34 |   /**
 35 |    * Extracts the value/payload part of the CDC event.
 36 |    * The actual type depends on the source event format (e.g., Struct, String).
 37 |    *
 38 |    * @param <T> The expected type of the value.
 39 |    * @return The event value, or null for delete events (tombstones).
 40 |    */
 41 |   @Nullable
 42 |   <T> T value();
 43 | 
 44 |   /**
 45 |    * Extracts the source timestamp of the event.
 46 |    *
 47 |    * @return The source timestamp value; never null.
 48 |    */
 49 |   @NotNull
 50 |   Long cdcSourceTsValue();
 51 | 
 52 |   /**
 53 |    * Extracts the CDC operation type (Create, Update, Delete, Read).
 54 |    *
 55 |    * @return The {@link Operation} enum value.
 56 |    */
 57 |   @NotNull
 58 |   Operation cdcOpValue();
 59 | 
 60 |   /**
 61 |    * Provides a converter capable of transforming the event's schema representation
 62 |    * into an Iceberg {@link Schema}.
 63 |    *
 64 |    * @return The schema converter instance.
 65 |    */
 66 |   @NotNull
 67 |   SchemaConverter schemaConverter();
 68 | 
 69 |   /**
 70 |    * Indicates whether this event represents a schema change event rather than a data change event.
 71 |    *
 72 |    * @return true if it's a schema change event, false otherwise.
 73 |    */
 74 |   boolean isSchemaChangeEvent();
 75 | 
 76 |   /**
 77 |    * Gets the Iceberg {@link Schema} that corresponds to the data payload (`value()`)
 78 |    * of this specific event, potentially derived from schema information embedded within the event.
 79 |    * This might differ from the target table's schema if schema evolution is occurring.
 80 |    * Returns null if the event is a schema change event or has no associated data schema.
 81 |    *
 82 |    * @return The Iceberg schema for the event's data, or null.
 83 |    */
 84 |   @Nullable
 85 |   Schema icebergSchema(boolean withIdentifierFields);
 86 | 
 87 |   default Schema icebergSchema() {
 88 |     return icebergSchema(true);
 89 |   }
 90 | 
 91 |   /**
 92 |    * Gets the Iceberg {@link SortOrder} that corresponds to the data key of this specific event.
 93 |    * @param schema The Iceberg schema for {@link SortOrder.Builder}.
 94 |    * @return The Iceberg {@link SortOrder}.
 95 |    */
 96 |   @Nullable
 97 |   SortOrder sortOrder(Schema schema);
 98 | 
 99 |   /**
100 |    * Gets the destination identifier (e.g., logical table name) for this event.
101 |    *
102 |    * @return The destination string.
103 |    */
104 |   @NotNull
105 |   String destination();
106 | 
107 |   /**
108 |    * Converts the event data into a {@link RecordWrapper} suitable for direct append operations,
109 |    * using the provided target Iceberg schema. This might optimize by only including necessary fields
110 |    * for an append (e.g., the 'after' state).
111 |    *
112 |    * @param schema The target Iceberg schema to conform to.
113 |    * @return A {@link RecordWrapper} containing the data formatted for appending.
114 |    */
115 |   @NotNull
116 |   RecordWrapper convertAsAppend(@NotNull Schema schema);
117 | 
118 |   /**
119 |    * Converts the event data into a {@link RecordWrapper} suitable for general iceberg consumption
120 |    * (Create, Update, Delete), using the provided target Iceberg schema.
121 |    *
122 |    * @param schema The target Iceberg schema to conform to.
123 |    * @return A {@link RecordWrapper} containing the data formatted for iceberg table.
124 |    */
125 |   @NotNull
126 |   RecordWrapper convert(@NotNull Schema schema);
127 | }
128 | 
--------------------------------------------------------------------------------
/debezium-server-iceberg-sink/src/main/java/io/debezium/server/iceberg/converter/DateTimeUtils.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright Debezium Authors.
3 | * 4 | * Licensed under the Apache Software License version 2.0, available at http://www.apache.org/licenses/LICENSE-2.0 5 | */ 6 | package io.debezium.server.iceberg.converter; 7 | 8 | import io.debezium.time.Conversions; 9 | import org.apache.iceberg.util.DateTimeUtil; 10 | 11 | import java.sql.Timestamp; 12 | import java.time.Duration; 13 | import java.time.Instant; 14 | import java.time.LocalDate; 15 | import java.time.LocalDateTime; 16 | import java.time.LocalTime; 17 | import java.time.OffsetDateTime; 18 | import java.time.ZoneId; 19 | import java.time.ZoneOffset; 20 | import java.time.ZonedDateTime; 21 | import java.time.temporal.ChronoUnit; 22 | import java.util.Date; 23 | import java.util.TimeZone; 24 | import java.util.concurrent.TimeUnit; 25 | 26 | public class DateTimeUtils { 27 | 28 | private DateTimeUtils() { 29 | } 30 | 31 | public static Instant toInstantFromNanos(long epochNanos) { 32 | final long epochSeconds = TimeUnit.NANOSECONDS.toSeconds(epochNanos); 33 | final long adjustment = TimeUnit.NANOSECONDS.toNanos(epochNanos % TimeUnit.SECONDS.toNanos(1)); 34 | return Instant.ofEpochSecond(epochSeconds, adjustment); 35 | } 36 | 37 | public static ZonedDateTime toZonedDateTimeFromDate(Date date, TimeZone timeZone) { 38 | return toZonedDateTimeFromDate(date, timeZone.toZoneId()); 39 | } 40 | 41 | public static ZonedDateTime toZonedDateTimeFromDate(Date date, ZoneId zoneId) { 42 | return date.toInstant().atZone(zoneId); 43 | } 44 | 45 | public static ZonedDateTime toZonedDateTimeFromInstantEpochMicros(long epochMicros) { 46 | return Conversions.toInstantFromMicros(epochMicros).atZone(ZoneOffset.UTC); 47 | } 48 | 49 | public static ZonedDateTime toZonedDateTimeFromInstantEpochNanos(long epochNanos) { 50 | return ZonedDateTime.ofInstant(toInstantFromNanos(epochNanos), ZoneOffset.UTC); 51 | } 52 | 53 | public static LocalDate toLocalDateOfEpochDays(long epochDays) { 54 | return LocalDate.ofEpochDay(epochDays); 55 | } 56 | 57 | public static LocalDate toLocalDateFromDate(Date date) { 58 | return toLocalDateFromInstantEpochMillis(date.getTime()); 59 | } 60 | 61 | public static LocalDate toLocalDateFromInstantEpochMillis(long epochMillis) { 62 | return LocalDate.ofInstant(Instant.ofEpochMilli(epochMillis), ZoneOffset.UTC); 63 | } 64 | 65 | public static LocalTime toLocalTimeFromDurationMilliseconds(long durationMillis) { 66 | return LocalTime.ofNanoOfDay(Duration.of(durationMillis, ChronoUnit.MILLIS).toNanos()); 67 | } 68 | 69 | public static LocalTime toLocalTimeFromDurationMicroseconds(long durationMicros) { 70 | return LocalTime.ofNanoOfDay(Duration.of(durationMicros, ChronoUnit.MICROS).toNanos()); 71 | } 72 | 73 | public static LocalTime toLocalTimeFromDurationNanoseconds(long durationNanos) { 74 | return LocalTime.ofNanoOfDay(Duration.of(durationNanos, ChronoUnit.NANOS).toNanos()); 75 | } 76 | 77 | public static LocalTime toLocalTimeFromUtcDate(Date date) { 78 | return date.toInstant().atOffset(ZoneOffset.UTC).toLocalTime(); 79 | } 80 | 81 | public static LocalDateTime toLocalDateTimeFromDate(Date date) { 82 | return toLocalDateTimeFromInstantEpochMillis(date.getTime()); 83 | } 84 | 85 | public static LocalDateTime toLocalDateTimeFromInstantEpochMillis(long epochMillis) { 86 | return LocalDateTime.ofInstant(Conversions.toInstantFromMillis(epochMillis), ZoneOffset.UTC); 87 | } 88 | 89 | public static LocalDateTime toLocalDateTimeFromInstantEpochMicros(long epochMicros) { 90 | return LocalDateTime.ofInstant(Conversions.toInstantFromMicros(epochMicros), ZoneOffset.UTC); 91 | } 
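  // Illustrative usage (hypothetical values, not part of the original class):
  //   long epochMicros = 1_697_020_200_000_000L; // 2023-10-11T10:30:00Z
  //   LocalDateTime ts = toLocalDateTimeFromInstantEpochMicros(epochMicros);   // 2023-10-11T10:30 (UTC, no zone)
  //   ZonedDateTime tsTz = toZonedDateTimeFromInstantEpochMicros(epochMicros); // 2023-10-11T10:30Z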
92 | 93 | public static LocalDateTime toLocalDateTimeFromInstantEpochNanos(long epochNanos) { 94 | return LocalDateTime.ofInstant(toInstantFromNanos(epochNanos), ZoneOffset.UTC); 95 | } 96 | 97 | public static Timestamp toTimestampFromMillis(long epochMilliseconds) { 98 | final Instant instant = Conversions.toInstantFromMillis(epochMilliseconds); 99 | final Timestamp ts = new Timestamp(instant.toEpochMilli()); 100 | ts.setNanos(instant.getNano()); 101 | return ts; 102 | } 103 | 104 | protected static LocalDateTime timestampFromMillis(long millisFromEpoch) { 105 | return ChronoUnit.MILLIS.addTo(DateTimeUtil.EPOCH, millisFromEpoch).toLocalDateTime(); 106 | } 107 | 108 | protected static OffsetDateTime timestamptzFromNanos(long nanosFromEpoch) { 109 | return ChronoUnit.NANOS.addTo(DateTimeUtil.EPOCH, nanosFromEpoch); 110 | } 111 | 112 | protected static OffsetDateTime timestamptzFromMillis(long millisFromEpoch) { 113 | return ChronoUnit.MILLIS.addTo(DateTimeUtil.EPOCH, millisFromEpoch); 114 | } 115 | } 116 | --------------------------------------------------------------------------------
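
Putting the pieces above together: an `EventConverter` turns a CDC event into a `RecordWrapper` that conforms to the target table schema, and `IcebergTableWriterFactory` picks the writer that can apply it. The sketch below shows roughly how they compose. It is a hedged illustration under stated assumptions — the `events` collection, the resolved `table`, and the omitted commit step are placeholders, not the project's actual consumer code.

```java
import java.io.IOException;

import org.apache.iceberg.Table;
import org.apache.iceberg.data.Record;
import org.apache.iceberg.io.BaseTaskWriter;
import org.apache.iceberg.io.WriteResult;

import io.debezium.server.iceberg.converter.EventConverter;
import io.debezium.server.iceberg.tableoperator.IcebergTableWriterFactory;
import io.debezium.server.iceberg.tableoperator.RecordWrapper;

// Hypothetical glue code showing how a batch of events might flow through the classes above.
public class WritePathSketch {

  public static WriteResult writeBatch(IcebergTableWriterFactory writerFactory, Table table,
                                       Iterable<EventConverter> events) throws IOException {
    // The factory returns an append writer, or a delta (upsert) writer when upsert
    // mode is enabled and the table has identifier fields (a primary key).
    BaseTaskWriter<Record> writer = writerFactory.create(table);
    for (EventConverter event : events) {
      // convert() conforms the event payload to the target table schema and carries
      // the CDC operation, so delta writers can emit the matching delete records.
      RecordWrapper record = event.convert(table.schema());
      writer.write(record);
    }
    // complete() closes the writer and returns the data/delete files it produced;
    // committing them to the table (e.g., via a RowDelta) is omitted here.
    return writer.complete();
  }
}
```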